@@ -25,3 +25,16 @@ class UnexpectedToken(ParseError):
 
 def is_terminal(sym):
     return sym.isupper() or sym[0] == '$'
+
+class LexerConf:
+    def __init__(self, tokens, ignore, postlex):
+        self.tokens = tokens
+        self.ignore = ignore
+        self.postlex = postlex
+
+class ParserConf:
+    def __init__(self, rules, callback, start):
+        self.rules = rules
+        self.callback = callback
+        self.start = start
+
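These two holders bundle everything the lexer and the parser each need, so the pieces travel as single objects instead of loose arguments. A rough sketch of the intended flow, using only names from this diff (the empty token and rule lists are placeholders, not the real TokenDef and rule structures):

```python
# Build the two configs once...
lexer_conf = LexerConf(tokens=[], ignore=['WS', 'COMMENT'], postlex=None)
parser_conf = ParserConf(rules=[], callback=None, start='start')

# ...then hand both to a parser frontend, as the later hunks do:
#     self.parser = LALR(lexer_conf, parser_conf)
```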
@@ -5,7 +5,7 @@ import os
 
 from .utils import STRING_TYPE, inline_args
 from .load_grammar import load_grammar
 from .tree import Tree, Transformer
-from .common import GrammarError
+from .common import GrammarError, LexerConf, ParserConf
 from .lexer import Lexer
 from .parse_tree_builder import ParseTreeBuilder
@@ -105,45 +105,46 @@ class Lark:
         assert isinstance(grammar, STRING_TYPE)
 
-        if self.options.cache_grammar:
+        if self.options.cache_grammar or self.options.keep_all_tokens:
             raise NotImplementedError("Not available yet")
 
         assert not self.options.profile, "Feature temporarily disabled"
         self.profiler = Profiler() if self.options.profile else None
 
-        self.tokens, self.rules = load_grammar(grammar)
+        tokens, self.rules = load_grammar(grammar)
+
+        self.ignore_tokens = []
+        for tokendef, flags in tokens:
+            for flag in flags:
+                if flag == 'ignore':
+                    self.ignore_tokens.append(tokendef.name)
+                else:
+                    raise GrammarError("No such flag: %s" % flag)
+
+        self.lexer_conf = LexerConf([t[0] for t in tokens], self.ignore_tokens, self.options.postlex)
 
         if not self.options.only_lex:
-            self.parser_engine = ENGINE_DICT[self.options.parser]()
-            self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class)
             self.parser = self._build_parser()
-            self.lexer = self._build_lexer()
         else:
             self.lexer = self._build_lexer()
 
         if self.profiler: self.profiler.enter_section('outside_lark')
 
     __init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC
 
     def _build_lexer(self):
-        ignore_tokens = []
-        tokens = []
-        for tokendef, flags in self.tokens:
-            for flag in flags:
-                if flag == 'ignore':
-                    ignore_tokens.append(tokendef.name)
-                else:
-                    raise GrammarError("No such flag: %s" % flag)
-            tokens.append(tokendef)
-        return Lexer(tokens, ignore=ignore_tokens)
+        return Lexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore)
 
     def _build_parser(self):
+        self.parser_class = ENGINE_DICT[self.options.parser]
+        self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class)
         rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer)
         if self.profiler:
            for f in dir(callback):
-                if not f.startswith('__'):
+                if not (f.startswith('__') and f.endswith('__')):
                     setattr(callback, f, self.profiler.make_wrapper('transformer', getattr(callback, f)))
-        return self.parser_engine.build_parser(rules, callback, self.options.start)
+        parser_conf = ParserConf(rules, callback, self.options.start)
+        return self.parser_class(self.lexer_conf, parser_conf)
 
     def lex(self, text):
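`load_grammar` now returns `(tokendef, flags)` pairs, and the only recognized flag is `ignore`; anything else raises `GrammarError`. A self-contained sketch of what the loop above computes, with `TokenDef` as a stand-in for the real lexer token classes:

```python
class TokenDef:  # stand-in; the real classes live in lark's lexer module
    def __init__(self, name, value):
        self.name, self.value = name, value

tokens = [(TokenDef('WS', r'\s+'), ['ignore']),
          (TokenDef('NUMBER', r'\d+'), [])]

# Same effect as the loop in __init__: collect the names flagged 'ignore'.
ignore_tokens = [t.name for t, flags in tokens if 'ignore' in flags]
assert ignore_tokens == ['WS']

# LexerConf then receives the bare tokendefs plus the ignore list:
#     LexerConf([t[0] for t in tokens], ignore_tokens, postlex)
```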
@@ -156,15 +157,17 @@ class Lark:
 
     def parse(self, text):
         assert not self.options.only_lex
-
-        if self.profiler:
-            self.profiler.enter_section('lex')
-            l = list(self.lex(text))
-            self.profiler.enter_section('parse')
-            try:
-                return self.parser.parse(l)
-            finally:
-                self.profiler.enter_section('outside_lark')
-        else:
-            l = list(self.lex(text))
-            return self.parser.parse(l)
+        return self.parser.parse(text)
+
+        # if self.profiler:
+        #     self.profiler.enter_section('lex')
+        #     l = list(self.lex(text))
+        #     self.profiler.enter_section('parse')
+        #     try:
+        #         return self.parser.parse(l)
+        #     finally:
+        #         self.profiler.enter_section('outside_lark')
+        # else:
+        #     l = list(self.lex(text))
+        #     return self.parser.parse(l)
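The caller-facing effect: `parse()` now takes raw text and delegates straight to the frontend, which lexes internally, so there is no separate lex step anymore. A minimal usage sketch; `my_grammar` is a placeholder for a grammar string in this version's syntax, not something defined in the diff:

```python
parser = Lark(my_grammar)          # my_grammar: grammar text (placeholder)
tree = parser.parse('some input')  # lexing now happens inside the frontend
```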
@@ -6,7 +6,7 @@ from .lexer import Lexer, Token, UnexpectedInput, TokenDef__Str, TokenDef__Regexp
 
 from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import LALR
 from .parsers.lalr_parser import UnexpectedToken
-from .common import is_terminal, GrammarError
+from .common import is_terminal, GrammarError, LexerConf, ParserConf
 from .tree import Tree as T, Transformer, InlineTransformer, Visitor
@@ -279,11 +279,12 @@ class ExtractAnonTokens(InlineTransformer):
 
 class GrammarLoader:
     def __init__(self):
         tokens = [TokenDef__Regexp(name, value) for name, value in TOKENS.items()]
-        self.lexer = Lexer(tokens, ignore=['WS', 'COMMENT'])
 
         d = {r: [(x.split(), None) for x in xs] for r, xs in RULES.items()}
         rules, callback = ParseTreeBuilder(T).create_tree_builder(d, None)
-        self.parser = LALR().build_parser(rules, callback, 'start')
+        lexer_conf = LexerConf(tokens, ['WS', 'COMMENT'], None)
+        parser_conf = ParserConf(rules, callback, 'start')
+        self.parser = LALR(lexer_conf, parser_conf)
 
         self.simplify_tree = SimplifyTree()
         self.simplify_rule = SimplifyRule_Visitor()
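The bootstrap parser for the grammar language itself now goes through the same conf objects. The `d = ...` comprehension turns the `RULES` table (rule name to list of expansion strings) into lists of `(symbol_list, None)` pairs for `create_tree_builder`, the `None` apparently being an empty per-expansion alias slot. A standalone illustration with made-up rules:

```python
RULES = {'start': ['list'],
         'list': ['item', 'list item']}   # invented bootstrap rules, for the demo

d = {r: [(x.split(), None) for x in xs] for r, xs in RULES.items()}

assert d == {'start': [(['list'], None)],
             'list': [(['item'], None), (['list', 'item'], None)]}
```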
@@ -291,12 +292,9 @@ class GrammarLoader:
 
     def load_grammar(self, grammar_text):
         try:
-            token_stream = list(self.lexer.lex(grammar_text+"\n"))
+            tree = self.simplify_tree.transform( self.parser.parse(grammar_text+'\n') )
         except UnexpectedInput as e:
             raise GrammarError("Unexpected input %r at line %d column %d" % (e.context, e.line, e.column))
-
-        try:
-            tree = self.simplify_tree.transform( self.parser.parse(token_stream) )
         except UnexpectedToken as e:
             if '_COLON' in e.expected:
                 raise GrammarError("Missing colon at line %s column %s" % (e.line, e.column))
@@ -1,32 +1,55 @@
+from .lexer import Lexer
 from .parsers.lalr_analysis import GrammarAnalyzer
 from .common import is_terminal
 from .parsers import lalr_parser, earley
 
-class LALR:
-    def build_parser(self, rules, callback, start):
-        ga = GrammarAnalyzer(rules, start)
-        ga.analyze()
-        return lalr_parser.Parser(ga, callback)
+class WithLexer:
+    def __init__(self, lexer_conf):
+        self.lexer_conf = lexer_conf
+        self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)
 
-class Earley:
-    @staticmethod
-    def _process_expansion(x):
-        return [{'literal': s} if is_terminal(s) else s for s in x]
+    def lex(self, text):
+        stream = self.lexer.lex(text)
+        if self.lexer_conf.postlex:
+            return self.lexer_conf.postlex.process(stream)
+        else:
+            return stream
 
-    def build_parser(self, rules, callback, start):
-        rules = [{'name':n, 'symbols': self._process_expansion(x), 'postprocess':getattr(callback, a)} for n,x,a in rules]
-        return EarleyParser(earley.Parser(rules, start))
+class LALR(WithLexer):
+    def __init__(self, lexer_conf, parser_conf):
+        WithLexer.__init__(self, lexer_conf)
 
-class EarleyParser:
-    def __init__(self, parser):
-        self.parser = parser
+        analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
+        analyzer.analyze()
+        self.parser = lalr_parser.Parser(analyzer, parser_conf.callback)
 
     def parse(self, text):
-        res = self.parser.parse(text)
+        tokens = list(self.lex(text))
+        return self.parser.parse(tokens)
+
+class Earley(WithLexer):
+    def __init__(self, lexer_conf, parser_conf):
+        WithLexer.__init__(self, lexer_conf)
+
+        rules = [{'name': n,
+                  'symbols': self._process_expansion(x),
+                  'postprocess': getattr(parser_conf.callback, a)}
+                 for n, x, a in parser_conf.rules]
+
+        self.parser = earley.Parser(rules, parser_conf.start)
+
+    def parse(self, text):
+        tokens = list(self.lex(text))
+        res = self.parser.parse(tokens)
         assert len(res) == 1, 'Ambiguous Parse! Not handled yet'
         return res[0]
+
+    @staticmethod
+    def _process_expansion(x):
+        return [{'literal': s} if is_terminal(s) else s for s in x]
 
 ENGINE_DICT = { 'lalr': LALR, 'earley': Earley }
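After this refactor every engine shares one contract: `__init__(lexer_conf, parser_conf)` plus `parse(text)`, with `WithLexer` contributing `lex()` and the postlex hook. A hypothetical sketch of what plugging a third engine into `ENGINE_DICT` could look like; `MyEngine` and the `'my_engine'` key are invented for illustration and not part of this diff:

```python
class MyEngine(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
        WithLexer.__init__(self, lexer_conf)   # builds self.lexer and lex()
        self.callback = parser_conf.callback
        self.start = parser_conf.start
        # ...build whatever tables the parsing algorithm needs here...

    def parse(self, text):
        tokens = list(self.lex(text))          # postlex applied if configured
        raise NotImplementedError('hand tokens to the actual algorithm here')

ENGINE_DICT['my_engine'] = MyEngine
# Presumably then selectable the same way as the built-ins,
# via the 'parser' option that ENGINE_DICT[self.options.parser] reads.
```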