diff --git a/lark/lark.py b/lark/lark.py index bb15a2f..210ff6b 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -14,6 +14,7 @@ from .lexer import Lexer from .parse_tree_builder import ParseTreeBuilder from .parser_frontends import get_frontend + class LarkOptions(object): """Specifies the options for Lark @@ -26,6 +27,8 @@ class LarkOptions(object): "standard": Use a standard lexer "contextual": Stronger lexer (only works with parser="lalr") "dynamic": Flexible and powerful (only with parser="earley") + "dynamic_complete": Same as dynamic, but tries *every* variation + of tokenizing possible. (only with parser="earley") "auto" (default): Choose for me based on grammar and parser ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley" @@ -139,7 +142,7 @@ class Lark: else: assert False, self.options.parser lexer = self.options.lexer - assert lexer in ('standard', 'contextual', 'dynamic') + assert lexer in ('standard', 'contextual', 'dynamic', 'dynamic_complete') if self.options.ambiguity == 'auto': if self.options.parser == 'earley': @@ -213,8 +216,7 @@ class Lark: stream = self.lexer.lex(text) if self.options.postlex: return self.options.postlex.process(stream) - else: - return stream + return stream def parse(self, text): "Parse the given text, according to the options provided. Returns a tree, unless specified otherwise." 
@@ -231,4 +233,3 @@ class Lark: # else: # l = list(self.lex(text)) # return self.parser.parse(l) - diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 08e2d0e..4c604a2 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -83,7 +83,7 @@ class Earley(WithLexer): class XEarley: - def __init__(self, lexer_conf, parser_conf, options=None): + def __init__(self, lexer_conf, parser_conf, options=None, **kw): self.token_by_name = {t.name:t for t in lexer_conf.tokens} self._prepare_match(lexer_conf) @@ -92,7 +92,8 @@ class XEarley: self.match, resolve_ambiguity=get_ambiguity_resolver(options), ignore=lexer_conf.ignore, - predict_all=options.earley__predict_all + predict_all=options.earley__predict_all, + **kw ) def match(self, term, text, index=0): @@ -115,6 +116,13 @@ class XEarley: def parse(self, text): return self.parser.parse(text) +class XEarley_CompleteLex(XEarley): + """XEarley variant that always passes complete_lex=True to XEarley.__init__ (selected for lexer='dynamic_complete').""" + def __init__(self, *args, **kw): + # Call the base initializer explicitly: super() requires a class (not an instance) as its first argument, and XEarley is declared without a base class. + XEarley.__init__(self, *args, complete_lex=True, **kw) + + class CYK(WithLexer): @@ -165,6 +173,8 @@ def get_frontend(parser, lexer): return Earley elif lexer=='dynamic': return XEarley + elif lexer=='dynamic_complete': + return XEarley_CompleteLex elif lexer=='contextual': raise ValueError('The Earley parser does not support the contextual parser') else: diff --git a/lark/parsers/xearley.py b/lark/parsers/xearley.py index 02698fb..57e4a4a 100644 --- a/lark/parsers/xearley.py +++ b/lark/parsers/xearley.py @@ -28,13 +28,15 @@ from ..grammar import NonTerminal, Terminal from .earley import ApplyCallbacks, Item, Column + class Parser: - def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None, ignore=(), predict_all=False): + def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None, ignore=(), predict_all=False, complete_lex=False): self.analysis = GrammarAnalyzer(parser_conf) self.parser_conf = parser_conf self.resolve_ambiguity = resolve_ambiguity self.ignore = [Terminal(t) for t in ignore] self.predict_all = 
predict_all + self.complete_lex = complete_lex self.FIRST = self.analysis.FIRST self.postprocess = {} @@ -101,12 +103,13 @@ class Parser: t = Token(item.expect.name, m.group(0), i, text_line, text_column) delayed_matches[m.end()].append(item.advance(t)) - s = m.group(0) - for j in range(1, len(s)): - m = match(item.expect, s[:-j]) - if m: - t = Token(item.expect.name, m.group(0), i, text_line, text_column) - delayed_matches[i+m.end()].append(item.advance(t)) + if self.complete_lex: + s = m.group(0) + for j in range(1, len(s)): + m = match(item.expect, s[:-j]) + if m: + t = Token(item.expect.name, m.group(0), i, text_line, text_column) + delayed_matches[i+m.end()].append(item.advance(t)) next_set = Column(i+1, self.FIRST, predict_all=self.predict_all) next_set.add(delayed_matches[i+1]) @@ -132,7 +135,6 @@ class Parser: else: text_column += 1 - predict_and_complete(column) # Parse ended. Now build a parse tree diff --git a/tests/test_parser.py b/tests/test_parser.py index 36cb142..c0a809f 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -18,7 +18,7 @@ from io import ( logging.basicConfig(level=logging.INFO) from lark.lark import Lark -from lark.exceptions import GrammarError, ParseError, UnexpectedToken, LexError, UnexpectedInput +from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput from lark.tree import Tree from lark.visitors import Transformer @@ -184,6 +184,7 @@ def _make_full_earley_test(LEXER): l.parse(program) + @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete parser") def test_earley3(self): "Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)" @@ -289,6 +290,7 @@ def _make_full_earley_test(LEXER): self.assertEqual(res, expected) + @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete parser") def test_explicit_ambiguity2(self): grammar = r""" start: NAME+ @@ -1175,6 +1177,7 @@ _TO_TEST = [ ('standard', 'earley'), 
('standard', 'cyk'), ('dynamic', 'earley'), + ('dynamic_complete', 'earley'), ('standard', 'lalr'), ('contextual', 'lalr'), # (None, 'earley'),