# Patch summary (commentary only: everything before the first `diff --git`
# header lies outside every hunk).
#
#   * examples/json_parser.py: the example now builds its parser with
#     parser='earley', lexer='dynamic'; the LALR variant is commented out.
#   * lark/lark.py: 'dynamic' is added to the lexer values accepted by the
#     assert, and the chain that picks a lexer from options.parser gains an
#     `else` branch that asserts on any other parser value.
#   * lark/parser_frontends.py: adds the XEarley frontend. It maps every
#     terminal in a rule expansion to Terminal_Regexp(<pattern regexp>), leaves
#     other symbols untouched, and hands the input text straight to
#     xearley.Parser.parse. get_frontend returns it for ('earley', 'dynamic').
#   * tests/: the TestEarley cases switch from lexer=None to lexer='dynamic',
#     ('dynamic', 'earley') is added to _TO_TEST, the generator loop variables
#     are renamed to _LEXER/_PARSER, and TestEarleyDynamic is imported in
#     tests/__main__.py.
#
# NOTE(review): this patch was received with newlines and indentation
#   collapsed. Line breaks, indentation and blank context lines below were
#   reconstructed from the @@ line counts and Python syntax -- confirm them
#   against the target tree before applying.
# NOTE(review): in XEarley._prepare_expansion, `width` is assigned but never
#   read in the lines shown.
# NOTE(review): `sre_parse` is used by XEarley._prepare_expansion, but no
#   import for it appears in the hunks shown -- confirm it is already imported
#   at the top of lark/parser_frontends.py.
# NOTE(review): `assert False, self.options.parser` is skipped when Python
#   runs with -O; get_frontend in this same patch raises ValueError for an
#   unsupported combination instead.
diff --git a/examples/json_parser.py b/examples/json_parser.py
index 4f5feaf..56d6a0b 100644
--- a/examples/json_parser.py
+++ b/examples/json_parser.py
@@ -47,12 +47,12 @@ class TreeToJson(Transformer):
     true = lambda self, _: True
     false = lambda self, _: False
 
-# json_parser = Lark(json_grammar, parser='earley', lexer='standard')
-# def parse(x):
-#     return TreeToJson().transform(json_parser.parse(x))
+json_parser = Lark(json_grammar, parser='earley', lexer='dynamic')
+def parse(x):
+    return TreeToJson().transform(json_parser.parse(x))
 
-json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson())
-parse = json_parser.parse
+# json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson())
+# parse = json_parser.parse
 
 def test():
     test_json = '''
diff --git a/lark/lark.py b/lark/lark.py
index b839650..7cd73d1 100644
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -130,8 +130,10 @@ class Lark:
                 self.options.lexer = 'standard'
             elif self.options.parser == 'earley':
                 self.options.lexer = None
+            else:
+                assert False, self.options.parser
         lexer = self.options.lexer
-        assert lexer in ('standard', 'contextual', None)
+        assert lexer in ('standard', 'contextual', 'dynamic', None)
 
         if self.options.ambiguity == 'auto':
             if self.options.parser == 'earley':
diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index 1646726..be20ccc 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -7,6 +7,8 @@ from .common import is_terminal, GrammarError, ParserConf, Terminal_Regexp, Term
 from .parsers import lalr_parser, old_earley, nearley, earley
 from .tree import Transformer
 
+from .parsers import xearley
+
 class WithLexer:
     def __init__(self, lexer_conf):
         self.lexer_conf = lexer_conf
@@ -171,6 +173,31 @@ class Earley(WithLexer):
         tokens = self.lex(text)
         return self.parser.parse(tokens)
 
+
+class XEarley:
+    def __init__(self, lexer_conf, parser_conf, options=None):
+        self.token_by_name = {t.name:t for t in lexer_conf.tokens}
+
+        rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in parser_conf.rules]
+
+        resolve_ambiguity = (options.ambiguity=='resolve') if options else True
+        self.parser = xearley.Parser(rules,
+                                     parser_conf.start,
+                                     parser_conf.callback,
+                                     resolve_ambiguity=resolve_ambiguity)
+
+    def _prepare_expansion(self, expansion):
+        for sym in expansion:
+            if is_terminal(sym):
+                regexp = self.token_by_name[sym].pattern.to_regexp()
+                width = sre_parse.parse(regexp).getwidth()
+                yield Terminal_Regexp(regexp)
+            else:
+                yield sym
+
+    def parse(self, text):
+        return self.parser.parse(text)
+
 def get_frontend(parser, lexer):
     if parser=='lalr':
         if lexer is None:
@@ -186,6 +213,8 @@ def get_frontend(parser, lexer):
             return Earley_NoLex
         elif lexer=='standard':
             return Earley
+        elif lexer=='dynamic':
+            return XEarley
         elif lexer=='contextual':
             raise ValueError('The Earley parser does not support the contextual parser')
         else:
diff --git a/tests/__main__.py b/tests/__main__.py
index 9f013a1..1811f81 100644
--- a/tests/__main__.py
+++ b/tests/__main__.py
@@ -5,7 +5,7 @@ import logging
 
 from .test_trees import TestTrees
 # from .test_selectors import TestSelectors
-from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers, TestEarleyScanless, TestEarley
+from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers, TestEarleyScanless, TestEarley, TestEarleyDynamic
 # from .test_grammars import TestPythonG, TestConfigG
 
 logging.basicConfig(level=logging.INFO)
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 6063096..3a699c9 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -57,7 +57,7 @@ class TestEarley(unittest.TestCase):
         # or re-processing of already completed rules.
         g = Lark(r"""start: B
                      B: ("ab"|/[^b]/)*
-                  """, lexer=None)
+                  """, lexer='dynamic')
 
         self.assertEqual( g.parse('abc').children[0], 'abc')
 
@@ -65,7 +65,7 @@ class TestEarley(unittest.TestCase):
         g = Lark("""start: A "b" c
                     A: "a"+
                     c: "abc"
-                    """, parser="earley", lexer=None)
+                    """, parser="earley", lexer='dynamic')
         x = g.parse('aaaababc')
 
     def test_earley_scanless2(self):
@@ -80,7 +80,7 @@ class TestEarley(unittest.TestCase):
 
         program = """c b r"""
 
-        l = Lark(grammar, parser='earley', lexer=None)
+        l = Lark(grammar, parser='earley', lexer='dynamic')
         l.parse(program)
 
     def test_earley_scanless3(self):
@@ -91,7 +91,7 @@ class TestEarley(unittest.TestCase):
         A: "a"+
         """
 
-        l = Lark(grammar, parser='earley', lexer=None)
+        l = Lark(grammar, parser='earley', lexer='dynamic')
         res = l.parse("aaa")
         self.assertEqual(res.children, ['aa', 'a'])
 
@@ -101,7 +101,7 @@ class TestEarley(unittest.TestCase):
         A: "a"+
         """
 
-        l = Lark(grammar, parser='earley', lexer=None)
+        l = Lark(grammar, parser='earley', lexer='dynamic')
         res = l.parse("aaa")
         self.assertEqual(res.children, ['aaa'])
 
@@ -114,7 +114,7 @@ class TestEarley(unittest.TestCase):
         empty2:
         """
 
-        parser = Lark(grammar, parser='earley', lexer=None)
+        parser = Lark(grammar, parser='earley', lexer='dynamic')
         res = parser.parse('ab')
 
         empty_tree = Tree('empty', [Tree('empty2', [])])
@@ -130,7 +130,7 @@ class TestEarley(unittest.TestCase):
         ab: "ab"
         """
 
-        parser = Lark(grammar, parser='earley', lexer=None, ambiguity='explicit')
+        parser = Lark(grammar, parser='earley', lexer='dynamic', ambiguity='explicit')
         res = parser.parse('ab')
 
         self.assertEqual( res.data, '_ambig')
@@ -146,6 +146,7 @@ def _make_parser_test(LEXER, PARSER):
                         b: "b"
                         a: "a"
                      """)
+
             r = g.parse('aaabaab')
             self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )
             r = g.parse('aaabaaba')
@@ -583,15 +584,17 @@ def _make_parser_test(LEXER, PARSER):
     _TestParser.__name__ = _NAME
     globals()[_NAME] = _TestParser
 
+# Note: You still have to import them in __main__ for the tests to run
 _TO_TEST = [
         ('standard', 'earley'),
+        ('dynamic', 'earley'),
         ('standard', 'lalr'),
         ('contextual', 'lalr'),
         (None, 'earley'),
 ]
 
-for LEXER, PARSER in _TO_TEST:
-    _make_parser_test(LEXER, PARSER)
+for _LEXER, _PARSER in _TO_TEST:
+    _make_parser_test(_LEXER, _PARSER)
 
 
 if __name__ == '__main__':