| @@ -47,12 +47,12 @@ class TreeToJson(Transformer): | |||||
| true = lambda self, _: True | true = lambda self, _: True | ||||
| false = lambda self, _: False | false = lambda self, _: False | ||||
| # json_parser = Lark(json_grammar, parser='earley', lexer='standard') | |||||
| # def parse(x): | |||||
| # return TreeToJson().transform(json_parser.parse(x)) | |||||
| json_parser = Lark(json_grammar, parser='earley', lexer='dynamic') | |||||
def parse(x):
    """Parse the text *x* and return the transformed result.

    The text is first parsed with the module-level ``json_parser``; the
    resulting tree is then passed through a fresh ``TreeToJson`` transformer.
    """
    tree = json_parser.parse(x)
    return TreeToJson().transform(tree)
| json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson()) | |||||
| parse = json_parser.parse | |||||
| # json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson()) | |||||
| # parse = json_parser.parse | |||||
| def test(): | def test(): | ||||
| test_json = ''' | test_json = ''' | ||||
| @@ -130,8 +130,10 @@ class Lark: | |||||
| self.options.lexer = 'standard' | self.options.lexer = 'standard' | ||||
| elif self.options.parser == 'earley': | elif self.options.parser == 'earley': | ||||
| self.options.lexer = None | self.options.lexer = None | ||||
| else: | |||||
| assert False, self.options.parser | |||||
| lexer = self.options.lexer | lexer = self.options.lexer | ||||
| assert lexer in ('standard', 'contextual', None) | |||||
| assert lexer in ('standard', 'contextual', 'dynamic', None) | |||||
| if self.options.ambiguity == 'auto': | if self.options.ambiguity == 'auto': | ||||
| if self.options.parser == 'earley': | if self.options.parser == 'earley': | ||||
| @@ -7,6 +7,8 @@ from .common import is_terminal, GrammarError, ParserConf, Terminal_Regexp, Term | |||||
| from .parsers import lalr_parser, old_earley, nearley, earley | from .parsers import lalr_parser, old_earley, nearley, earley | ||||
| from .tree import Transformer | from .tree import Transformer | ||||
| from .parsers import xearley | |||||
| class WithLexer: | class WithLexer: | ||||
| def __init__(self, lexer_conf): | def __init__(self, lexer_conf): | ||||
| self.lexer_conf = lexer_conf | self.lexer_conf = lexer_conf | ||||
| @@ -171,6 +173,31 @@ class Earley(WithLexer): | |||||
| tokens = self.lex(text) | tokens = self.lex(text) | ||||
| return self.parser.parse(tokens) | return self.parser.parse(tokens) | ||||
class XEarley:
    """Parser frontend that feeds raw text straight to ``xearley.Parser``.

    No separate lexing pass is run here: every terminal in a rule expansion
    is replaced by a ``Terminal_Regexp`` built from the token's pattern, and
    ``parse`` hands the input text to the underlying parser unchanged.
    """

    def __init__(self, lexer_conf, parser_conf, options=None):
        """Build the underlying ``xearley.Parser``.

        lexer_conf  -- supplies ``tokens``; each token is indexed by ``name``.
        parser_conf -- supplies ``rules`` (triples whose middle element is the
                       expansion), ``start`` and ``callback``.
        options     -- optional; when given, ambiguity is resolved only if
                       ``options.ambiguity == 'resolve'``. When omitted (or
                       falsy), ambiguity resolution is enabled.
        """
        # Name -> token definition, used to look up each terminal's pattern.
        self.token_by_name = {t.name: t for t in lexer_conf.tokens}

        # Rewrite every expansion so terminals become regexp matchers.
        rules = [(n, list(self._prepare_expansion(x)), a)
                 for n, x, a in parser_conf.rules]

        resolve_ambiguity = (options.ambiguity == 'resolve') if options else True

        self.parser = xearley.Parser(rules,
                                     parser_conf.start,
                                     parser_conf.callback,
                                     resolve_ambiguity=resolve_ambiguity)

    def _prepare_expansion(self, expansion):
        """Yield the symbols of *expansion*, wrapping terminals as regexps.

        Terminals (per ``is_terminal``) are looked up in ``token_by_name`` and
        yielded as ``Terminal_Regexp`` objects; every other symbol is yielded
        unchanged.

        Raises KeyError if a terminal has no entry in ``token_by_name``.
        """
        for sym in expansion:
            if is_terminal(sym):
                regexp = self.token_by_name[sym].pattern.to_regexp()
                yield Terminal_Regexp(regexp)
            else:
                yield sym

    def parse(self, text):
        """Parse *text* with the underlying parser and return its result."""
        return self.parser.parse(text)
| def get_frontend(parser, lexer): | def get_frontend(parser, lexer): | ||||
| if parser=='lalr': | if parser=='lalr': | ||||
| if lexer is None: | if lexer is None: | ||||
| @@ -186,6 +213,8 @@ def get_frontend(parser, lexer): | |||||
| return Earley_NoLex | return Earley_NoLex | ||||
| elif lexer=='standard': | elif lexer=='standard': | ||||
| return Earley | return Earley | ||||
| elif lexer=='dynamic': | |||||
| return XEarley | |||||
| elif lexer=='contextual': | elif lexer=='contextual': | ||||
| raise ValueError('The Earley parser does not support the contextual parser') | raise ValueError('The Earley parser does not support the contextual parser') | ||||
| else: | else: | ||||
| @@ -5,7 +5,7 @@ import logging | |||||
| from .test_trees import TestTrees | from .test_trees import TestTrees | ||||
| # from .test_selectors import TestSelectors | # from .test_selectors import TestSelectors | ||||
| from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers, TestEarleyScanless, TestEarley | |||||
| from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers, TestEarleyScanless, TestEarley, TestEarleyDynamic | |||||
| # from .test_grammars import TestPythonG, TestConfigG | # from .test_grammars import TestPythonG, TestConfigG | ||||
| logging.basicConfig(level=logging.INFO) | logging.basicConfig(level=logging.INFO) | ||||
| @@ -57,7 +57,7 @@ class TestEarley(unittest.TestCase): | |||||
| # or re-processing of already completed rules. | # or re-processing of already completed rules. | ||||
| g = Lark(r"""start: B | g = Lark(r"""start: B | ||||
| B: ("ab"|/[^b]/)* | B: ("ab"|/[^b]/)* | ||||
| """, lexer=None) | |||||
| """, lexer='dynamic') | |||||
| self.assertEqual( g.parse('abc').children[0], 'abc') | self.assertEqual( g.parse('abc').children[0], 'abc') | ||||
| @@ -65,7 +65,7 @@ class TestEarley(unittest.TestCase): | |||||
| g = Lark("""start: A "b" c | g = Lark("""start: A "b" c | ||||
| A: "a"+ | A: "a"+ | ||||
| c: "abc" | c: "abc" | ||||
| """, parser="earley", lexer=None) | |||||
| """, parser="earley", lexer='dynamic') | |||||
| x = g.parse('aaaababc') | x = g.parse('aaaababc') | ||||
| def test_earley_scanless2(self): | def test_earley_scanless2(self): | ||||
| @@ -80,7 +80,7 @@ class TestEarley(unittest.TestCase): | |||||
| program = """c b r""" | program = """c b r""" | ||||
| l = Lark(grammar, parser='earley', lexer=None) | |||||
| l = Lark(grammar, parser='earley', lexer='dynamic') | |||||
| l.parse(program) | l.parse(program) | ||||
| def test_earley_scanless3(self): | def test_earley_scanless3(self): | ||||
| @@ -91,7 +91,7 @@ class TestEarley(unittest.TestCase): | |||||
| A: "a"+ | A: "a"+ | ||||
| """ | """ | ||||
| l = Lark(grammar, parser='earley', lexer=None) | |||||
| l = Lark(grammar, parser='earley', lexer='dynamic') | |||||
| res = l.parse("aaa") | res = l.parse("aaa") | ||||
| self.assertEqual(res.children, ['aa', 'a']) | self.assertEqual(res.children, ['aa', 'a']) | ||||
| @@ -101,7 +101,7 @@ class TestEarley(unittest.TestCase): | |||||
| A: "a"+ | A: "a"+ | ||||
| """ | """ | ||||
| l = Lark(grammar, parser='earley', lexer=None) | |||||
| l = Lark(grammar, parser='earley', lexer='dynamic') | |||||
| res = l.parse("aaa") | res = l.parse("aaa") | ||||
| self.assertEqual(res.children, ['aaa']) | self.assertEqual(res.children, ['aaa']) | ||||
| @@ -114,7 +114,7 @@ class TestEarley(unittest.TestCase): | |||||
| empty2: | empty2: | ||||
| """ | """ | ||||
| parser = Lark(grammar, parser='earley', lexer=None) | |||||
| parser = Lark(grammar, parser='earley', lexer='dynamic') | |||||
| res = parser.parse('ab') | res = parser.parse('ab') | ||||
| empty_tree = Tree('empty', [Tree('empty2', [])]) | empty_tree = Tree('empty', [Tree('empty2', [])]) | ||||
| @@ -130,7 +130,7 @@ class TestEarley(unittest.TestCase): | |||||
| ab: "ab" | ab: "ab" | ||||
| """ | """ | ||||
| parser = Lark(grammar, parser='earley', lexer=None, ambiguity='explicit') | |||||
| parser = Lark(grammar, parser='earley', lexer='dynamic', ambiguity='explicit') | |||||
| res = parser.parse('ab') | res = parser.parse('ab') | ||||
| self.assertEqual( res.data, '_ambig') | self.assertEqual( res.data, '_ambig') | ||||
| @@ -146,6 +146,7 @@ def _make_parser_test(LEXER, PARSER): | |||||
| b: "b" | b: "b" | ||||
| a: "a" | a: "a" | ||||
| """) | """) | ||||
| r = g.parse('aaabaab') | r = g.parse('aaabaab') | ||||
| self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' ) | self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' ) | ||||
| r = g.parse('aaabaaba') | r = g.parse('aaabaaba') | ||||
| @@ -583,15 +584,17 @@ def _make_parser_test(LEXER, PARSER): | |||||
| _TestParser.__name__ = _NAME | _TestParser.__name__ = _NAME | ||||
| globals()[_NAME] = _TestParser | globals()[_NAME] = _TestParser | ||||
| # Note: You still have to import them in __main__ for the tests to run | |||||
| _TO_TEST = [ | _TO_TEST = [ | ||||
| ('standard', 'earley'), | ('standard', 'earley'), | ||||
| ('dynamic', 'earley'), | |||||
| ('standard', 'lalr'), | ('standard', 'lalr'), | ||||
| ('contextual', 'lalr'), | ('contextual', 'lalr'), | ||||
| (None, 'earley'), | (None, 'earley'), | ||||
| ] | ] | ||||
| for LEXER, PARSER in _TO_TEST: | |||||
| _make_parser_test(LEXER, PARSER) | |||||
| for _LEXER, _PARSER in _TO_TEST: | |||||
| _make_parser_test(_LEXER, _PARSER) | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||