@@ -47,12 +47,12 @@ class TreeToJson(Transformer):
     true = lambda self, _: True
     false = lambda self, _: False
 
-# json_parser = Lark(json_grammar, parser='earley', lexer='standard')
-# def parse(x):
-#     return TreeToJson().transform(json_parser.parse(x))
+json_parser = Lark(json_grammar, parser='earley', lexer='dynamic')
+def parse(x):
+    return TreeToJson().transform(json_parser.parse(x))
 
-json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson())
-parse = json_parser.parse
+# json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson())
+# parse = json_parser.parse
 
 def test():
     test_json = '''
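The JSON example above now runs on the Earley parser with the new dynamic lexer by default, with the LALR configuration kept as a comment. A minimal sketch of calling the rewired example, assuming the `json_parser`/`parse` definitions from this hunk (the output shape follows from the `TreeToJson` transformer, e.g. its `true`/`false` rules):

```python
# Hypothetical quick check of the example after this change.
# TreeToJson converts the parse tree into plain Python objects.
print(parse('{"items": ["a", 3.14, true]}'))
# expected: {'items': ['a', 3.14, True]}
```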
@@ -130,8 +130,10 @@ class Lark:
                 self.options.lexer = 'standard'
             elif self.options.parser == 'earley':
                 self.options.lexer = None
+            else:
+                assert False, self.options.parser
         lexer = self.options.lexer
-        assert lexer in ('standard', 'contextual', None)
+        assert lexer in ('standard', 'contextual', 'dynamic', None)
 
         if self.options.ambiguity == 'auto':
             if self.options.parser == 'earley':
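For reference, the combinations the widened assertion above admits; a sketch assuming an illustrative `grammar` string ('dynamic' only takes effect with the Earley parser, as the frontend change below shows):

```python
from lark import Lark

grammar = 'start: "a"'  # placeholder grammar, for illustration only

Lark(grammar, parser='lalr',   lexer='standard')    # tokenize first, then LALR
Lark(grammar, parser='lalr',   lexer='contextual')  # lexer consults parser state
Lark(grammar, parser='earley', lexer='standard')    # tokenize first, then Earley
Lark(grammar, parser='earley', lexer='dynamic')     # new: match terminals while parsing
Lark(grammar, parser='earley', lexer=None)          # scanless, character by character
```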
@@ -7,6 +7,8 @@ from .common import is_terminal, GrammarError, ParserConf, Terminal_Regexp, Term
 from .parsers import lalr_parser, old_earley, nearley, earley
 from .tree import Transformer
 
+from .parsers import xearley
+
 class WithLexer:
     def __init__(self, lexer_conf):
         self.lexer_conf = lexer_conf
@@ -171,6 +173,31 @@ class Earley(WithLexer):
         tokens = self.lex(text)
         return self.parser.parse(tokens)
 
+class XEarley:
+    def __init__(self, lexer_conf, parser_conf, options=None):
+        self.token_by_name = {t.name:t for t in lexer_conf.tokens}
+
+        rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in parser_conf.rules]
+
+        resolve_ambiguity = (options.ambiguity=='resolve') if options else True
+        self.parser = xearley.Parser(rules,
+                                     parser_conf.start,
+                                     parser_conf.callback,
+                                     resolve_ambiguity=resolve_ambiguity)
+
+    def _prepare_expansion(self, expansion):
+        for sym in expansion:
+            if is_terminal(sym):
+                regexp = self.token_by_name[sym].pattern.to_regexp()
+                width = sre_parse.parse(regexp).getwidth()
+                yield Terminal_Regexp(regexp)
+            else:
+                yield sym
+
+    def parse(self, text):
+        return self.parser.parse(text)
+
 def get_frontend(parser, lexer):
     if parser=='lalr':
         if lexer is None:
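`XEarley` compiles every terminal to a `Terminal_Regexp` and hands the raw text to `xearley.Parser`, so terminals are matched during parsing rather than in a separate lexing pass, and one input can tokenize differently per derivation. A sketch of why that matters, using a grammar modeled on the tests further down (the `start` rule here is an assumption; only `A: "a"+` is visible in those hunks):

```python
from lark import Lark

# "aaa" has no single correct pre-tokenization under A: "a"+;
# the dynamic lexer lets the parser decide where to split.
l = Lark("""start: A A
            A: "a"+
            """, parser='earley', lexer='dynamic')
print(l.parse("aaa").children)  # one resolved split, e.g. ['aa', 'a']
```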
@@ -186,6 +213,8 @@ def get_frontend(parser, lexer):
             return Earley_NoLex
         elif lexer=='standard':
             return Earley
+        elif lexer=='dynamic':
+            return XEarley
         elif lexer=='contextual':
             raise ValueError('The Earley parser does not support the contextual parser')
         else:
@@ -5,7 +5,7 @@ import logging
 
 from .test_trees import TestTrees
 # from .test_selectors import TestSelectors
-from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers, TestEarleyScanless, TestEarley
+from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers, TestEarleyScanless, TestEarley, TestEarleyDynamic
 # from .test_grammars import TestPythonG, TestConfigG
 
 logging.basicConfig(level=logging.INFO)
@@ -57,7 +57,7 @@ class TestEarley(unittest.TestCase):
 
         # or re-processing of already completed rules.
         g = Lark(r"""start: B
                      B: ("ab"|/[^b]/)*
-                  """, lexer=None)
+                  """, lexer='dynamic')
 
         self.assertEqual( g.parse('abc').children[0], 'abc')
@@ -65,7 +65,7 @@ class TestEarley(unittest.TestCase):
         g = Lark("""start: A "b" c
                     A: "a"+
                     c: "abc"
-                    """, parser="earley", lexer=None)
+                    """, parser="earley", lexer='dynamic')
         x = g.parse('aaaababc')
 
     def test_earley_scanless2(self):
@@ -80,7 +80,7 @@ class TestEarley(unittest.TestCase):
 
         program = """c b r"""
 
-        l = Lark(grammar, parser='earley', lexer=None)
+        l = Lark(grammar, parser='earley', lexer='dynamic')
         l.parse(program)
 
     def test_earley_scanless3(self):
@@ -91,7 +91,7 @@ class TestEarley(unittest.TestCase):
             A: "a"+
             """
 
-        l = Lark(grammar, parser='earley', lexer=None)
+        l = Lark(grammar, parser='earley', lexer='dynamic')
         res = l.parse("aaa")
         self.assertEqual(res.children, ['aa', 'a'])
 
@@ -101,7 +101,7 @@ class TestEarley(unittest.TestCase):
             A: "a"+
             """
 
-        l = Lark(grammar, parser='earley', lexer=None)
+        l = Lark(grammar, parser='earley', lexer='dynamic')
         res = l.parse("aaa")
         self.assertEqual(res.children, ['aaa'])
 
@@ -114,7 +114,7 @@ class TestEarley(unittest.TestCase):
             empty2:
             """
 
-        parser = Lark(grammar, parser='earley', lexer=None)
+        parser = Lark(grammar, parser='earley', lexer='dynamic')
         res = parser.parse('ab')
 
         empty_tree = Tree('empty', [Tree('empty2', [])])
@@ -130,7 +130,7 @@ class TestEarley(unittest.TestCase):
             ab: "ab"
             """
 
-        parser = Lark(grammar, parser='earley', lexer=None, ambiguity='explicit')
+        parser = Lark(grammar, parser='earley', lexer='dynamic', ambiguity='explicit')
         res = parser.parse('ab')
 
         self.assertEqual( res.data, '_ambig')
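Note how `ambiguity='explicit'` flows through the new frontend: `XEarley.__init__` sets `resolve_ambiguity` to False for any value other than 'resolve', so ambiguous input comes back as an `_ambig` node instead of a single chosen tree. A sketch, reusing the test's `grammar` (only its `ab: "ab"` rule is visible in the hunk above, so the rest is assumed):

```python
parser = Lark(grammar, parser='earley', lexer='dynamic', ambiguity='explicit')
res = parser.parse('ab')
assert res.data == '_ambig'  # one child subtree per possible derivation
```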
@@ -146,6 +146,7 @@ def _make_parser_test(LEXER, PARSER):
                         b: "b"
                         a: "a"
                     """)
+
         r = g.parse('aaabaab')
         self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )
         r = g.parse('aaabaaba')
@@ -583,15 +584,17 @@ def _make_parser_test(LEXER, PARSER):
     _TestParser.__name__ = _NAME
     globals()[_NAME] = _TestParser
 
+# Note: You still have to import them in __main__ for the tests to run
 _TO_TEST = [
         ('standard', 'earley'),
+        ('dynamic', 'earley'),
         ('standard', 'lalr'),
         ('contextual', 'lalr'),
         (None, 'earley'),
 ]
 
-for LEXER, PARSER in _TO_TEST:
-    _make_parser_test(LEXER, PARSER)
+for _LEXER, _PARSER in _TO_TEST:
+    _make_parser_test(_LEXER, _PARSER)
 
 
 if __name__ == '__main__':