@@ -47,12 +47,12 @@ class TreeToJson(Transformer): | |||
    # Transformer callbacks for the grammar's boolean literals: the matched
    # children are ignored and the corresponding Python constant is returned.
    # NOTE(review): enclosing class header is outside this view — presumably
    # these are Lark Transformer rule handlers; confirm against full file.
    true = lambda self, _: True
    false = lambda self, _: False
# json_parser = Lark(json_grammar, parser='earley', lexer='standard') | |||
# def parse(x): | |||
# return TreeToJson().transform(json_parser.parse(x)) | |||
json_parser = Lark(json_grammar, parser='earley', lexer='dynamic') | |||
def parse(x): | |||
return TreeToJson().transform(json_parser.parse(x)) | |||
json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson()) | |||
parse = json_parser.parse | |||
# json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson()) | |||
# parse = json_parser.parse | |||
def test(): | |||
test_json = ''' | |||
@@ -130,8 +130,10 @@ class Lark: | |||
self.options.lexer = 'standard' | |||
elif self.options.parser == 'earley': | |||
self.options.lexer = None | |||
else: | |||
assert False, self.options.parser | |||
lexer = self.options.lexer | |||
assert lexer in ('standard', 'contextual', None) | |||
assert lexer in ('standard', 'contextual', 'dynamic', None) | |||
if self.options.ambiguity == 'auto': | |||
if self.options.parser == 'earley': | |||
@@ -7,6 +7,8 @@ from .common import is_terminal, GrammarError, ParserConf, Terminal_Regexp, Term | |||
from .parsers import lalr_parser, old_earley, nearley, earley | |||
from .tree import Transformer | |||
from .parsers import xearley | |||
class WithLexer: | |||
def __init__(self, lexer_conf): | |||
self.lexer_conf = lexer_conf | |||
@@ -171,6 +173,31 @@ class Earley(WithLexer): | |||
tokens = self.lex(text) | |||
return self.parser.parse(tokens) | |||
class XEarley:
    """Earley-parser frontend using a dynamic (scanless) lexer.

    Instead of tokenizing the input up front, each terminal's pattern is
    compiled to a regexp matcher (Terminal_Regexp) and matched directly
    against the raw text by the xearley parser.
    """

    def __init__(self, lexer_conf, parser_conf, options=None):
        # Map terminal names to their token definitions so rule expansions
        # can be rewritten into regexp matchers below.
        self.token_by_name = {t.name: t for t in lexer_conf.tokens}

        rules = [(n, list(self._prepare_expansion(x)), a)
                 for n, x, a in parser_conf.rules]

        # Default to resolving ambiguity when no options are given.
        resolve_ambiguity = (options.ambiguity == 'resolve') if options else True
        self.parser = xearley.Parser(rules,
                                     parser_conf.start,
                                     parser_conf.callback,
                                     resolve_ambiguity=resolve_ambiguity)

    def _prepare_expansion(self, expansion):
        """Yield the expansion with terminal names replaced by regexp matchers.

        Non-terminal symbols are passed through unchanged.
        """
        for sym in expansion:
            if is_terminal(sym):
                regexp = self.token_by_name[sym].pattern.to_regexp()
                # (removed dead code: a sre_parse.parse(regexp).getwidth()
                # result was computed here but never used)
                yield Terminal_Regexp(regexp)
            else:
                yield sym

    def parse(self, text):
        """Parse raw text directly; no separate lexing pass is performed."""
        return self.parser.parse(text)
def get_frontend(parser, lexer): | |||
if parser=='lalr': | |||
if lexer is None: | |||
@@ -186,6 +213,8 @@ def get_frontend(parser, lexer): | |||
return Earley_NoLex | |||
elif lexer=='standard': | |||
return Earley | |||
elif lexer=='dynamic': | |||
return XEarley | |||
elif lexer=='contextual': | |||
raise ValueError('The Earley parser does not support the contextual parser') | |||
else: | |||
@@ -5,7 +5,7 @@ import logging | |||
from .test_trees import TestTrees | |||
# from .test_selectors import TestSelectors | |||
from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers, TestEarleyScanless, TestEarley | |||
from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers, TestEarleyScanless, TestEarley, TestEarleyDynamic | |||
# from .test_grammars import TestPythonG, TestConfigG | |||
logging.basicConfig(level=logging.INFO) | |||
@@ -57,7 +57,7 @@ class TestEarley(unittest.TestCase): | |||
# or re-processing of already completed rules. | |||
g = Lark(r"""start: B | |||
B: ("ab"|/[^b]/)* | |||
""", lexer=None) | |||
""", lexer='dynamic') | |||
self.assertEqual( g.parse('abc').children[0], 'abc') | |||
@@ -65,7 +65,7 @@ class TestEarley(unittest.TestCase): | |||
g = Lark("""start: A "b" c | |||
A: "a"+ | |||
c: "abc" | |||
""", parser="earley", lexer=None) | |||
""", parser="earley", lexer='dynamic') | |||
x = g.parse('aaaababc') | |||
def test_earley_scanless2(self): | |||
@@ -80,7 +80,7 @@ class TestEarley(unittest.TestCase): | |||
program = """c b r""" | |||
l = Lark(grammar, parser='earley', lexer=None) | |||
l = Lark(grammar, parser='earley', lexer='dynamic') | |||
l.parse(program) | |||
def test_earley_scanless3(self): | |||
@@ -91,7 +91,7 @@ class TestEarley(unittest.TestCase): | |||
A: "a"+ | |||
""" | |||
l = Lark(grammar, parser='earley', lexer=None) | |||
l = Lark(grammar, parser='earley', lexer='dynamic') | |||
res = l.parse("aaa") | |||
self.assertEqual(res.children, ['aa', 'a']) | |||
@@ -101,7 +101,7 @@ class TestEarley(unittest.TestCase): | |||
A: "a"+ | |||
""" | |||
l = Lark(grammar, parser='earley', lexer=None) | |||
l = Lark(grammar, parser='earley', lexer='dynamic') | |||
res = l.parse("aaa") | |||
self.assertEqual(res.children, ['aaa']) | |||
@@ -114,7 +114,7 @@ class TestEarley(unittest.TestCase): | |||
empty2: | |||
""" | |||
parser = Lark(grammar, parser='earley', lexer=None) | |||
parser = Lark(grammar, parser='earley', lexer='dynamic') | |||
res = parser.parse('ab') | |||
empty_tree = Tree('empty', [Tree('empty2', [])]) | |||
@@ -130,7 +130,7 @@ class TestEarley(unittest.TestCase): | |||
ab: "ab" | |||
""" | |||
parser = Lark(grammar, parser='earley', lexer=None, ambiguity='explicit') | |||
parser = Lark(grammar, parser='earley', lexer='dynamic', ambiguity='explicit') | |||
res = parser.parse('ab') | |||
self.assertEqual( res.data, '_ambig') | |||
@@ -146,6 +146,7 @@ def _make_parser_test(LEXER, PARSER): | |||
b: "b" | |||
a: "a" | |||
""") | |||
r = g.parse('aaabaab') | |||
self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' ) | |||
r = g.parse('aaabaaba') | |||
@@ -583,15 +584,17 @@ def _make_parser_test(LEXER, PARSER): | |||
_TestParser.__name__ = _NAME | |||
globals()[_NAME] = _TestParser | |||
# Note: You still have to import them in __main__ for the tests to run | |||
_TO_TEST = [ | |||
('standard', 'earley'), | |||
('dynamic', 'earley'), | |||
('standard', 'lalr'), | |||
('contextual', 'lalr'), | |||
(None, 'earley'), | |||
] | |||
for LEXER, PARSER in _TO_TEST: | |||
_make_parser_test(LEXER, PARSER) | |||
for _LEXER, _PARSER in _TO_TEST: | |||
_make_parser_test(_LEXER, _PARSER) | |||
if __name__ == '__main__': | |||