Selaa lähdekoodia

Mid-work. A nice first try

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 vuotta sitten
vanhempi
commit
8e2221b2fd
5 muutettua tiedostoa jossa 50 lisäystä ja 16 poistoa
  1. +5
    -5
      examples/json_parser.py
  2. +3
    -1
      lark/lark.py
  3. +29
    -0
      lark/parser_frontends.py
  4. +1
    -1
      tests/__main__.py
  5. +12
    -9
      tests/test_parser.py

+ 5
- 5
examples/json_parser.py Näytä tiedosto

@@ -47,12 +47,12 @@ class TreeToJson(Transformer):
true = lambda self, _: True
false = lambda self, _: False

# json_parser = Lark(json_grammar, parser='earley', lexer='standard')
# def parse(x):
# return TreeToJson().transform(json_parser.parse(x))
json_parser = Lark(json_grammar, parser='earley', lexer='dynamic')
def parse(x):
return TreeToJson().transform(json_parser.parse(x))

json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson())
parse = json_parser.parse
# json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson())
# parse = json_parser.parse

def test():
test_json = '''


+ 3
- 1
lark/lark.py Näytä tiedosto

@@ -130,8 +130,10 @@ class Lark:
self.options.lexer = 'standard'
elif self.options.parser == 'earley':
self.options.lexer = None
else:
assert False, self.options.parser
lexer = self.options.lexer
assert lexer in ('standard', 'contextual', None)
assert lexer in ('standard', 'contextual', 'dynamic', None)

if self.options.ambiguity == 'auto':
if self.options.parser == 'earley':


+ 29
- 0
lark/parser_frontends.py Näytä tiedosto

@@ -7,6 +7,8 @@ from .common import is_terminal, GrammarError, ParserConf, Terminal_Regexp, Term
from .parsers import lalr_parser, old_earley, nearley, earley
from .tree import Transformer

from .parsers import xearley

class WithLexer:
def __init__(self, lexer_conf):
self.lexer_conf = lexer_conf
@@ -171,6 +173,31 @@ class Earley(WithLexer):
tokens = self.lex(text)
return self.parser.parse(tokens)


class XEarley:
    """Parser frontend joining the Earley parser with a dynamic (scanless)
    lexer: terminals are matched directly against the input text as regexps,
    so no separate tokenization pass is performed.

    Mirrors the interface of the other frontend classes in this module
    (``__init__(lexer_conf, parser_conf, options)`` + ``parse(text)``).
    """

    def __init__(self, lexer_conf, parser_conf, options=None):
        # Map terminal names to their token definitions so expansions can be
        # rewritten from symbolic names into concrete regexp terminals.
        self.token_by_name = {t.name: t for t in lexer_conf.tokens}

        rules = [(n, list(self._prepare_expansion(x)), a) for n, x, a in parser_conf.rules]

        # Default to resolving ambiguity unless options explicitly say otherwise.
        resolve_ambiguity = (options.ambiguity == 'resolve') if options else True
        self.parser = xearley.Parser(rules,
                                     parser_conf.start,
                                     parser_conf.callback,
                                     resolve_ambiguity=resolve_ambiguity)

    def _prepare_expansion(self, expansion):
        """Yield each symbol of *expansion*, replacing terminal names with
        Terminal_Regexp instances built from their token patterns."""
        for sym in expansion:
            if is_terminal(sym):
                regexp = self.token_by_name[sym].pattern.to_regexp()
                # NOTE: the original also computed
                # sre_parse.parse(regexp).getwidth() here but discarded the
                # result (and sre_parse was never imported) — dead code removed.
                yield Terminal_Regexp(regexp)
            else:
                yield sym

    def parse(self, text):
        # The dynamic lexer consumes raw text; no pre-lexing step.
        return self.parser.parse(text)

def get_frontend(parser, lexer):
if parser=='lalr':
if lexer is None:
@@ -186,6 +213,8 @@ def get_frontend(parser, lexer):
return Earley_NoLex
elif lexer=='standard':
return Earley
elif lexer=='dynamic':
return XEarley
elif lexer=='contextual':
raise ValueError('The Earley parser does not support the contextual parser')
else:


+ 1
- 1
tests/__main__.py Näytä tiedosto

@@ -5,7 +5,7 @@ import logging

from .test_trees import TestTrees
# from .test_selectors import TestSelectors
from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers, TestEarleyScanless, TestEarley
from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers, TestEarleyScanless, TestEarley, TestEarleyDynamic
# from .test_grammars import TestPythonG, TestConfigG

logging.basicConfig(level=logging.INFO)


+ 12
- 9
tests/test_parser.py Näytä tiedosto

@@ -57,7 +57,7 @@ class TestEarley(unittest.TestCase):
# or re-processing of already completed rules.
g = Lark(r"""start: B
B: ("ab"|/[^b]/)*
""", lexer=None)
""", lexer='dynamic')

self.assertEqual( g.parse('abc').children[0], 'abc')

@@ -65,7 +65,7 @@ class TestEarley(unittest.TestCase):
g = Lark("""start: A "b" c
A: "a"+
c: "abc"
""", parser="earley", lexer=None)
""", parser="earley", lexer='dynamic')
x = g.parse('aaaababc')

def test_earley_scanless2(self):
@@ -80,7 +80,7 @@ class TestEarley(unittest.TestCase):

program = """c b r"""

l = Lark(grammar, parser='earley', lexer=None)
l = Lark(grammar, parser='earley', lexer='dynamic')
l.parse(program)

def test_earley_scanless3(self):
@@ -91,7 +91,7 @@ class TestEarley(unittest.TestCase):
A: "a"+
"""

l = Lark(grammar, parser='earley', lexer=None)
l = Lark(grammar, parser='earley', lexer='dynamic')
res = l.parse("aaa")
self.assertEqual(res.children, ['aa', 'a'])

@@ -101,7 +101,7 @@ class TestEarley(unittest.TestCase):
A: "a"+
"""

l = Lark(grammar, parser='earley', lexer=None)
l = Lark(grammar, parser='earley', lexer='dynamic')
res = l.parse("aaa")
self.assertEqual(res.children, ['aaa'])

@@ -114,7 +114,7 @@ class TestEarley(unittest.TestCase):
empty2:
"""

parser = Lark(grammar, parser='earley', lexer=None)
parser = Lark(grammar, parser='earley', lexer='dynamic')
res = parser.parse('ab')

empty_tree = Tree('empty', [Tree('empty2', [])])
@@ -130,7 +130,7 @@ class TestEarley(unittest.TestCase):
ab: "ab"
"""

parser = Lark(grammar, parser='earley', lexer=None, ambiguity='explicit')
parser = Lark(grammar, parser='earley', lexer='dynamic', ambiguity='explicit')
res = parser.parse('ab')

self.assertEqual( res.data, '_ambig')
@@ -146,6 +146,7 @@ def _make_parser_test(LEXER, PARSER):
b: "b"
a: "a"
""")

r = g.parse('aaabaab')
self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )
r = g.parse('aaabaaba')
@@ -583,15 +584,17 @@ def _make_parser_test(LEXER, PARSER):
_TestParser.__name__ = _NAME
globals()[_NAME] = _TestParser

# Note: You still have to import them in __main__ for the tests to run
_TO_TEST = [
('standard', 'earley'),
('dynamic', 'earley'),
('standard', 'lalr'),
('contextual', 'lalr'),
(None, 'earley'),
]

for LEXER, PARSER in _TO_TEST:
_make_parser_test(LEXER, PARSER)
for _LEXER, _PARSER in _TO_TEST:
_make_parser_test(_LEXER, _PARSER)


if __name__ == '__main__':


Ladataan…
Peruuta
Tallenna