Browse Source

Improved handling and performance of large grammars

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.8.6
Erez Sh 5 years ago
parent
commit
dcc98241c1
4 changed files with 19 additions and 7 deletions
  1. +3
    -1
      lark/load_grammar.py
  2. +2
    -1
      lark/parsers/earley.py
  3. +7
    -5
      lark/visitors.py
  4. +7
    -0
      tests/test_parser.py

+ 3
- 1
lark/load_grammar.py View File

@@ -275,7 +275,9 @@ class SimplifyRule_Visitor(Visitor):

def expansions(self, tree):
self._flatten(tree)
- tree.children = dedup_list(tree.children)
+ # Ensure all children are unique
+ if len(set(tree.children)) != len(tree.children):
+ tree.children = dedup_list(tree.children) # dedup is expensive, so try to minimize its use


class RuleTreeToText(Transformer):


+ 2
- 1
lark/parsers/earley.py View File

@@ -39,7 +39,8 @@ class Parser:

self.forest_sum_visitor = None
for rule in parser_conf.rules:
- self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]
+ if rule.origin not in self.predictions:
+ self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]

## Detect if any rules have priorities set. If the user specified priority = "none" then
# the priorities will be stripped from all rules before they reach us, allowing us to


+ 7
- 5
lark/visitors.py View File

@@ -158,28 +158,30 @@ class Transformer_NonRecursive(Transformer):
"Non-recursive. Doesn't change the original tree."

def transform(self, tree):
- q = [tree]
-
# Tree to postfix
rev_postfix = []
+ q = [tree]
while q:
t = q.pop()
rev_postfix.append( t )
if isinstance(t, Tree):
- q += t.children[::-1]
+ q += t.children

# Postfix to tree
stack = []
for x in reversed(rev_postfix):
if isinstance(x, Tree):
size = len(x.children)
- args = [stack.pop() for _ in range(size)]
+ if size:
+ args = stack[-size:]
+ del stack[-size:]
+ else:
+ args = []
stack.append(self._call_userfunc(x, args))
else:
stack.append(x)

t ,= stack # We should have only one tree remaining
- assert t == tree
return t




+ 7
- 0
tests/test_parser.py View File

@@ -35,6 +35,13 @@ def _read(n, *args):
return f.read()

class TestParsers(unittest.TestCase):
+ def test_big_list(self):
+ Lark(r"""
+ start: {}
+ """.format(
+ "|".join(['"%s"'%i for i in range(250)])
+ ))
+
def test_same_ast(self):
"Tests that Earley and LALR parsers produce equal trees"
g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"


Loading…
Cancel
Save