
Brought back the removal of unused rules/terminals in imported grammars

Erez Sh committed 3 years ago (commit fd3be9d8d6)
3 changed files with 113 additions and 82 deletions:

  1. lark/load_grammar.py   +77  -51
  2. tests/test_grammar.py  +36  -0
  3. tests/test_parser.py   +0   -31

lark/load_grammar.py (+77, -51)

@@ -729,15 +729,12 @@ stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)
 
 
 
-def resolve_term_references(term_defs):
+def resolve_term_references(term_dict):
     # TODO Solve with transitive closure (maybe)
 
-    term_dict = {k:t for k, (t,_p) in term_defs}
-    assert len(term_dict) == len(term_defs), "Same name defined twice?"
-
     while True:
         changed = False
-        for name, (token_tree, _p) in term_defs:
+        for name, token_tree in term_dict.items():
             if token_tree is None:  # Terminal added through %declare
                 continue
             for exp in token_tree.find_data('value'):
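
Note: resolve_term_references now receives the name-to-tree dict directly instead of building it from term_defs pairs; the caller constructs that dict (see the load_grammar hunk further down). For orientation, a toy sketch of the fixed-point substitution loop, using plain lists in place of lark's token trees; all data here is made up:

    # Toy model of resolve_term_references: keep inlining references
    # until a full pass makes no changes (a fixed point).
    defs = {'DIGIT': ['[0-9]'], 'INT': ['DIGIT', '+']}

    while True:
        changed = False
        for name, parts in defs.items():
            for i, p in enumerate(parts):
                if p in defs and p != name:
                    parts[i:i+1] = defs[p]   # inline the referenced definition
                    changed = True
        if not changed:
            break

    print(defs['INT'])   # -> ['[0-9]', '+']

The real function walks token_tree.find_data('value') and also guards against recursive terminal references, which this sketch omits.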
@@ -849,6 +846,32 @@ def _parse_grammar(text, name, start='start'):
     return PrepareGrammar().transform(tree)
 
 
+def _get_mangle(prefix, aliases, base_mangle=None):
+    def mangle(s):
+        if s in aliases:
+            s = aliases[s]
+        else:
+            if s[0] == '_':
+                s = '_%s__%s' % (prefix, s[1:])
+            else:
+                s = '%s__%s' % (prefix, s)
+        if base_mangle is not None:
+            s = base_mangle(s)
+        return s
+    return mangle
+
+def _mangle_exp(exp, mangle):
+    if mangle is None:
+        return exp
+    exp = deepcopy(exp)  # TODO: is this needed
+    for t in exp.iter_subtrees():
+        for i, c in enumerate(t.children):
+            if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'):
+                t.children[i] = Token(c.type, mangle(c.value))
+    return exp
+
+
+
 class GrammarBuilder:
     def __init__(self, global_keep_all_tokens=False, import_paths=None):
         self.global_keep_all_tokens = global_keep_all_tokens
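
Note: _get_mangle and _mangle_exp are the former GrammarBuilder methods hoisted to module level (their removal appears in the next hunk), so that the fresh GrammarBuilder created in do_import below can share them. A standalone sketch of the naming scheme, with hypothetical example inputs:

    def get_mangle(prefix, aliases, base_mangle=None):
        # Same logic as _get_mangle above, reproduced for illustration.
        def mangle(s):
            if s in aliases:
                s = aliases[s]                   # explicitly imported names keep their (alias) name
            elif s[0] == '_':
                s = '_%s__%s' % (prefix, s[1:])  # leading underscore stays in front of the prefix
            else:
                s = '%s__%s' % (prefix, s)
            if base_mangle is not None:
                s = base_mangle(s)               # nested imports compose their prefixes
            return s
        return mangle

    mangle = get_mangle('ab', {'startab': 'startab'})
    print(mangle('startab'))   # -> 'startab'    (requested import, kept as-is)
    print(mangle('expr'))      # -> 'ab__expr'   (internal rule, prefixed)
    print(mangle('_helper'))   # -> '_ab__helper'

These are exactly the names the new tests below assert (ab__expr, ab__A, ab__B).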
@@ -937,33 +960,6 @@ class GrammarBuilder:
         for name in names:
             self._define(name, None)
 
-    def _mangle_exp(self, exp, mangle):
-        if mangle is None:
-            return exp
-        exp = deepcopy(exp)  # TODO: is this needed
-        for t in exp.iter_subtrees():
-            for i, c in enumerate(t.children):
-                if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'):
-                    t.children[i] = Token(c.type, mangle(c.value))
-        return exp
-
-
-    def _unpack_definition(self, tree, mangle):
-        if tree.data == 'rule':
-            name, params, exp, opts = options_from_rule(*tree.children)
-        else:
-            name = tree.children[0].value
-            params = ()  # TODO terminal templates
-            opts = int(tree.children[1]) if len(tree.children) == 3 else 1  # priority
-            exp = tree.children[-1]
-
-        if mangle is not None:
-            params = tuple(mangle(p) for p in params)
-            name = mangle(name)
-
-        exp = self._mangle_exp(exp, mangle)
-        return name, exp, params, opts
-
     def _unpack_import(self, stmt, grammar_name):
         if len(stmt.children) > 1:
             path_node, arg1 = stmt.children
@@ -1003,10 +999,27 @@ class GrammarBuilder:
 
         return dotted_path, base_path, aliases
 
-    def load_grammar(self, grammar_text, grammar_name="<?>", mangle=None):
+    def _unpack_definition(self, tree, mangle):
+        if tree.data == 'rule':
+            name, params, exp, opts = options_from_rule(*tree.children)
+        else:
+            name = tree.children[0].value
+            params = ()  # TODO terminal templates
+            opts = int(tree.children[1]) if len(tree.children) == 3 else 1  # priority
+            exp = tree.children[-1]
+
+        if mangle is not None:
+            params = tuple(mangle(p) for p in params)
+            name = mangle(name)
+
+        exp = _mangle_exp(exp, mangle)
+        return name, exp, params, opts
+
+
+    def load_grammar(self, grammar_text, grammar_name="<?>", mangle=None, dotted_path=None):
         tree = _parse_grammar(grammar_text, grammar_name)
 
-        imports = {} # imports are collect over the whole file to prevent duplications
+        imports = {}
         for stmt in tree.children:
             if stmt.data == 'import':
                 dotted_path, base_path, aliases = self._unpack_import(stmt, grammar_name)
@@ -1045,8 +1058,27 @@ class GrammarBuilder:
                 assert False, stmt
 
 
+        term_defs = { name: exp
+            for name, (_params, exp, _options) in self._definitions.items()
+            if self._is_term(name)
+        }
+        resolve_term_references(term_defs)
+
+
+    def _remove_unused(self, used):
+        def rule_dependencies(symbol):
+            if self._is_term(symbol):
+                return []
+            params, tree, _ = self._definitions[symbol]
+            return _find_used_symbols(tree) - set(params)
+
+        _used = set(bfs(used, rule_dependencies))
+        self._definitions = {k: v for k, v in self._definitions.items() if k in _used}
+
+
     def do_import(self, dotted_path, base_path, aliases, base_mangle=None):
-        mangle = self.get_mangle('__'.join(dotted_path), aliases, base_mangle)
+        assert dotted_path
+        mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle)
         grammar_path = os.path.join(*dotted_path) + EXT
         to_try = self.import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
         for source in to_try:
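
Note: _remove_unused is what gives the commit its title: starting from the symbols the importer actually asked for, it walks rule dependencies breadth-first (bfs comes from lark.utils) and keeps only the definitions it reached. A self-contained sketch of the same pruning idea; the bfs helper here mirrors lark.utils.bfs, and the dependency table is made up:

    from collections import deque

    def bfs(initial, expand):
        # Yield each reachable node exactly once, expanding lazily.
        open_q = deque(initial)
        visited = set(open_q)
        while open_q:
            node = open_q.popleft()
            yield node
            for nxt in expand(node):
                if nxt not in visited:
                    visited.add(nxt)
                    open_q.append(nxt)

    # Hypothetical dependency table: symbol -> symbols its expansion references.
    deps = {
        'startab':    {'ab__expr'},
        'ab__expr':   {'ab__A', 'ab__B'},
        'ab__A':      set(),
        'ab__B':      set(),
        'ab__unused': {'ab__C'},   # never reached from 'startab'
        'ab__C':      set(),
    }

    used = set(bfs(['startab'], lambda s: deps[s]))
    pruned = {k: v for k, v in deps.items() if k in used}
    print(sorted(pruned))   # -> ['ab__A', 'ab__B', 'ab__expr', 'startab']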
@@ -1060,29 +1092,23 @@ class GrammarBuilder:
             except IOError:
                 continue
             else:
-                self.load_grammar(text, joined_path, mangle)
+                gb = GrammarBuilder(self.global_keep_all_tokens, self.import_paths)
+                gb.load_grammar(text, joined_path, mangle, dotted_path)
+                gb._remove_unused(map(mangle, aliases))
+                for name in gb._definitions:
+                    if name in self._definitions:
+                        raise GrammarError("Cannot import '%s' from '%s': Symbol already defined." % (name, grammar_path))
+
+                self._definitions.update(**gb._definitions)
                 break
         else:
             # Search failed. Make Python throw a nice error.
            open(grammar_path, encoding='utf8')
            assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,)
 
-    def get_mangle(self, prefix, aliases, base_mangle=None):
-        def mangle(s):
-            if s in aliases:
-                s = aliases[s]
-            else:
-                if s[0] == '_':
-                    s = '_%s__%s' % (prefix, s[1:])
-                else:
-                    s = '%s__%s' % (prefix, s)
-            if base_mangle is not None:
-                s = base_mangle(s)
-            return s
-        return mangle
-
     def validate(self):
-        for name, (params, exp, options) in self._definitions.items():
+        for name, (params, exp, _options) in self._definitions.items():
             for i, p in enumerate(params):
                 if p in self._definitions:
                     raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name))
@@ -1120,7 +1146,7 @@ class GrammarBuilder:
                 term_defs.append((name, (exp, options)))
             else:
                 rule_defs.append((name, params, exp, options))
-        resolve_term_references(term_defs)
+        # resolve_term_references(term_defs)
         return Grammar(rule_defs, term_defs, self._ignore_names)
 
 def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
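
Note: taken together, do_import now loads each imported grammar into a throwaway GrammarBuilder, prunes it down to the aliases that were requested, and only then merges the survivors into the parent, raising GrammarError on a name clash. The user-visible effect, sketched with the stdlib common grammar (exact tree repr may vary by lark version):

    from lark import Lark

    # Only INT (plus whatever INT itself depends on) should be pulled in
    # from common.lark, rather than every rule and terminal it defines.
    parser = Lark("""
        start: INT
        %import common.INT
    """)
    print(parser.parse("42"))   # Tree('start', [Token('INT', '42')])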


tests/test_grammar.py (+36, -0)

@@ -5,6 +5,7 @@ from unittest import TestCase, main
 
 from lark import Lark, Token, Tree
 from lark.load_grammar import GrammarError, GRAMMAR_ERRORS
+from lark.load_grammar import FromPackageLoader
 
 
 class TestGrammar(TestCase):
@@ -124,6 +125,41 @@ class TestGrammar(TestCase):
         """
         self.assertRaises( GrammarError, Lark, g)
 
+    def test_import_custom_sources(self):
+        custom_loader = FromPackageLoader('tests', ('grammars', ))
+
+        grammar = """
+        start: startab
+
+        %import ab.startab
+        """
+
+        p = Lark(grammar, import_paths=[custom_loader])
+        self.assertEqual(p.parse('ab'),
+                         Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])]))
+
+    def test_import_custom_sources2(self):
+        custom_loader = FromPackageLoader('tests', ('grammars', ))
+
+        grammar = """
+        start: rule_to_import
+
+        %import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import
+        """
+        p = Lark(grammar, import_paths=[custom_loader])
+        x = p.parse('N')
+        self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])
+
+    def test_import_custom_sources3(self):
+        custom_loader2 = FromPackageLoader('tests')
+        grammar = """
+        %import .test_relative_import (start, WS)
+        %ignore WS
+        """
+        p = Lark(grammar, import_paths=[custom_loader2], source_path=__file__)  # import relative to current file
+        x = p.parse('12 capybaras')
+        self.assertEqual(x.children, ['12', 'capybaras'])
+
 
 if __name__ == '__main__':
     main()


tests/test_parser.py (+0, -31)

@@ -11,7 +11,6 @@ from copy import copy, deepcopy
 from lark.utils import Py36, isascii
 
 from lark import Token
-from lark.load_grammar import FromPackageLoader
 
 try:
     from cStringIO import StringIO as cStringIO
@@ -1923,36 +1922,6 @@ def _make_parser_test(LEXER, PARSER):
         parser = _Lark(grammar, postlex=CustomIndenter())
         parser.parse("a\n b\n")
 
-    def test_import_custom_sources(self):
-        custom_loader = FromPackageLoader('tests', ('grammars', ))
-
-        grammar = """
-        start: startab
-
-        %import ab.startab
-        """
-
-        p = _Lark(grammar, import_paths=[custom_loader])
-        self.assertEqual(p.parse('ab'),
-                         Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])]))
-
-        grammar = """
-        start: rule_to_import
-
-        %import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import
-        """
-        p = _Lark(grammar, import_paths=[custom_loader])
-        x = p.parse('N')
-        self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])
-
-        custom_loader2 = FromPackageLoader('tests')
-        grammar = """
-        %import .test_relative_import (start, WS)
-        %ignore WS
-        """
-        p = _Lark(grammar, import_paths=[custom_loader2], source_path=__file__)  # import relative to current file
-        x = p.parse('12 capybaras')
-        self.assertEqual(x.children, ['12', 'capybaras'])
-
     @unittest.skipIf(PARSER == 'cyk', "Doesn't work for CYK")
     def test_prioritization(self):

