
removed GrammarLoader

MegaIng · 3 years ago
commit 9f3ef97bd7
1 changed file with 6 additions and 262 deletions
lark/load_grammar.py   +6  -262

@@ -854,15 +854,6 @@ def _find_used_symbols(tree):
     return {t for x in tree.find_data('expansion')
             for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}
 
-def extend_expansions(tree, new):
-    assert isinstance(tree, Tree) and tree.data == 'expansions'
-    assert isinstance(new, Tree) and new.data == 'expansions'
-    while len(tree.children) == 2:
-        assert isinstance(tree.children[0], Tree) and tree.children[0].data == 'expansions', tree
-        tree = tree.children[0]
-    tree.children.insert(0, new)
-
-
 
 def _grammar_parser():
     try:
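
Not part of the diff: a hedged sketch of what the removed extend_expansions helper did. lark's grammar-of-grammars parses a rule's alternatives into an 'expansions' tree that may be left-nested two children deep, so the helper walked down the left spine and prepended the new alternatives at the innermost node. The node shapes below are illustrative, not taken from the commit.

    from lark import Tree

    old = Tree('expansions', [Tree('expansions', [Tree('expansion', ['a'])]),
                              Tree('expansion', ['b'])])
    new = Tree('expansions', [Tree('expansion', ['c'])])

    node = old
    while len(node.children) == 2:    # walk the left-nested spine
        node = node.children[0]
    node.children.insert(0, new)      # prepend the new alternatives
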
@@ -913,256 +904,6 @@ def _parse_grammar(text, name, start='start'):
         raise
 
 
-class GrammarLoader:
-
-    def __init__(self, global_keep_all_tokens=False):
-        self.global_keep_all_tokens = global_keep_all_tokens
-
-    def import_grammar(self, grammar_path, base_path=None, import_paths=[]):
-        if grammar_path not in _imported_grammars:
-            # import_paths take priority over base_path since they should handle relative imports and ignore everything else.
-            to_try = import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
-            for source in to_try:
-                try:
-                    if callable(source):
-                        joined_path, text = source(base_path, grammar_path)
-                    else:
-                        joined_path = os.path.join(source, grammar_path)
-                        with open(joined_path, encoding='utf8') as f:
-                            text = f.read()
-                except IOError:
-                    continue
-                else:
-                    grammar = self.load_grammar(text, joined_path, import_paths)
-                    _imported_grammars[grammar_path] = grammar
-                    break
-            else:
-                # Search failed. Make Python throw a nice error.
-                open(grammar_path, encoding='utf8')
-                assert False
-
-        return _imported_grammars[grammar_path]
-
-    def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]):
-        """Parse grammar_text, verify, and create Grammar object. Display nice messages on error."""
-
-        tree = _parse_grammar(grammar_text+'\n', grammar_name)
-        # tree = PrepareGrammar().transform(tree)
-
-        # Extract grammar items
-        defs = classify(tree.children, lambda c: c.data, lambda c: c.children)
-        term_defs = defs.pop('term', [])
-        rule_defs = defs.pop('rule', [])
-        statements = defs.pop('statement', [])
-        assert not defs
-
-        term_defs = [td if len(td)==3 else (td[0], 1, td[1]) for td in term_defs]
-        term_defs = [(name.value, (t, int(p))) for name, p, t in term_defs]
-        rule_defs = [options_from_rule(*x) for x in rule_defs]
-
-        # Execute statements
-        ignore, imports = [], {}
-        overriding_rules = []
-        overriding_terms = []
-        extend_rules = []
-        extend_terms = []
-        for (stmt,) in statements:
-            if stmt.data == 'ignore':
-                t ,= stmt.children
-                ignore.append(t)
-            elif stmt.data == 'import':
-                if len(stmt.children) > 1:
-                    path_node, arg1 = stmt.children
-                else:
-                    path_node ,= stmt.children
-                    arg1 = None
-
-                if isinstance(arg1, Tree):  # Multi import
-                    dotted_path = tuple(path_node.children)
-                    names = arg1.children
-                    aliases = dict(zip(names, names))  # Can't have aliased multi import, so all aliases will be the same as names
-                else:  # Single import
-                    dotted_path = tuple(path_node.children[:-1])
-                    name = path_node.children[-1]  # Get name from dotted path
-                    aliases = {name: arg1 or name}  # Aliases if exist
-
-                if path_node.data == 'import_lib':  # Import from library
-                    base_path = None
-                else:  # Relative import
-                    if grammar_name == '<string>':  # Import relative to script file path if grammar is coded in script
-                        try:
-                            base_file = os.path.abspath(sys.modules['__main__'].__file__)
-                        except AttributeError:
-                            base_file = None
-                    else:
-                        base_file = grammar_name  # Import relative to grammar file path if external grammar file
-                    if base_file:
-                        if isinstance(base_file, PackageResource):
-                            base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0])
-                        else:
-                            base_path = os.path.split(base_file)[0]
-                    else:
-                        base_path = os.path.abspath(os.path.curdir)
-
-                try:
-                    import_base_path, import_aliases = imports[dotted_path]
-                    assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
-                    import_aliases.update(aliases)
-                except KeyError:
-                    imports[dotted_path] = base_path, aliases
-
-            elif stmt.data == 'declare':
-                for t in stmt.children:
-                    term_defs.append([t.value, (None, None)])
-            elif stmt.data == 'override':
-                r ,= stmt.children
-                if r.data == 'rule':
-                    overriding_rules.append(options_from_rule(*r.children))
-                else:
-                    if len(r.children) == 2:
-                        overriding_terms.append((r.children[0].value, (r.children[1], 1)))
-                    else:
-                        overriding_terms.append((r.children[0].value, (r.children[2], int(r.children[1]))))
-            elif stmt.data == 'extend':
-                r ,= stmt.children
-                if r.data == 'rule':
-                    extend_rules.append(options_from_rule(*r.children))
-                else:
-                    if len(r.children) == 2:
-                        extend_terms.append((r.children[0].value, (r.children[1], 1)))
-                    else:
-                        extend_terms.append((r.children[0].value, (r.children[2], int(r.children[1]))))
-            else:
-                assert False, stmt
-
-        # import grammars
-        for dotted_path, (base_path, aliases) in imports.items():
-            grammar_path = os.path.join(*dotted_path) + EXT
-            g = self.import_grammar(grammar_path, base_path=base_path, import_paths=import_paths)
-            new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)
-
-            term_defs += new_td
-            rule_defs += new_rd
-
-        # Replace rules by overriding rules, according to name
-        for r in overriding_rules:
-            name = r[0]
-            # remove overridden rule from rule_defs
-            overridden, rule_defs = classify_bool(rule_defs, lambda r: r[0] == name)  # FIXME inefficient
-            if not overridden:
-                raise GrammarError("Cannot override a nonexisting rule: %s" % name)
-            rule_defs.append(r)
-
-        # Same for terminals
-        for t in overriding_terms:
-            name = t[0]
-            # remove overridden terminal from term_defs
-            overridden, term_defs = classify_bool(term_defs, lambda t: t[0] == name)  # FIXME inefficient
-            if not overridden:
-                raise GrammarError("Cannot override a nonexisting terminal: %s" % name)
-            term_defs.append(t)
-
-        # Extend the definition of rules by adding new entries to the `expansions` node
-        for r in extend_rules:
-            name = r[0]
-            for old in rule_defs:
-                if old[0] == name:
-                    if len(old[1]) != len(r[1]):
-                        raise GrammarError("Cannot extend templates with different parameters: %s" % name)
-                    extend_expansions(old[2], r[2])
-                    break
-            else:
-                raise GrammarError("Can't extend rule %s as it wasn't defined before" % name)
-
-        # Same for terminals
-        for name, (e, _) in extend_terms:
-            for old in term_defs:
-                if old[0] == name:
-                    extend_expansions(old[1][0], e)
-                    break
-            else:
-                raise GrammarError("Can't extend terminal %s as it wasn't defined before" % name)
-
-        ## Handle terminals
-
-        # Verify correctness 1
-        for name, _ in term_defs:
-            if name.startswith('__'):
-                raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)
-
-        # Handle ignore tokens
-        # XXX A slightly hacky solution. Recognition of %ignore TERMINAL as separate comes from the lexer's
-        #     inability to handle duplicate terminals (two names, one value)
-        ignore_names = []
-        for t in ignore:
-            if t.data=='expansions' and len(t.children) == 1:
-                t2 ,= t.children
-                if t2.data=='expansion' and len(t2.children) == 1:
-                    item ,= t2.children
-                    if item.data == 'value':
-                        item ,= item.children
-                        if isinstance(item, Token) and item.type == 'TERMINAL':
-                            ignore_names.append(item.value)
-                            continue
-
-            name = '__IGNORE_%d'% len(ignore_names)
-            ignore_names.append(name)
-            term_defs.append((name, (t, 1)))
-
-        # Verify correctness 2
-        terminal_names = set()
-        for name, _ in term_defs:
-            if name in terminal_names:
-                raise GrammarError("Terminal '%s' defined more than once" % name)
-            terminal_names.add(name)
-
-        if set(ignore_names) > terminal_names:
-            raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(ignore_names) - terminal_names))
-
-        resolve_term_references(term_defs)
-
-        ## Handle rules
-
-        rule_names = {}
-        for name, params, _x, option in rule_defs:
-            # We can't just simply not throw away the tokens later, we need option.keep_all_tokens to correctly generate maybe_placeholders
-            if self.global_keep_all_tokens:
-                option.keep_all_tokens = True
-
-            if name.startswith('__'):
-                raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)
-            if name in rule_names:
-                raise GrammarError("Rule '%s' defined more than once" % name)
-            rule_names[name] = len(params)
-
-        for name, params, expansions, _o in rule_defs:
-            for i, p in enumerate(params):
-                if p in rule_names:
-                    raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name))
-                if p in params[:i]:
-                    raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name))
-            for temp in expansions.find_data('template_usage'):
-                sym = temp.children[0]
-                args = temp.children[1:]
-                if sym not in params:
-                    if sym not in rule_names:
-                        raise GrammarError("Template '%s' used but not defined (in rule %s)" % (sym, name))
-                    if len(args) != rule_names[sym]:
-                        raise GrammarError("Wrong number of template arguments used for %s "
-                                           "(expected %s, got %s) (in rule %s)" % (sym, rule_names[sym], len(args), name))
-            for sym in _find_used_symbols(expansions):
-                if sym.type == 'TERMINAL':
-                    if sym not in terminal_names:
-                        raise GrammarError("Terminal '%s' used but not defined (in rule %s)" % (sym, name))
-                else:
-                    if sym not in rule_names and sym not in params:
-                        raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name))
-
-        return Grammar(rule_defs, term_defs, ignore_names)
-

 class GrammarBuilder:
     def __init__(self, global_keep_all_tokens=False, import_paths=None):
         self.global_keep_all_tokens = global_keep_all_tokens
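
Not part of the diff: much of the removed load_grammar body above dispatches on the %import statement's forms. A hedged sketch of the shapes it distinguished (library import with an alias, and multi import), using only stock lark grammars from common.lark:

    from lark import Lark

    parser = Lark(r'''
        start: NAME "=" NUMBER
        %import common.CNAME -> NAME    // single import with an alias
        %import common (NUMBER, WS)     // multi import: aliases not allowed
        %ignore WS
    ''')
    print(parser.parse("x = 42").pretty())
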
@@ -1216,8 +957,12 @@ class GrammarBuilder:
         if tuple(params) != tuple(self._definitions[name][0]):
             self._grammar_error("Cannot extend {type} with different parameters: {name}", name)
         # TODO: think about what to do with 'options'
-        old_expansions = self._definitions[name][1]
-        extend_expansions(old_expansions, exp)
+        base = self._definitions[name][1]
+
+        while len(base.children) == 2:
+            assert isinstance(base.children[0], Tree) and base.children[0].data == 'expansions', base
+            base = base.children[0]
+        base.children.insert(0, exp)
 
     def _ignore(self, exp_or_name):
         if isinstance(exp_or_name, str):
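
Not part of the diff: the traversal inlined above is what backs lark's %extend statement in GrammarBuilder. A hedged usage sketch, assuming a lark version where %extend is available (0.11+):

    from lark import Lark

    parser = Lark(r'''
        start: "a"
        %extend start: "b"    // prepends a second alternative to start
    ''')
    print(parser.parse("b"))
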
@@ -1430,4 +1175,3 @@ def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
     builder = GrammarBuilder(global_keep_all_tokens, import_paths)
     builder.load_grammar(grammar, source)
     return builder.build()
-    # return GrammarLoader(global_keep_all_tokens).load_grammar(grammar, source, import_paths)
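
Not part of the diff: after this commit the public load_grammar() entry point above is backed solely by GrammarBuilder, with the GrammarLoader path deleted rather than kept as a commented-out fallback. End-user code still goes through Lark() as before; a hedged sanity-check sketch:

    from lark import Lark

    parser = Lark(r'''
        start: WORD "," WORD
        %import common.WORD
        %ignore " "
    ''')
    print(parser.parse("hello, world").pretty())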
