Преглед на файлове

Added `%import` to GrammarLoader & fixed tests

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.2
MegaIng1 преди 3 години
родител
ревизия
225c9e16a5
променени са 3 файла, в които са добавени 207 реда и са изтрити 92 реда
  1. +204
    -89
      lark/load_grammar.py
  2. +2
    -2
      tests/test_grammar.py
  3. +1
    -1
      tests/test_parser.py

+ 204
- 89
lark/load_grammar.py Целия файл

@@ -881,7 +881,7 @@ def _grammar_parser():
_grammar_parser.cache = ParsingFrontend(lexer_conf, parser_conf, {})
return _grammar_parser.cache

_GRAMMAR_ERRORS = [
GRAMMAR_ERRORS = [
('Unclosed parenthesis', ['a: (\n']),
('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
@@ -897,14 +897,14 @@ _GRAMMAR_ERRORS = [

def _parse_grammar(text, name, start='start'):
try:
return _grammar_parser().parse(text + '\n', start)
return PrepareGrammar().transform(_grammar_parser().parse(text + '\n', start))
except UnexpectedCharacters as e:
context = e.get_context(text)
raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
(e.line, e.column, name, context))
except UnexpectedToken as e:
context = e.get_context(text)
error = e.match_examples(_grammar_parser().parse, _GRAMMAR_ERRORS, use_accepts=True)
error = e.match_examples(_grammar_parser().parse, GRAMMAR_ERRORS, use_accepts=True)
if error:
raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context))
elif 'STRING' in e.expected:
@@ -913,19 +913,6 @@ def _parse_grammar(text, name, start='start'):


class GrammarLoader:
ERRORS = [
('Unclosed parenthesis', ['a: (\n']),
('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
('Illegal name for rules or terminals', ['Aa:\n']),
('Alias expects lowercase name', ['a: -> "a"\n']),
('Unexpected colon', ['a::\n', 'a: b:\n', 'a: B:\n', 'a: "a":\n']),
('Misplaced operator', ['a: b??', 'a: b(?)', 'a:+\n', 'a:?\n', 'a:*\n', 'a:|*\n']),
('Expecting option ("|") or a new rule or terminal definition', ['a:a\n()\n']),
('Terminal names cannot contain dots', ['A.B\n']),
('%import expects a name', ['%import "a"\n']),
('%ignore expects a value', ['%ignore %import\n']),
]

def __init__(self, global_keep_all_tokens=False):
self.global_keep_all_tokens = global_keep_all_tokens
@@ -959,7 +946,7 @@ class GrammarLoader:
"""Parse grammar_text, verify, and create Grammar object. Display nice messages on error."""

tree = _parse_grammar(grammar_text+'\n', grammar_name)
tree = PrepareGrammar().transform(tree)
# tree = PrepareGrammar().transform(tree)

# Extract grammar items
defs = classify(tree.children, lambda c: c.data, lambda c: c.children)
@@ -1167,7 +1154,7 @@ class GrammarLoader:
for sym in _find_used_symbols(expansions):
if sym.type == 'TERMINAL':
if sym not in terminal_names:
raise GrammarError("Token '%s' used but not defined (in rule %s)" % (sym, name))
raise GrammarError("Terminal '%s' used but not defined (in rule %s)" % (sym, name))
else:
if sym not in rule_names and sym not in params:
raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name))
@@ -1180,52 +1167,63 @@ class GrammarBuilder:
self.global_keep_all_tokens = global_keep_all_tokens
self.import_paths = import_paths or []

self._term_defs = {}
self._rule_defs = {}
self._definitions = {}
self._extend = {}
self._override = {}
self._ignore_names = []
self._import_set = {}
def _is_term(self, name):
    # Lark convention: terminal names are ALL-CAPS, rule names are lowercase.
    return name.isupper()

def define_term(self, name, exp, priority=1, override=False):
    """Register terminal *name* with expansions *exp* and the given priority.

    With override=False the terminal must be new; with override=True it must
    already exist (used by %override).

    Raises:
        GrammarError: on duplicate definition, on overriding a terminal that
            does not exist, or for reserved double-underscore names.
    """
    # XOR: exactly one of "already defined" / "override requested" is an error.
    if (name in self._term_defs) ^ override:
        if override:
            # Fixed: original message used "..." % name with no %s placeholder,
            # which raises TypeError instead of the intended GrammarError.
            raise GrammarError("Cannot override a nonexisting terminal %s" % name)
        else:
            raise GrammarError("Terminal '%s' defined more than once" % name)
    if name.startswith('__'):
        raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)
    self._term_defs[name] = (exp, priority)
def _grammer_error(self, msg, name):
    """Raise GrammarError, filling ``{name}``/``{type}``/``{Type}`` in *msg*.

    ``{type}`` is "rule" or "terminal" (``{Type}`` is the capitalized form),
    chosen by the ALL-CAPS naming convention via _is_term.

    NOTE(review): "grammer" is a typo for "grammar"; kept as-is because the
    other methods of this class call it under this name.
    """
    low_type = ("rule", "terminal")[self._is_term(name)]
    up_type = low_type.title()
    raise GrammarError(msg.format(name=name, type=low_type, Type=up_type))
def define_rule(self, name, params, exp, options, override=False):
if (name in self._rule_defs) ^ override:
def _check_options(self, name, options):
if self._is_term(name):
if options is None:
options = 1
elif not isinstance(options, int):
raise GrammarError("Terminal require a single int as 'options' (e.g. priority)")
else:
if options is None:
options = RuleOptions()
elif not isinstance(options, RuleOptions):
raise GrammarError("Rules require a RuleOptions instance as 'options'")
if self.global_keep_all_tokens:
options.keep_all_tokens = True
return options

def define(self, name, exp, params=(), options=None, override=False):
if (name in self._definitions) ^ override:
if override:
raise GrammarError("Cannot override a nonexisting rule: %s" % name)
self._grammer_error("Cannot override a nonexisting {type} {name}", name)
else:
raise GrammarError("Rule '%s' defined more than once" % name)
self._grammer_error("{Type} '{name}' defined more than once", name)
if name.startswith('__'):
raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)
self._rule_defs[name] = (params, exp, options)

def extend_term(self, name, exp, priority=1):
if name not in self._term_defs:
raise GrammarError("Can't extend terminal %s as it wasn't defined before" % name)
old_expansions = self._term_defs[name][0]
extend_expansions(old_expansions, exp)
def extend_rule(self, name, params, exp, options):
if name not in self._rule_defs:
raise GrammarError("Can't extend rule %s as it wasn't defined before" % name)
if params != self._rule_defs[name][0]:
raise GrammarError("Cannot extend templates with different parameters: %s" % name)
# TODO: think about what to do with RuleOptions
old_expansions = self._rule_defs[name][1]
self._grammer_error('Names starting with double-underscore are reserved (Error at {name})', name)
self._definitions[name] = (params, exp, self._check_options(name, options))

def extend(self, name, exp, params=(), options=None):
    """Append the expansions in *exp* to an existing definition (``%extend``).

    The definition must already exist, and for templates the declared
    parameters must match exactly.

    Raises (via _grammer_error):
        GrammarError: if *name* was never defined, or the template
            parameters differ.
    """
    if name not in self._definitions:
        self._grammer_error("Can't extend {type} {name} as it wasn't defined before", name)
    # Fixed: removed a leftover debug print() that fired on parameter mismatch.
    if tuple(params) != tuple(self._definitions[name][0]):
        self._grammer_error("Cannot extend {type} with different parameters: {name}", name)
    # TODO: think about what to do with 'options'
    old_expansions = self._definitions[name][1]
    extend_expansions(old_expansions, exp)

def ignore(self, exp_or_name):
if isinstance(exp_or_name, str):
self._ignore_names.append(exp_or_name)
else:
assert isinstance(exp_or_name, Tree)
t = exp_or_name
if t.data=='expansions' and len(t.children) == 1:
if t.data == 'expansions' and len(t.children) == 1:
t2 ,= t.children
if t2.data=='expansion' and len(t2.children) == 1:
item ,= t2.children
@@ -1237,62 +1235,179 @@ class GrammarBuilder:

name = '__IGNORE_%d'% len(self._ignore_names)
self._ignore_names.append(name)
self._term_defs[name] = (t, 1)
self._definitions[name] = ((), t, 1)
def declare(self, *names):
for name in names:
self.define_term(name, None, None)
self.define(name, None)
# TODO: options/priority gets filled by this. We have to make sure that this doesn't break anything
def _mangle_exp(self, exp, mangle):
if mangle is None:
return exp
exp = deepcopy(exp) # TODO: is this needed
for t in exp.iter_subtrees():
for i, c in enumerate(t.children):
if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'):
t.children[i] = Token(c.type, mangle(c.value))
return exp

def _unpack_term_def(self, tree):
name = tree.children[0].value
exp = tree.children[-1]
p = int(tree.children[1]) if len(tree.children) == 3 else 1
return name, exp, p
def _unpack_definition(self, tree, mangle):
if tree.data == 'rule':
name, params, exp, opts = options_from_rule(*tree.children)
else:
name = tree.children[0].value
params = ()
opts = int(tree.children[1]) if len(tree.children) == 3 else 1 # priority
exp = tree.children[-1]
if mangle is not None:
params = tuple(mangle(p) for p in params)
name = mangle(name)
exp = self._mangle_exp(exp, mangle)
return name, exp, params, opts
def _unpack_rule_def(self, tree):
# FIXME: A little pointless at the moment, but I want to rework this (e.g. move the work from `options_from_rule` to here)
r = options_from_rule(*tree.children)
return r
def _unpack_import(self, stmt, grammar_name):
    """Decompose an %import statement into (dotted_path, base_path, aliases).

    base_path is None for library imports; otherwise it is the directory
    the relative import should be resolved against. aliases maps imported
    names to their local names.
    """
    children = stmt.children
    if len(children) > 1:
        path_node, arg1 = children
    else:
        path_node, = children
        arg1 = None

    if isinstance(arg1, Tree):
        # Multi import: %import path (name1 name2 ...)
        dotted_path = tuple(path_node.children)
        names = arg1.children
        # Multi imports cannot be aliased, so each name maps to itself.
        aliases = dict(zip(names, names))
    else:
        # Single import: %import path.name [-> alias]
        dotted_path = tuple(path_node.children[:-1])
        name = path_node.children[-1]  # last segment of the dotted path
        aliases = {name.value: (arg1 or name).value}

    if path_node.data == 'import_lib':
        # Library import: resolved later against import_paths / lark stdlib.
        return dotted_path, None, aliases

    # Relative import: resolve against the importing grammar's location.
    if grammar_name == '<string>':
        # Grammar was given as a string: resolve relative to the running script.
        try:
            base_file = os.path.abspath(sys.modules['__main__'].__file__)
        except AttributeError:
            base_file = None
    else:
        base_file = grammar_name  # external grammar file (or PackageResource)

    if base_file:
        if isinstance(base_file, PackageResource):
            base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0])
        else:
            base_path = os.path.split(base_file)[0]
    else:
        base_path = os.path.abspath(os.path.curdir)
    return dotted_path, base_path, aliases

def load_grammar(self, grammar_text, grammar_source="<?>"):
tree = _parse_grammar(grammar_text, grammar_source)
def load_grammar(self, grammar_text, grammar_name="<?>", mangle=None):
tree = _parse_grammar(grammar_text, grammar_name)
imports = {} # imports are collect over the whole file to prevent duplications
actions = [] # Some statements need to be delayed (override and extend) till after imports are handled
for stmt in tree.children:
if stmt.data == 'term':
self.define_term(*self._unpack_term_def(stmt))
continue
elif stmt.data == 'rule':
self.define_rule(*self._unpack_rule_def(stmt))
if stmt.data in ('term', 'rule'):
self.define(*self._unpack_definition(stmt, mangle))
continue
assert stmt.data == 'statement', stmt.data
stmt ,= stmt.children
if stmt.data == 'ignore':
self.ignore(*stmt.children)
if stmt.data == 'import':
dotted_path, base_path, aliases = self._unpack_import(stmt, grammar_name)
try:
import_base_path, import_aliases = imports[dotted_path]
assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
import_aliases.update(aliases)
except KeyError:
imports[dotted_path] = base_path, aliases
elif stmt.data == 'ignore':
# if mangle is not None, we shouldn't apply ignore, since we aren't in a toplevel grammar
if mangle is None:
self.ignore(*stmt.children)
elif stmt.data == 'declare':
self.declare(*(t.value for t in stmt.children))
if mangle is None:
self.declare(*(t.value for t in stmt.children))
else:
self.declare(*(mangle(t.value) for t in stmt.children))
elif stmt.data == 'override':
r ,= stmt.children
if r.data == 'rule':
self.define_rule(*self._unpack_rule_def(r), override=True)
else:
assert r.data == 'term'
self.define_term(*self._unpack_term_def(r), override=True)
actions.append((self.define, self._unpack_definition(r, mangle)+ (True,)))
elif stmt.data == 'extend':
r ,= stmt.children
if r.data == 'rule':
self.extend_rule(*self._unpack_rule_def(r))
else:
assert r.data == 'term'
self.extend_term(*self._unpack_term_def(r))
actions.append((self.extend, self._unpack_definition(r, mangle)))
else:
assert False, stmt
for dotted_path, (base_path, aliases) in imports.items():
self.do_import(dotted_path, base_path, aliases, mangle)
for f, args in actions:
f(*args)
def do_import(self, dotted_path, base_path, aliases, base_mangle=None):
    """Locate an imported grammar, load it, and merge its mangled definitions.

    Parameters:
        dotted_path: tuple of path segments from the %import statement.
        base_path: directory of the importing grammar, or None for library imports.
        aliases: {imported_name: local_name} mapping for the imported symbols.
        base_mangle: mangler of the importing grammar, composed so nested
            imports get doubly-prefixed names.
    """
    mangle = self.get_mangle('__'.join(dotted_path), aliases, base_mangle)
    grammar_path = os.path.join(*dotted_path) + EXT
    # Search order: user-supplied import paths, then the importing grammar's
    # own directory (when known), then lark's bundled stdlib grammars.
    to_try = self.import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
    for source in to_try:
        try:
            if callable(source):
                # Custom loader protocol: returns (resolved_path, text),
                # or raises IOError when it can't provide the grammar.
                joined_path, text = source(base_path, grammar_path)
            else:
                joined_path = os.path.join(source, grammar_path)
                with open(joined_path, encoding='utf8') as f:
                    text = f.read()
        except IOError:
            continue  # not found at this location -- try the next one
        else:
            # Recursive load: definitions land in self under the mangled names.
            self.load_grammar(text, joined_path, mangle)
            break
    else:
        # Search failed. Make Python throw a nice error.
        open(grammar_path, encoding='utf8')
        assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,)

def get_mangle(self, prefix, aliases, base_mangle=None):
    """Build a mangling function for an imported grammar's namespace.

    Aliased names map directly to their alias; all other names get a
    ``prefix__`` namespace (upper-cased for terminals), with a leading
    underscore preserved in front. *base_mangle*, when given, is applied
    on top so nested imports compose.
    """
    rule_ns = prefix
    term_ns = prefix.upper()

    def mangle(name):
        if name in aliases:
            name = aliases[name]
        else:
            ns = term_ns if self._is_term(name) else rule_ns
            if name[0] == '_':
                name = '_%s__%s' % (ns, name[1:])
            else:
                name = '%s__%s' % (ns, name)
        if base_mangle is not None:
            name = base_mangle(name)
        return name

    return mangle

def check(self):
pass
def build(self) -> Grammar:
return Grammar([(n, *r) for n, r in self._rule_defs.items()],
[(n, t) for n, t in self._term_defs],
self._ignore_names)
for name, (params, exp, options) in self._definitions.items():
if self._is_term(name):
assert isinstance(options, int)
if exp is not None:
for sym in _find_used_symbols(exp):
if sym not in self._definitions and sym not in params:
self._grammer_error("{Type} '{name}' used but not defined (in rule %s)" % name, sym.value)
if not set(self._definitions).issuperset(self._ignore_names):
raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions)))

def build(self):
    """Run consistency checks and assemble the final Grammar object.

    Splits the unified definition table into terminal and rule definitions
    (terminals are the ALL-CAPS names), resolves terminal cross-references,
    and returns the Grammar.
    """
    self.check()
    term_defs = [(name, (exp, options))
                 for name, (params, exp, options) in self._definitions.items()
                 if self._is_term(name)]
    rule_defs = [(name, params, exp, options)
                 for name, (params, exp, options) in self._definitions.items()
                 if not self._is_term(name)]
    resolve_term_references(term_defs)
    return Grammar(rule_defs, term_defs, self._ignore_names)

def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
    """Module entry point: parse *grammar* text and build a Grammar object.

    Args:
        grammar: the grammar text.
        source: name/path of the grammar; used in error messages and to
            resolve relative %imports.
        import_paths: extra directories (or loader callables) searched by %import.
        global_keep_all_tokens: default keep_all_tokens flag for all rules.
    """
    # Cleaned up: removed the superseded GrammarLoader call left behind as
    # dead/commented-out code by the GrammarBuilder migration.
    builder = GrammarBuilder(global_keep_all_tokens, import_paths)
    builder.load_grammar(grammar, source)
    return builder.build()

+ 2
- 2
tests/test_grammar.py Целия файл

@@ -4,7 +4,7 @@ import sys
from unittest import TestCase, main

from lark import Lark, Token, Tree
from lark.load_grammar import GrammarLoader, GrammarError
from lark.load_grammar import GrammarError, GRAMMAR_ERRORS


class TestGrammar(TestCase):
@@ -12,7 +12,7 @@ class TestGrammar(TestCase):
pass

def test_errors(self):
for msg, examples in GrammarLoader.ERRORS:
for msg, examples in GRAMMAR_ERRORS:
for example in examples:
try:
p = Lark(example)


+ 1
- 1
tests/test_parser.py Целия файл

@@ -1961,7 +1961,7 @@ def _make_parser_test(LEXER, PARSER):

p = _Lark(grammar, import_paths=[custom_loader])
self.assertEqual(p.parse('ab'),
Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])]))
Tree('start', [Tree('startab', [Tree('ab__expr', [Token('AB__A', 'a'), Token('AB__B', 'b')])])]))

grammar = """
start: rule_to_import


Зареждане…
Отказ
Запис