Browse Source

PR refactor: Simplified load_grammar

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.2
Erez Sh 3 years ago
parent
commit
d18e009fa4
1 changed files with 48 additions and 44 deletions
  1. +48
    -44
      lark/load_grammar.py

+ 48
- 44
lark/load_grammar.py View File

@@ -104,7 +104,7 @@ TERMINALS = {
RULES = {
'start': ['_list'],
'_list': ['_item', '_list _item'],
'_item': ['rule', 'term', 'statement', '_NL'],
'_item': ['rule', 'term', 'ignore', 'import', 'declare', 'override', 'extend', '_NL'],

'rule': ['RULE template_params _COLON expansions _NL',
'RULE template_params _DOT NUMBER _COLON expansions _NL'],
@@ -151,7 +151,6 @@ RULES = {

'term': ['TERMINAL _COLON expansions _NL',
'TERMINAL _DOT NUMBER _COLON expansions _NL'],
'statement': ['ignore', 'import', 'declare', 'override', 'extend'],
'override': ['_OVERRIDE rule',
'_OVERRIDE term'],
'extend': ['_EXTEND rule',
@@ -799,12 +798,12 @@ def _find_used_symbols(tree):
for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}


def _grammar_parser():
def _get_parser():
try:
return _grammar_parser.cache
return _get_parser.cache
except AttributeError:
terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]
rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o)
for r, _p, xs, o in rules for i, x in enumerate(xs)]
@@ -814,8 +813,8 @@ def _grammar_parser():
parser_conf = ParserConf(rules, callback, ['start'])
lexer_conf.lexer_type = 'standard'
parser_conf.parser_type = 'lalr'
_grammar_parser.cache = ParsingFrontend(lexer_conf, parser_conf, {})
return _grammar_parser.cache
_get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, {})
return _get_parser.cache

GRAMMAR_ERRORS = [
('Unclosed parenthesis', ['a: (\n']),
@@ -833,20 +832,22 @@ GRAMMAR_ERRORS = [

def _parse_grammar(text, name, start='start'):
try:
return PrepareGrammar().transform(_grammar_parser().parse(text + '\n', start))
tree = _get_parser().parse(text + '\n', start)
except UnexpectedCharacters as e:
context = e.get_context(text)
raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
(e.line, e.column, name, context))
except UnexpectedToken as e:
context = e.get_context(text)
error = e.match_examples(_grammar_parser().parse, GRAMMAR_ERRORS, use_accepts=True)
error = e.match_examples(_get_parser().parse, GRAMMAR_ERRORS, use_accepts=True)
if error:
raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context))
elif 'STRING' in e.expected:
raise GrammarError("Expecting a value at line %s column %s\n\n%s" % (e.line, e.column, context))
raise

return PrepareGrammar().transform(tree)


class GrammarBuilder:
def __init__(self, global_keep_all_tokens=False, import_paths=None):
@@ -855,7 +856,7 @@ class GrammarBuilder:

self._definitions = {}
self._ignore_names = []
def _is_term(self, name):
# Imported terminals are of the form `Path__to__Grammar__file__TERMINAL_NAME`
# Only the last part is the actual name, and the rest might contain mixed case
@@ -869,13 +870,13 @@ class GrammarBuilder:
args['type' + postfix] = lowercase_type = ("rule", "terminal")[self._is_term(name)]
args['Type' + postfix] = lowercase_type.title()
raise GrammarError(msg.format(**args))
def _check_options(self, name, options):
if self._is_term(name):
if options is None:
options = 1
# if we don't use Integral here, we run into python2.7/python3 problems with long vs int
elif not isinstance(options, Integral):
elif not isinstance(options, Integral):
raise GrammarError("Terminal require a single int as 'options' (e.g. priority), got %s" % (type(options),))
else:
if options is None:
@@ -886,7 +887,7 @@ class GrammarBuilder:
options.keep_all_tokens = True
return options

def _define(self, name, exp, params=(), options=None, override=False):
if (name in self._definitions) ^ override:
if override:
@@ -900,7 +901,7 @@ class GrammarBuilder:
def _extend(self, name, exp, params=(), options=None):
if name not in self._definitions:
self._grammar_error("Can't extend {type} {name} as it wasn't defined before", name)
if tuple(params) != tuple(self._definitions[name][0]):
if tuple(params) != tuple(self._definitions[name][0]):
self._grammar_error("Cannot extend {type} with different parameters: {name}", name)
# TODO: think about what to do with 'options'
base = self._definitions[name][1]
@@ -929,11 +930,11 @@ class GrammarBuilder:
name = '__IGNORE_%d'% len(self._ignore_names)
self._ignore_names.append(name)
self._definitions[name] = ((), t, 1)
def _declare(self, *names):
for name in names:
self._define(name, None)
def _mangle_exp(self, exp, mangle):
if mangle is None:
return exp
@@ -944,21 +945,23 @@ class GrammarBuilder:
t.children[i] = Token(c.type, mangle(c.value))
return exp

def _unpack_definition(self, tree, mangle):
if tree.data == 'rule':
name, params, exp, opts = options_from_rule(*tree.children)
else:
name = tree.children[0].value
params = ()
params = () # TODO terminal templates
opts = int(tree.children[1]) if len(tree.children) == 3 else 1 # priority
exp = tree.children[-1]

if mangle is not None:
params = tuple(mangle(p) for p in params)
name = mangle(name)

exp = self._mangle_exp(exp, mangle)
return name, exp, params, opts
def _unpack_import(self, stmt, grammar_name):
if len(stmt.children) > 1:
path_node, arg1 = stmt.children
@@ -992,19 +995,15 @@ class GrammarBuilder:
base_path = os.path.split(base_file)[0]
else:
base_path = os.path.abspath(os.path.curdir)
return dotted_path, base_path, aliases

def load_grammar(self, grammar_text, grammar_name="<?>", mangle=None):
tree = _parse_grammar(grammar_text, grammar_name)

imports = {} # imports are collected over the whole file to prevent duplications
actions = [] # Some statements need to be delayed (override and extend) till after imports are handled
for stmt in tree.children:
if stmt.data in ('term', 'rule'):
self._define(*self._unpack_definition(stmt, mangle))
continue
assert stmt.data == 'statement', stmt.data
stmt ,= stmt.children
if stmt.data == 'import':
dotted_path, base_path, aliases = self._unpack_import(stmt, grammar_name)
try:
@@ -1013,30 +1012,35 @@ class GrammarBuilder:
import_aliases.update(aliases)
except KeyError:
imports[dotted_path] = base_path, aliases

for dotted_path, (base_path, aliases) in imports.items():
self.do_import(dotted_path, base_path, aliases, mangle)

for stmt in tree.children:
if stmt.data in ('term', 'rule'):
self._define(*self._unpack_definition(stmt, mangle))
elif stmt.data == 'override':
r ,= stmt.children
self._define(*self._unpack_definition(r, mangle), override=True)
elif stmt.data == 'extend':
r ,= stmt.children
self._extend(*self._unpack_definition(r, mangle))
elif stmt.data == 'ignore':
# if mangle is not None, we shouldn't apply ignore, since we aren't in a toplevel grammar
if mangle is None:
self._ignore(*stmt.children)
elif stmt.data == 'declare':
names = [t.value for t in stmt.children]
if mangle is None:
self._declare(*(t.value for t in stmt.children))
self._declare(*names)
else:
self._declare(*(mangle(t.value) for t in stmt.children))
elif stmt.data == 'override':
r ,= stmt.children
actions.append((self._define, self._unpack_definition(r, mangle) + (True,)))
elif stmt.data == 'extend':
r ,= stmt.children
actions.append((self._extend, self._unpack_definition(r, mangle)))
self._declare(*map(mangle, names))
elif stmt.data == 'import':
pass
else:
assert False, stmt
for dotted_path, (base_path, aliases) in imports.items():
self.do_import(dotted_path, base_path, aliases, mangle)
for f, args in actions:
f(*args)


def do_import(self, dotted_path, base_path, aliases, base_mangle=None):
mangle = self.get_mangle('__'.join(dotted_path), aliases, base_mangle)
grammar_path = os.path.join(*dotted_path) + EXT
@@ -1062,7 +1066,7 @@ class GrammarBuilder:
def get_mangle(self, prefix, aliases, base_mangle=None):
def mangle(s):
if s in aliases:
s = aliases[s]
s = aliases[s]
else:
if s[0] == '_':
s = '_%s__%s' % (prefix, s[1:])
@@ -1080,7 +1084,7 @@ class GrammarBuilder:
raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name))
if p in params[:i]:
raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name))
if exp is None: # Remaining checks don't work for abstract rules/terminals
continue

@@ -1094,7 +1098,7 @@ class GrammarBuilder:
expected, actual = len(self._definitions[sym][0]), len(args)
self._grammar_error("Wrong number of template arguments used for {name} "
"(expected %s, got %s) (in {type2} {name2})" % (expected, actual), sym, name)
for sym in _find_used_symbols(exp):
if sym not in self._definitions and sym not in params:
self._grammar_error("{Type} '{name}' used but not defined (in {type2} {name2})", sym, name)


Loading…
Cancel
Save