Browse Source

Merge branch 'MegaIng-keep_all_maybe'

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
Erez Sh 4 years ago
parent
commit
131012b893
4 changed files with 41 additions and 26 deletions
  1. +8
    -2
      lark/lark.py
  2. +27
    -21
      lark/load_grammar.py
  3. +2
    -3
      lark/parse_tree_builder.py
  4. +4
    -0
      tests/test_parser.py

+ 8
- 2
lark/lark.py View File

@@ -274,7 +274,7 @@ class Lark(Serialize):
assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )


# Parse the grammar file and compose the grammars (TODO)
self.grammar = load_grammar(grammar, self.source, re_module)
self.grammar = load_grammar(grammar, self.source, re_module, self.options.keep_all_tokens)


if self.options.postlex is not None:
terminals_to_keep = set(self.options.postlex.always_accept)
@@ -335,7 +335,13 @@ class Lark(Serialize):
self._callbacks = None
# we don't need these callbacks if we aren't building a tree
if self.options.ambiguity != 'forest':
self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
self._parse_tree_builder = ParseTreeBuilder(
self.rules,
self.options.tree_class or Tree,
self.options.propagate_positions,
self.options.parser!='lalr' and self.options.ambiguity=='explicit',
self.options.maybe_placeholders
)
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)


def _build_parser(self):


+ 27
- 21
lark/load_grammar.py View File

@@ -650,22 +650,6 @@ class Grammar:




_imported_grammars = {}
def import_grammar(grammar_path, re_, base_paths=[]):
if grammar_path not in _imported_grammars:
import_paths = base_paths + IMPORT_PATHS
for import_path in import_paths:
with suppress(IOError):
joined_path = os.path.join(import_path, grammar_path)
with open(joined_path, encoding='utf8') as f:
text = f.read()
grammar = load_grammar(text, joined_path, re_)
_imported_grammars[grammar_path] = grammar
break
else:
open(grammar_path, encoding='utf8')
assert False

return _imported_grammars[grammar_path]


def import_from_grammar_into_namespace(grammar, namespace, aliases):
"""Returns all rules and terminals of grammar, prepended
@@ -803,7 +787,7 @@ class GrammarLoader:
('%ignore expects a value', ['%ignore %import\n']),
]


def __init__(self, re_module):
def __init__(self, re_module, global_keep_all_tokens):
terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]


rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
@@ -816,6 +800,24 @@ class GrammarLoader:


self.canonize_tree = CanonizeTree()
self.re_module = re_module
self.global_keep_all_tokens = global_keep_all_tokens

def import_grammar(self, grammar_path, base_paths=[]):
if grammar_path not in _imported_grammars:
import_paths = base_paths + IMPORT_PATHS
for import_path in import_paths:
with suppress(IOError):
joined_path = os.path.join(import_path, grammar_path)
with open(joined_path, encoding='utf8') as f:
text = f.read()
grammar = self.load_grammar(text, joined_path)
_imported_grammars[grammar_path] = grammar
break
else:
open(grammar_path, encoding='utf8') # Force a file not found error
assert False

return _imported_grammars[grammar_path]


def load_grammar(self, grammar_text, grammar_name='<?>'):
"Parse grammar_text, verify, and create Grammar object. Display nice messages on error."
@@ -901,7 +903,7 @@ class GrammarLoader:
# import grammars
for dotted_path, (base_paths, aliases) in imports.items():
grammar_path = os.path.join(*dotted_path) + EXT
g = import_grammar(grammar_path, self.re_module, base_paths=base_paths)
g = self.import_grammar(grammar_path, base_paths=base_paths)
new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)


term_defs += new_td
@@ -946,7 +948,11 @@ class GrammarLoader:
rules = rule_defs


rule_names = {}
for name, params, _x, _o in rules:
for name, params, _x, option in rules:
# We can't just simply not throw away the tokens later, we need option.keep_all_tokens to correctly generate maybe_placeholders
if self.global_keep_all_tokens:
option.keep_all_tokens = True

if name.startswith('__'):
raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)
if name in rule_names:
@@ -981,5 +987,5 @@ class GrammarLoader:






def load_grammar(grammar, source, re_):
return GrammarLoader(re_).load_grammar(grammar, source)
def load_grammar(grammar, source, re_, global_keep_all_tokens):
return GrammarLoader(re_, global_keep_all_tokens).load_grammar(grammar, source)

+ 2
- 3
lark/parse_tree_builder.py View File

@@ -299,10 +299,9 @@ def apply_visit_wrapper(func, name, wrapper):




class ParseTreeBuilder:
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False):
def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False):
self.tree_class = tree_class
self.propagate_positions = propagate_positions
self.always_keep_all_tokens = keep_all_tokens
self.ambiguous = ambiguous
self.maybe_placeholders = maybe_placeholders


@@ -311,7 +310,7 @@ class ParseTreeBuilder:
def _init_builders(self, rules):
for rule in rules:
options = rule.options
keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens
keep_all_tokens = options.keep_all_tokens
expand_single_child = options.expand1


wrapper_chain = list(filter(None, [


+ 4
- 0
tests/test_parser.py View File

@@ -2064,6 +2064,10 @@ def _make_parser_test(LEXER, PARSER):
# Anonymous tokens shouldn't count
p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [])
# Unless keep_all_tokens=True
p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True, keep_all_tokens=True)
self.assertEqual(p.parse("").children, [None, None, None])


# All invisible constructs shouldn't count
p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]


Loading…
Cancel
Save