@@ -267,7 +267,7 @@ class Lark(Serialize): | |||
assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', ) | |||
# Parse the grammar file and compose the grammars (TODO) | |||
self.grammar = load_grammar(grammar, self.source, re_module) | |||
self.grammar = load_grammar(grammar, self.source, re_module, self.options.keep_all_tokens) | |||
# Compile the EBNF grammar into BNF | |||
self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start) | |||
@@ -322,7 +322,7 @@ class Lark(Serialize): | |||
self._callbacks = None | |||
# we don't need these callbacks if we aren't building a tree | |||
if self.options.ambiguity != 'forest': | |||
self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders) | |||
self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders) | |||
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) | |||
def _build_parser(self): | |||
@@ -650,7 +650,7 @@ class Grammar: | |||
_imported_grammars = {} | |||
def import_grammar(grammar_path, re_, base_paths=[]): | |||
def import_grammar(grammar_path, loader, base_paths=[]): | |||
if grammar_path not in _imported_grammars: | |||
import_paths = base_paths + IMPORT_PATHS | |||
for import_path in import_paths: | |||
@@ -658,7 +658,7 @@ def import_grammar(grammar_path, re_, base_paths=[]): | |||
joined_path = os.path.join(import_path, grammar_path) | |||
with open(joined_path, encoding='utf8') as f: | |||
text = f.read() | |||
grammar = load_grammar(text, joined_path, re_) | |||
grammar = loader.load_grammar(text, joined_path) | |||
_imported_grammars[grammar_path] = grammar | |||
break | |||
else: | |||
@@ -803,7 +803,7 @@ class GrammarLoader: | |||
('%ignore expects a value', ['%ignore %import\n']), | |||
] | |||
def __init__(self, re_module): | |||
def __init__(self, re_module, always_keep_all_tokens): | |||
terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] | |||
rules = [options_from_rule(name, None, x) for name, x in RULES.items()] | |||
@@ -816,6 +816,7 @@ class GrammarLoader: | |||
self.canonize_tree = CanonizeTree() | |||
self.re_module = re_module | |||
self.always_keep_all_tokens = always_keep_all_tokens | |||
def load_grammar(self, grammar_text, grammar_name='<?>'): | |||
"Parse grammar_text, verify, and create Grammar object. Display nice messages on error." | |||
@@ -901,7 +902,7 @@ class GrammarLoader: | |||
# import grammars | |||
for dotted_path, (base_paths, aliases) in imports.items(): | |||
grammar_path = os.path.join(*dotted_path) + EXT | |||
g = import_grammar(grammar_path, self.re_module, base_paths=base_paths) | |||
g = import_grammar(grammar_path, self, base_paths=base_paths) | |||
new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases) | |||
term_defs += new_td | |||
@@ -946,7 +947,10 @@ class GrammarLoader: | |||
rules = rule_defs | |||
rule_names = {} | |||
for name, params, _x, _o in rules: | |||
for name, params, _x, option in rules: | |||
if self.always_keep_all_tokens: # We need to do this somewhere. Might as well prevent an additional loop | |||
option.keep_all_tokens = True | |||
if name.startswith('__'): | |||
raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name) | |||
if name in rule_names: | |||
@@ -981,5 +985,5 @@ class GrammarLoader: | |||
def load_grammar(grammar, source, re_): | |||
return GrammarLoader(re_).load_grammar(grammar, source) | |||
def load_grammar(grammar, source, re_, always_keep_all_tokens=False):
    """Parse grammar text into a Grammar object using a fresh GrammarLoader.

    Parameters:
        grammar: the grammar text to parse.
        source: name of the grammar, forwarded as the loader's ``grammar_name``.
        re_: regular-expression module handed to the loader.
        always_keep_all_tokens: when true, the loader sets ``keep_all_tokens``
            on the options of every rule it loads. Defaults to ``False`` so
            that existing three-argument callers keep working unchanged.

    Returns:
        Whatever ``GrammarLoader.load_grammar`` returns for this grammar.
    """
    loader = GrammarLoader(re_, always_keep_all_tokens)
    return loader.load_grammar(grammar, source)
@@ -299,10 +299,9 @@ def apply_visit_wrapper(func, name, wrapper): | |||
class ParseTreeBuilder: | |||
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False): | |||
def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False): | |||
self.tree_class = tree_class | |||
self.propagate_positions = propagate_positions | |||
self.always_keep_all_tokens = keep_all_tokens | |||
self.ambiguous = ambiguous | |||
self.maybe_placeholders = maybe_placeholders | |||
@@ -311,7 +310,7 @@ class ParseTreeBuilder: | |||
def _init_builders(self, rules): | |||
for rule in rules: | |||
options = rule.options | |||
keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens | |||
keep_all_tokens = options.keep_all_tokens | |||
expand_single_child = options.expand1 | |||
wrapper_chain = list(filter(None, [ | |||
@@ -2041,6 +2041,10 @@ def _make_parser_test(LEXER, PARSER): | |||
# Anonymous tokens shouldn't count | |||
p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True) | |||
self.assertEqual(p.parse("").children, []) | |||
# Unless keep_all_tokens=True | |||
p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True, keep_all_tokens=True) | |||
self.assertEqual(p.parse("").children, [None, None, None]) | |||
# All invisible constructs shouldn't count | |||
p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c] | |||