@@ -267,7 +267,7 @@ class Lark(Serialize):
         assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )
 
         # Parse the grammar file and compose the grammars (TODO)
-        self.grammar = load_grammar(grammar, self.source, re_module)
+        self.grammar = load_grammar(grammar, self.source, re_module, self.options.keep_all_tokens)
 
         # Compile the EBNF grammar into BNF
         self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)
@@ -322,7 +322,7 @@ class Lark(Serialize):
         self._callbacks = None
         # we don't need these callbacks if we aren't building a tree
         if self.options.ambiguity != 'forest':
-            self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
+            self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
             self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
 
     def _build_parser(self):
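
Note: `Lark.__init__` now forwards `keep_all_tokens` to `load_grammar` rather than to `ParseTreeBuilder`, so the flag is applied while the grammar is being loaded (see the `GrammarLoader` changes below) instead of when the tree builder is constructed.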
@@ -650,7 +650,7 @@ class Grammar:
 
 _imported_grammars = {}
-def import_grammar(grammar_path, re_, base_paths=[]):
+def import_grammar(grammar_path, loader, base_paths=[]):
     if grammar_path not in _imported_grammars:
         import_paths = base_paths + IMPORT_PATHS
         for import_path in import_paths:
@@ -658,7 +658,7 @@ def import_grammar(grammar_path, re_, base_paths=[]):
             joined_path = os.path.join(import_path, grammar_path)
             with open(joined_path, encoding='utf8') as f:
                 text = f.read()
-            grammar = load_grammar(text, joined_path, re_)
+            grammar = loader.load_grammar(text, joined_path)
             _imported_grammars[grammar_path] = grammar
             break
     else:
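
Passing the `GrammarLoader` itself into `import_grammar`, instead of just the `re` module, means grammars pulled in through `%import` are parsed by the same configured loader, so they inherit `always_keep_all_tokens` as well.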
@@ -803,7 +803,7 @@ class GrammarLoader: | |||||
('%ignore expects a value', ['%ignore %import\n']), | ('%ignore expects a value', ['%ignore %import\n']), | ||||
] | ] | ||||
def __init__(self, re_module): | |||||
def __init__(self, re_module, always_keep_all_tokens): | |||||
terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] | terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] | ||||
rules = [options_from_rule(name, None, x) for name, x in RULES.items()] | rules = [options_from_rule(name, None, x) for name, x in RULES.items()] | ||||
@@ -816,6 +816,7 @@ class GrammarLoader: | |||||
self.canonize_tree = CanonizeTree() | self.canonize_tree = CanonizeTree() | ||||
self.re_module = re_module | self.re_module = re_module | ||||
self.always_keep_all_tokens = always_keep_all_tokens | |||||
def load_grammar(self, grammar_text, grammar_name='<?>'): | def load_grammar(self, grammar_text, grammar_name='<?>'): | ||||
"Parse grammar_text, verify, and create Grammar object. Display nice messages on error." | "Parse grammar_text, verify, and create Grammar object. Display nice messages on error." | ||||
@@ -901,7 +902,7 @@ class GrammarLoader: | |||||
# import grammars | # import grammars | ||||
for dotted_path, (base_paths, aliases) in imports.items(): | for dotted_path, (base_paths, aliases) in imports.items(): | ||||
grammar_path = os.path.join(*dotted_path) + EXT | grammar_path = os.path.join(*dotted_path) + EXT | ||||
g = import_grammar(grammar_path, self.re_module, base_paths=base_paths) | |||||
g = import_grammar(grammar_path, self, base_paths=base_paths) | |||||
new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases) | new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases) | ||||
term_defs += new_td | term_defs += new_td | ||||
@@ -946,7 +947,10 @@ class GrammarLoader: | |||||
rules = rule_defs | rules = rule_defs | ||||
rule_names = {} | rule_names = {} | ||||
for name, params, _x, _o in rules: | |||||
for name, params, _x, option in rules: | |||||
if self.always_keep_all_tokens: # We need to do this somewhere. Might as well prevent an additional loop | |||||
option.keep_all_tokens = True | |||||
if name.startswith('__'): | if name.startswith('__'): | ||||
raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name) | raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name) | ||||
if name in rule_names: | if name in rule_names: | ||||
@@ -981,5 +985,5 @@ class GrammarLoader: | |||||
def load_grammar(grammar, source, re_): | |||||
return GrammarLoader(re_).load_grammar(grammar, source) | |||||
def load_grammar(grammar, source, re_, always_keep_all_tokens): | |||||
return GrammarLoader(re_, always_keep_all_tokens).load_grammar(grammar, source) |
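
Taken together, the loader-side changes apply the global flag once, at load time, by stamping `keep_all_tokens` onto every rule's options. A minimal sketch of the new flow; note that `GrammarLoader` and `load_grammar` are lark internals, not public API, and the import path below is an assumption based on this diff:

```python
import re
from lark.load_grammar import GrammarLoader, load_grammar

# The loader is constructed with the global flag, so every rule it
# produces gets keep_all_tokens stamped into its options (see the
# rule loop in the @@ -946 hunk above).
loader = GrammarLoader(re, True)
g = loader.load_grammar('start: "a" "b"', '<example>')

# Equivalent, via the module-level helper changed above:
g = load_grammar('start: "a" "b"', '<example>', re, True)
```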
@@ -299,10 +299,9 @@ def apply_visit_wrapper(func, name, wrapper):
 
 class ParseTreeBuilder:
-    def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False):
+    def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False):
         self.tree_class = tree_class
         self.propagate_positions = propagate_positions
-        self.always_keep_all_tokens = keep_all_tokens
         self.ambiguous = ambiguous
         self.maybe_placeholders = maybe_placeholders
@@ -311,7 +310,7 @@ class ParseTreeBuilder:
     def _init_builders(self, rules):
         for rule in rules:
             options = rule.options
-            keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens
+            keep_all_tokens = options.keep_all_tokens
             expand_single_child = options.expand1
 
             wrapper_chain = list(filter(None, [
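
With the global override gone, `ParseTreeBuilder` reads `keep_all_tokens` from each rule's options alone; rules loaded under `keep_all_tokens=True` already carry the flag. The user-visible effect, pinned down by the new test below, is that `maybe_placeholders` now produces `None` placeholders for anonymous tokens as well. A quick sketch against the public `Lark` API:

```python
from lark import Lark

grammar = 'start: ["a"] ["b"] ["c"]'

# Anonymous tokens are normally filtered out of the tree, so the
# optional groups leave no placeholders behind:
p = Lark(grammar, maybe_placeholders=True)
assert p.parse('').children == []

# With keep_all_tokens=True, each unmatched optional now yields None:
p = Lark(grammar, maybe_placeholders=True, keep_all_tokens=True)
assert p.parse('').children == [None, None, None]
```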
@@ -2041,6 +2041,10 @@ def _make_parser_test(LEXER, PARSER):
             # Anonymous tokens shouldn't count
             p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
             self.assertEqual(p.parse("").children, [])
 
+            # Unless keep_all_tokens=True
+            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True, keep_all_tokens=True)
+            self.assertEqual(p.parse("").children, [None, None, None])
+
             # All invisible constructs shouldn't count
             p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]