Browse Source

Merge branch 'MegaIng-keep_all_maybe'

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
Erez Sh 4 years ago
parent
commit
131012b893
4 changed files with 41 additions and 26 deletions
  1. +8
    -2
      lark/lark.py
  2. +27
    -21
      lark/load_grammar.py
  3. +2
    -3
      lark/parse_tree_builder.py
  4. +4
    -0
      tests/test_parser.py

+ 8
- 2
lark/lark.py View File

@@ -274,7 +274,7 @@ class Lark(Serialize):
assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )


# Parse the grammar file and compose the grammars (TODO)
self.grammar = load_grammar(grammar, self.source, re_module)
self.grammar = load_grammar(grammar, self.source, re_module, self.options.keep_all_tokens)


if self.options.postlex is not None:
terminals_to_keep = set(self.options.postlex.always_accept)
@@ -335,7 +335,13 @@ class Lark(Serialize):
self._callbacks = None
# we don't need these callbacks if we aren't building a tree
if self.options.ambiguity != 'forest':
self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
self._parse_tree_builder = ParseTreeBuilder(
self.rules,
self.options.tree_class or Tree,
self.options.propagate_positions,
self.options.parser!='lalr' and self.options.ambiguity=='explicit',
self.options.maybe_placeholders
)
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)


def _build_parser(self):


+ 27
- 21
lark/load_grammar.py View File

@@ -650,22 +650,6 @@ class Grammar:




_imported_grammars = {}
def import_grammar(grammar_path, re_, base_paths=[]):
if grammar_path not in _imported_grammars:
import_paths = base_paths + IMPORT_PATHS
for import_path in import_paths:
with suppress(IOError):
joined_path = os.path.join(import_path, grammar_path)
with open(joined_path, encoding='utf8') as f:
text = f.read()
grammar = load_grammar(text, joined_path, re_)
_imported_grammars[grammar_path] = grammar
break
else:
open(grammar_path, encoding='utf8')
assert False

return _imported_grammars[grammar_path]


def import_from_grammar_into_namespace(grammar, namespace, aliases):
"""Returns all rules and terminals of grammar, prepended
@@ -803,7 +787,7 @@ class GrammarLoader:
('%ignore expects a value', ['%ignore %import\n']),
]


def __init__(self, re_module):
def __init__(self, re_module, global_keep_all_tokens):
terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]


rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
@@ -816,6 +800,24 @@ class GrammarLoader:


self.canonize_tree = CanonizeTree()
self.re_module = re_module
self.global_keep_all_tokens = global_keep_all_tokens

def import_grammar(self, grammar_path, base_paths=[]):
if grammar_path not in _imported_grammars:
import_paths = base_paths + IMPORT_PATHS
for import_path in import_paths:
with suppress(IOError):
joined_path = os.path.join(import_path, grammar_path)
with open(joined_path, encoding='utf8') as f:
text = f.read()
grammar = self.load_grammar(text, joined_path)
_imported_grammars[grammar_path] = grammar
break
else:
open(grammar_path, encoding='utf8') # Force a file not found error
assert False

return _imported_grammars[grammar_path]


def load_grammar(self, grammar_text, grammar_name='<?>'):
"Parse grammar_text, verify, and create Grammar object. Display nice messages on error."
@@ -901,7 +903,7 @@ class GrammarLoader:
# import grammars
for dotted_path, (base_paths, aliases) in imports.items():
grammar_path = os.path.join(*dotted_path) + EXT
g = import_grammar(grammar_path, self.re_module, base_paths=base_paths)
g = self.import_grammar(grammar_path, base_paths=base_paths)
new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)


term_defs += new_td
@@ -946,7 +948,11 @@ class GrammarLoader:
rules = rule_defs


rule_names = {}
for name, params, _x, _o in rules:
for name, params, _x, option in rules:
# We can't just simply not throw away the tokens later, we need option.keep_all_tokens to correctly generate maybe_placeholders
if self.global_keep_all_tokens:
option.keep_all_tokens = True

if name.startswith('__'):
raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)
if name in rule_names:
@@ -981,5 +987,5 @@ class GrammarLoader:






def load_grammar(grammar, source, re_):
return GrammarLoader(re_).load_grammar(grammar, source)
def load_grammar(grammar, source, re_, global_keep_all_tokens):
return GrammarLoader(re_, global_keep_all_tokens).load_grammar(grammar, source)

+ 2
- 3
lark/parse_tree_builder.py View File

@@ -299,10 +299,9 @@ def apply_visit_wrapper(func, name, wrapper):




class ParseTreeBuilder:
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False):
def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False):
self.tree_class = tree_class
self.propagate_positions = propagate_positions
self.always_keep_all_tokens = keep_all_tokens
self.ambiguous = ambiguous
self.maybe_placeholders = maybe_placeholders


@@ -311,7 +310,7 @@ class ParseTreeBuilder:
def _init_builders(self, rules):
for rule in rules:
options = rule.options
keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens
keep_all_tokens = options.keep_all_tokens
expand_single_child = options.expand1


wrapper_chain = list(filter(None, [


+ 4
- 0
tests/test_parser.py View File

@@ -2064,6 +2064,10 @@ def _make_parser_test(LEXER, PARSER):
# Anonymous tokens shouldn't count
p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [])
# Unless keep_all_tokens=True
p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True, keep_all_tokens=True)
self.assertEqual(p.parse("").children, [None, None, None])


# All invisible constructs shouldn't count
p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]


Loading…
Cancel
Save