From 3112259b2388fa9fc3cdc2c77c85c416e3a80c58 Mon Sep 17 00:00:00 2001
From: MegaIng1
Date: Thu, 1 Oct 2020 16:34:52 +0200
Subject: [PATCH 1/3] Fix for maybe_placeholders when keep_all_tokens=True

---
 lark/lark.py               |  4 ++--
 lark/load_grammar.py       | 18 +++++++++++-------
 lark/parse_tree_builder.py |  5 ++---
 tests/test_parser.py       |  4 ++++
 4 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/lark/lark.py b/lark/lark.py
index 8799610..72c8ad0 100644
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -267,7 +267,7 @@ class Lark(Serialize):
             assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )
 
         # Parse the grammar file and compose the grammars (TODO)
-        self.grammar = load_grammar(grammar, self.source, re_module)
+        self.grammar = load_grammar(grammar, self.source, re_module, self.options.keep_all_tokens)
 
         # Compile the EBNF grammar into BNF
         self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)
@@ -322,7 +322,7 @@ class Lark(Serialize):
         self._callbacks = None
         # we don't need these callbacks if we aren't building a tree
         if self.options.ambiguity != 'forest':
-            self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
+            self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
             self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
 
     def _build_parser(self):
diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index cd36e4b..156a1be 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -650,7 +650,7 @@ class Grammar:
 
 
 _imported_grammars = {}
-def import_grammar(grammar_path, re_, base_paths=[]):
+def import_grammar(grammar_path, loader, base_paths=[]):
     if grammar_path not in _imported_grammars:
         import_paths = base_paths + IMPORT_PATHS
         for import_path in import_paths:
@@ -658,7 +658,7 @@ def import_grammar(grammar_path, re_, base_paths=[]):
             joined_path = os.path.join(import_path, grammar_path)
             with open(joined_path, encoding='utf8') as f:
                 text = f.read()
-            grammar = load_grammar(text, joined_path, re_)
+            grammar = loader.load_grammar(text, joined_path)
             _imported_grammars[grammar_path] = grammar
             break
     else:
@@ -803,7 +803,7 @@ class GrammarLoader:
         ('%ignore expects a value', ['%ignore %import\n']),
     ]
 
-    def __init__(self, re_module):
+    def __init__(self, re_module, always_keep_all_tokens):
        terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]
 
         rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
@@ -816,6 +816,7 @@ class GrammarLoader:
 
         self.canonize_tree = CanonizeTree()
         self.re_module = re_module
+        self.always_keep_all_tokens = always_keep_all_tokens
 
     def load_grammar(self, grammar_text, grammar_name='<?>'):
         "Parse grammar_text, verify, and create Grammar object. Display nice messages on error."
@@ -901,7 +902,7 @@ class GrammarLoader:
         # import grammars
         for dotted_path, (base_paths, aliases) in imports.items():
             grammar_path = os.path.join(*dotted_path) + EXT
-            g = import_grammar(grammar_path, self.re_module, base_paths=base_paths)
+            g = import_grammar(grammar_path, self, base_paths=base_paths)
             new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)
 
             term_defs += new_td
@@ -946,7 +947,10 @@ class GrammarLoader:
         rules = rule_defs
 
         rule_names = {}
-        for name, params, _x, _o in rules:
+        for name, params, _x, option in rules:
+            if self.always_keep_all_tokens: # We need to do this somewhere. Might as well prevent an additional loop
+                option.keep_all_tokens = True
+
             if name.startswith('__'):
                 raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)
             if name in rule_names:
@@ -981,5 +985,5 @@ class GrammarLoader:
 
 
 
-def load_grammar(grammar, source, re_):
-    return GrammarLoader(re_).load_grammar(grammar, source)
+def load_grammar(grammar, source, re_, always_keep_all_tokens):
+    return GrammarLoader(re_, always_keep_all_tokens).load_grammar(grammar, source)
diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py
index 8b81d29..a4c4330 100644
--- a/lark/parse_tree_builder.py
+++ b/lark/parse_tree_builder.py
@@ -299,10 +299,9 @@ def apply_visit_wrapper(func, name, wrapper):
 
 
 class ParseTreeBuilder:
-    def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False):
+    def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False):
         self.tree_class = tree_class
         self.propagate_positions = propagate_positions
-        self.always_keep_all_tokens = keep_all_tokens
         self.ambiguous = ambiguous
         self.maybe_placeholders = maybe_placeholders
 
@@ -311,7 +310,7 @@ class ParseTreeBuilder:
     def _init_builders(self, rules):
         for rule in rules:
             options = rule.options
-            keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens
+            keep_all_tokens = options.keep_all_tokens
             expand_single_child = options.expand1
 
             wrapper_chain = list(filter(None, [
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 83336c5..d97c810 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -2041,6 +2041,10 @@ def _make_parser_test(LEXER, PARSER):
         # Anonymous tokens shouldn't count
         p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
         self.assertEqual(p.parse("").children, [])
+
+        # Unless keep_all_tokens=True
+        p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True, keep_all_tokens=True)
+        self.assertEqual(p.parse("").children, [None, None, None])
 
         # All invisible constructs shouldn't count
         p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]

From ab5bc3f47e1a83f2c420fd8bfb4108719baa8d14 Mon Sep 17 00:00:00 2001
From: MegaIng1
Date: Tue, 6 Oct 2020 02:32:25 +0200
Subject: [PATCH 2/3] Improved comment

---
 lark/load_grammar.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index 156a1be..08990f2 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -948,7 +948,8 @@ class GrammarLoader:
 
         rule_names = {}
         for name, params, _x, option in rules:
-            if self.always_keep_all_tokens: # We need to do this somewhere. Might as well prevent an additional loop
+            # We can't just not throw away the tokens later; we need option.keep_all_tokens to correctly generate maybe_placeholders
+            if self.always_keep_all_tokens:
                 option.keep_all_tokens = True
 
             if name.startswith('__'):

From e4d73526d4eec4f049fd7737e6f3f813c82aa357 Mon Sep 17 00:00:00 2001
From: Erez Sh
Date: Tue, 6 Oct 2020 18:19:52 +0300
Subject: [PATCH 3/3] Refactor

---
 lark/lark.py         |  8 +++++++-
 lark/load_grammar.py | 47 ++++++++++++++++++++++-----------------------
 2 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/lark/lark.py b/lark/lark.py
index 835a6eb..770b821 100644
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -335,7 +335,13 @@ class Lark(Serialize):
         self._callbacks = None
         # we don't need these callbacks if we aren't building a tree
         if self.options.ambiguity != 'forest':
-            self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
+            self._parse_tree_builder = ParseTreeBuilder(
+                self.rules,
+                self.options.tree_class or Tree,
+                self.options.propagate_positions,
+                self.options.parser!='lalr' and self.options.ambiguity=='explicit',
+                self.options.maybe_placeholders
+            )
             self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
 
     def _build_parser(self):
diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index 9fb49f6..d039638 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -650,22 +650,6 @@ class Grammar:
 
 
 _imported_grammars = {}
-def import_grammar(grammar_path, loader, base_paths=[]):
-    if grammar_path not in _imported_grammars:
-        import_paths = base_paths + IMPORT_PATHS
-        for import_path in import_paths:
-            with suppress(IOError):
-                joined_path = os.path.join(import_path, grammar_path)
-                with open(joined_path, encoding='utf8') as f:
-                    text = f.read()
-                grammar = loader.load_grammar(text, joined_path)
-                _imported_grammars[grammar_path] = grammar
-                break
-        else:
-            open(grammar_path, encoding='utf8')
-            assert False
-
-    return _imported_grammars[grammar_path]
 
 def import_from_grammar_into_namespace(grammar, namespace, aliases):
     """Returns all rules and terminals of grammar, prepended
@@ -803,7 +787,7 @@ class GrammarLoader:
         ('%ignore expects a value', ['%ignore %import\n']),
     ]
 
-    def __init__(self, re_module, always_keep_all_tokens):
+    def __init__(self, re_module, global_keep_all_tokens):
         terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]
 
         rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
@@ -816,7 +800,24 @@ class GrammarLoader:
 
         self.canonize_tree = CanonizeTree()
         self.re_module = re_module
-        self.always_keep_all_tokens = always_keep_all_tokens
+        self.global_keep_all_tokens = global_keep_all_tokens
+
+    def import_grammar(self, grammar_path, base_paths=[]):
+        if grammar_path not in _imported_grammars:
+            import_paths = base_paths + IMPORT_PATHS
+            for import_path in import_paths:
+                with suppress(IOError):
+                    joined_path = os.path.join(import_path, grammar_path)
+                    with open(joined_path, encoding='utf8') as f:
+                        text = f.read()
+                    grammar = self.load_grammar(text, joined_path)
+                    _imported_grammars[grammar_path] = grammar
+                    break
+            else:
+                open(grammar_path, encoding='utf8')  # Force a file not found error
+                assert False
+
+        return _imported_grammars[grammar_path]
 
     def load_grammar(self, grammar_text, grammar_name='<?>'):
         "Parse grammar_text, verify, and create Grammar object. Display nice messages on error."
@@ -902,7 +903,7 @@ class GrammarLoader:
         # import grammars
         for dotted_path, (base_paths, aliases) in imports.items():
             grammar_path = os.path.join(*dotted_path) + EXT
-            g = import_grammar(grammar_path, self, base_paths=base_paths)
+            g = self.import_grammar(grammar_path, base_paths=base_paths)
             new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)
 
             term_defs += new_td
@@ -949,9 +950,9 @@ class GrammarLoader:
         rule_names = {}
         for name, params, _x, option in rules:
             # We can't just not throw away the tokens later; we need option.keep_all_tokens to correctly generate maybe_placeholders
-            if self.always_keep_all_tokens:
+            if self.global_keep_all_tokens:
                 option.keep_all_tokens = True
-
+
             if name.startswith('__'):
                 raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)
             if name in rule_names:
@@ -986,5 +987,5 @@ class GrammarLoader:
 
 
 
-def load_grammar(grammar, source, re_, always_keep_all_tokens):
-    return GrammarLoader(re_, always_keep_all_tokens).load_grammar(grammar, source)
+def load_grammar(grammar, source, re_, global_keep_all_tokens):
+    return GrammarLoader(re_, global_keep_all_tokens).load_grammar(grammar, source)