From bdcd2e0011bc0cd4fa3c35f59f28c78a1fa61a78 Mon Sep 17 00:00:00 2001 From: MegaIng Date: Tue, 29 Jun 2021 22:32:56 +0200 Subject: [PATCH] fix tree_matcher when keep_all_tokens=True by setting sym.filter_out correctly. --- lark/load_grammar.py | 5 ++++- tests/test_reconstructor.py | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index dcb4c81..c7b98a7 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -630,7 +630,10 @@ class Grammar: else: exp_options = options - assert all(isinstance(x, Symbol) for x in expansion), expansion + for sym in expansion: + assert isinstance(sym, Symbol) + if sym.is_term and exp_options and exp_options.keep_all_tokens: + sym.filter_out = False rule = Rule(NonTerminal(name), expansion, i, alias, exp_options) compiled_rules.append(rule) diff --git a/tests/test_reconstructor.py b/tests/test_reconstructor.py index f132312..e2f2dbe 100644 --- a/tests/test_reconstructor.py +++ b/tests/test_reconstructor.py @@ -3,6 +3,7 @@ import json import sys import unittest +from itertools import product from unittest import TestCase from lark import Lark @@ -20,8 +21,8 @@ def _remove_ws(s): class TestReconstructor(TestCase): - def assert_reconstruct(self, grammar, code): - parser = Lark(grammar, parser='lalr', maybe_placeholders=False) + def assert_reconstruct(self, grammar, code, **options): + parser = Lark(grammar, parser='lalr', maybe_placeholders=False, **options) tree = parser.parse(code) new = Reconstructor(parser).reconstruct(tree) self.assertEqual(_remove_ws(code), _remove_ws(new)) @@ -142,6 +143,17 @@ class TestReconstructor(TestCase): new_json = Reconstructor(json_parser).reconstruct(tree) self.assertEqual(json.loads(new_json), json.loads(test_json)) + def test_keep_all_tokens(self): + g = """ + start: "a"? _B? c? _d? + _B: "b" + c: "c" + _d: "d" + """ + examples = list(map(''.join, product(('', 'a'), ('', 'b'), ('', 'c'), ('', 'd'), ))) + for code in examples: + self.assert_reconstruct(g, code, keep_all_tokens=True) + @unittest.skipIf(sys.version_info < (3, 0), "Python 2 does not play well with Unicode.") def test_switch_grammar_unicode_terminal(self): """