From 1da851516ceb3e8f374d9649b23256640fa11b77 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 29 May 2017 00:05:54 +0300 Subject: [PATCH] Added option: Keep all tokens --- lark/lark.py | 10 ++++++---- lark/parse_tree_builder.py | 5 +++-- tests/test_parser.py | 6 ++++++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/lark/lark.py b/lark/lark.py index 3d8cbcd..488ed5c 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -63,8 +63,6 @@ class LarkOptions(object): if self.parser == 'earley' and self.transformer: raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm.' 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)') - if self.keep_all_tokens: - raise NotImplementedError("keep_all_tokens: Not implemented yet!") if o: raise ValueError("Unknown options: %s" % o.keys()) @@ -121,7 +119,7 @@ class Lark: assert isinstance(grammar, STRING_TYPE) - if self.options.cache_grammar or self.options.keep_all_tokens: + if self.options.cache_grammar: raise NotImplementedError("Not available yet") assert not self.options.profile, "Feature temporarily disabled" @@ -142,8 +140,12 @@ class Lark: assert self.options.parser == 'earley' assert self.options.ambiguity in ('resolve', 'explicit', 'auto') + # Parse the grammar file and compose the grammars (TODO) self.grammar = load_grammar(grammar, source) + + # Compile the EBNF grammar into BNF tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=bool(lexer), start=self.options.start) + self.ignore_tokens = self.grammar.extra['ignore'] self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex) @@ -162,7 +164,7 @@ class Lark: def _build_parser(self): self.parser_class = get_frontend(self.options.parser, self.options.lexer) - self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class, self.options.propagate_positions) + self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens) rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer) if self.profiler: for f in dir(callback): diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index b3bc522..0c21bfb 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -65,9 +65,10 @@ def propagate_positions_wrapper(f): return _f class ParseTreeBuilder: - def __init__(self, tree_class, propagate_positions=False): + def __init__(self, tree_class, propagate_positions=False, keep_all_tokens=False): self.tree_class = tree_class self.propagate_positions = propagate_positions + self.always_keep_all_tokens = keep_all_tokens def _create_tree_builder_function(self, name): tree_class = self.tree_class @@ -88,7 +89,7 @@ class ParseTreeBuilder: filter_out.add(origin) for origin, (expansions, options) in rules.items(): - keep_all_tokens = options.keep_all_tokens if options else False + keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False) expand1 = options.expand1 if options else False create_token = options.create_token if options else False diff --git a/tests/test_parser.py b/tests/test_parser.py index 20fdb87..55c010e 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -538,6 +538,12 @@ def _make_parser_test(LEXER, PARSER): g.parse("+2e-9") self.assertRaises(ParseError, g.parse, "+2e-9e") + def test_keep_all_tokens(self): + l = _Lark("""start: "a"+ """, keep_all_tokens=True) + tree = l.parse('aaa') + self.assertEqual(tree.children, ['a', 'a', 'a']) + + def test_token_flags(self): l = _Lark("""!start: "a"i+ """