Browse Source

Added option: Keep all tokens

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 years ago
parent
commit
1da851516c
3 changed files with 15 additions and 6 deletions
  1. lark/lark.py (+6, -4)
  2. lark/parse_tree_builder.py (+3, -2)
  3. tests/test_parser.py (+6, -0)

+ 6
- 4
lark/lark.py View File

@@ -63,8 +63,6 @@ class LarkOptions(object):
if self.parser == 'earley' and self.transformer:
raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm.'
'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)')
if self.keep_all_tokens:
raise NotImplementedError("keep_all_tokens: Not implemented yet!")

if o:
raise ValueError("Unknown options: %s" % o.keys())
@@ -121,7 +119,7 @@ class Lark:

assert isinstance(grammar, STRING_TYPE)

if self.options.cache_grammar or self.options.keep_all_tokens:
if self.options.cache_grammar:
raise NotImplementedError("Not available yet")

assert not self.options.profile, "Feature temporarily disabled"
@@ -142,8 +140,12 @@ class Lark:
assert self.options.parser == 'earley'
assert self.options.ambiguity in ('resolve', 'explicit', 'auto')

# Parse the grammar file and compose the grammars (TODO)
self.grammar = load_grammar(grammar, source)

# Compile the EBNF grammar into BNF
tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=bool(lexer), start=self.options.start)

self.ignore_tokens = self.grammar.extra['ignore']

self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex)
@@ -162,7 +164,7 @@ class Lark:

def _build_parser(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer)
self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class, self.options.propagate_positions)
self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens)
rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer)
if self.profiler:
for f in dir(callback):


+ 3
- 2
lark/parse_tree_builder.py View File

@@ -65,9 +65,10 @@ def propagate_positions_wrapper(f):
return _f

class ParseTreeBuilder:
def __init__(self, tree_class, propagate_positions=False):
def __init__(self, tree_class, propagate_positions=False, keep_all_tokens=False):
self.tree_class = tree_class
self.propagate_positions = propagate_positions
self.always_keep_all_tokens = keep_all_tokens

def _create_tree_builder_function(self, name):
tree_class = self.tree_class
@@ -88,7 +89,7 @@ class ParseTreeBuilder:
filter_out.add(origin)

for origin, (expansions, options) in rules.items():
keep_all_tokens = options.keep_all_tokens if options else False
keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False)
expand1 = options.expand1 if options else False
create_token = options.create_token if options else False



+ 6
- 0
tests/test_parser.py View File

@@ -538,6 +538,12 @@ def _make_parser_test(LEXER, PARSER):
g.parse("+2e-9")
self.assertRaises(ParseError, g.parse, "+2e-9e")

def test_keep_all_tokens(self):
    # With keep_all_tokens=True, parsing 'aaa' is expected to yield one
    # child per matched "a" in the resulting tree.
    parser = _Lark("""start: "a"+ """, keep_all_tokens=True)
    children = parser.parse('aaa').children
    self.assertEqual(children, ['a', 'a', 'a'])


def test_token_flags(self):
l = _Lark("""!start: "a"i+
"""


Loading…
Cancel
Save