Anonymous tokens would become visible if they had the same value as named tokens. That's because they are merged for the lexer. But after this change, the rules for visibility are based on their use in the rule, and not their name or identity.
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.0
@@ -20,9 +20,10 @@ class Symbol(object): | |||
class Terminal(Symbol): | |||
is_term = True | |||
@property | |||
def filter_out(self): | |||
return self.name.startswith('_') | |||
def __init__(self, name, filter_out=False): | |||
self.name = name | |||
self.filter_out = filter_out | |||
class NonTerminal(Symbol): | |||
is_term = False | |||
@@ -46,17 +47,14 @@ class Rule(object): | |||
class RuleOptions: | |||
def __init__(self, keep_all_tokens=False, expand1=False, filter_out=False, priority=None): | |||
def __init__(self, keep_all_tokens=False, expand1=False, priority=None): | |||
self.keep_all_tokens = keep_all_tokens | |||
self.expand1 = expand1 | |||
self.priority = priority | |||
self.filter_out = filter_out # remove this rule from the tree | |||
def __repr__(self): | |||
return 'RuleOptions(%r, %r, %r, %r)' % ( | |||
return 'RuleOptions(%r, %r, %r)' % ( | |||
self.keep_all_tokens, | |||
self.expand1, | |||
self.priority, | |||
self.filter_out | |||
) |
@@ -307,7 +307,7 @@ class ExtractAnonTokens(InlineTransformer): | |||
self.token_reverse[p] = tokendef | |||
self.tokens.append(tokendef) | |||
return Terminal(Token('TOKEN', token_name, -1)) | |||
return Terminal(Token('TOKEN', token_name, -1), filter_out=isinstance(p, PatternStr)) | |||
def _rfind(s, choices): | |||
@@ -415,8 +415,11 @@ class PrepareSymbols(Transformer): | |||
v ,= v | |||
if isinstance(v, Tree): | |||
return v | |||
return {'TOKEN': Terminal, | |||
'RULE': NonTerminal}[v.type](v.value) | |||
elif v.type == 'RULE': | |||
return NonTerminal(v.value) | |||
elif v.type == 'TOKEN': | |||
return Terminal(v.value, filter_out=v.startswith('_')) | |||
assert False | |||
def _choice_of_rules(rules): | |||
return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules]) | |||
@@ -532,7 +535,7 @@ def options_from_rule(name, *x): | |||
def symbols_from_strcase(expansion): | |||
return [Terminal(x) if is_terminal(x) else NonTerminal(x) for x in expansion] | |||
return [Terminal(x, filter_out=x.startswith('_')) if is_terminal(x) else NonTerminal(x) for x in expansion] | |||
class PrepareGrammar(InlineTransformer): | |||
def terminal(self, name): | |||
@@ -75,8 +75,9 @@ class ChildFilterLALR(ChildFilter): | |||
def _should_expand(sym): | |||
return not sym.is_term and sym.name.startswith('_') | |||
def maybe_create_child_filter(expansion, filter_out, ambiguous): | |||
to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion) if sym not in filter_out] | |||
def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous): | |||
to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion) | |||
if keep_all_tokens or not (sym.is_term and sym.filter_out)] | |||
if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include): | |||
return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include) | |||
@@ -97,10 +98,6 @@ class ParseTreeBuilder: | |||
self.user_aliases = {} | |||
def _init_builders(self, rules): | |||
filter_out = {rule.origin for rule in rules if rule.options and rule.options.filter_out} | |||
filter_out |= {sym for rule in rules for sym in rule.expansion if sym.is_term and sym.filter_out} | |||
assert all(t.name.startswith('_') for t in filter_out) | |||
for rule in rules: | |||
options = rule.options | |||
keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False) | |||
@@ -108,7 +105,7 @@ class ParseTreeBuilder: | |||
wrapper_chain = filter(None, [ | |||
(expand_single_child and not rule.alias) and ExpandSingleChild, | |||
maybe_create_child_filter(rule.expansion, () if keep_all_tokens else filter_out, self.ambiguous), | |||
maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous), | |||
self.propagate_positions and PropagatePositions, | |||
]) | |||
@@ -7,11 +7,6 @@ from .lexer import Lexer, ContextualLexer, Token | |||
from .common import GrammarError | |||
from .parsers import lalr_parser, earley, xearley, resolve_ambig, cyk | |||
from .tree import Tree | |||
from .grammar import Terminal, NonTerminal | |||
def terminals(seq): | |||
# return [Terminal(t) for t in seq] | |||
return seq | |||
class WithLexer: | |||
def init_traditional_lexer(self, lexer_conf): | |||
@@ -23,8 +18,8 @@ class WithLexer: | |||
states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()} | |||
always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else () | |||
self.lexer = ContextualLexer(lexer_conf.tokens, states, | |||
ignore=terminals(lexer_conf.ignore), | |||
always_accept=terminals(always_accept), | |||
ignore=lexer_conf.ignore, | |||
always_accept=always_accept, | |||
user_callbacks=lexer_conf.callbacks) | |||
def lex(self, text): | |||
@@ -649,28 +649,25 @@ def _make_parser_test(LEXER, PARSER): | |||
x = g.parse('b') | |||
def test_token_not_anon(self): | |||
"""Tests that "a" is matched as A, rather than an anonymous token. | |||
That means that "a" is not filtered out, despite being an 'immediate string'. | |||
Whether or not this is the intuitive behavior, I'm not sure yet. | |||
Perhaps the right thing to do is report a collision (if such is relevant) | |||
-Erez | |||
"""Tests that "a" is matched as an anonymous token, and not A. | |||
""" | |||
g = _Lark("""start: "a" | |||
A: "a" """) | |||
x = g.parse('a') | |||
self.assertEqual(len(x.children), 0, '"a" should be considered anonymous') | |||
self.assertEqual(len(x.children), 1, '"a" should not be considered anonymous') | |||
g = _Lark("""start: "a" A | |||
A: "a" """) | |||
x = g.parse('aa') | |||
self.assertEqual(len(x.children), 1, 'only "a" should be considered anonymous') | |||
self.assertEqual(x.children[0].type, "A") | |||
g = _Lark("""start: /a/ | |||
A: /a/ """) | |||
x = g.parse('a') | |||
self.assertEqual(len(x.children), 1, '/a/ should not be considered anonymous') | |||
self.assertEqual(x.children[0].type, "A") | |||
self.assertEqual(len(x.children), 1) | |||
self.assertEqual(x.children[0].type, "A", "A isn't associated with /a/") | |||
@unittest.skipIf(PARSER == 'cyk', "No empty rules") | |||
def test_maybe(self): | |||