@@ -125,7 +125,8 @@ Lark has no dependencies.
### Projects using Lark
- [mappyfile](https://github.com/geographika/mappyfile) - A pure Python MapFile parser for working with MapServer
- [mappyfile](https://github.com/geographika/mappyfile) - a MapFile parser for working with MapServer configuration
- [pytreeview](https://gitlab.com/parmenti/pytreeview) - a lightweight tree-based grammar explorer
Using Lark? Send me a message and I'll add your project!
@@ -251,6 +252,22 @@ Lark offers both Earley and LALR(1), which means you can choose between the most
Lark uses the [MIT license](LICENSE).
## Contribute
Lark is currently accepting pull-requests.
There are many ways you can help the project:
* Improve the performance of Lark's parsing algorithm
* Implement macros for grammars (important for grammar composition)
* Write new grammars for Lark's library
* Write & improve the documentation
* Write a blog post introducing Lark to your audience
If you're interested in taking one of these on, let me know and I will provide more details and assist you in the process.
## Contact
If you have any questions or want to contribute, you can email me at erezshin at gmail com.
If you have any questions or want my assistance, you can email me at erezshin at gmail com.
I'm also available for contract work.
@@ -116,11 +116,12 @@ AWAIT: "await"
          | atom_expr "." NAME -> getattr
          | atom
?atom: "(" [yield_expr|testlist_comp] ")"
     | "[" [testlist_comp] "]"
     | "{" [dictorsetmaker] "}"
?atom: "(" [yield_expr|testlist_comp] ")" -> tuple
     | "[" [testlist_comp] "]" -> list
     | "{" [dictorsetmaker] "}" -> dict
     | NAME -> var
     | number | string+ | "..."
     | number | string+
     | "..." -> ellipsis
     | "None" -> const_none
     | "True" -> const_true
     | "False" -> const_false
@@ -3,4 +3,4 @@ from .common import ParseError, GrammarError
from .lark import Lark
from .utils import inline_args
__version__ = "0.2.7"
__version__ = "0.2.10"
@@ -1,4 +1,5 @@
import re
import sre_parse
class GrammarError(Exception):
    pass
@@ -40,7 +41,7 @@ class LexerConf:
class ParserConf:
    def __init__(self, rules, callback, start):
        assert all(len(r)==3 for r in rules)
        assert all(len(r) == 4 for r in rules)
        self.rules = rules
        self.callback = callback
        self.start = start
@@ -57,9 +58,9 @@ class Pattern(object):
    # Pattern Hashing assumes all subclasses have a different priority!
    def __hash__(self):
        return hash((self.priority, self.value))
        return hash((type(self), self.value))
    def __eq__(self, other):
        return self.priority == other.priority and self.value == other.value
        return type(self) == type(other) and self.value == other.value
    def _get_flags(self):
        if self.flags:
@@ -71,13 +72,21 @@ class PatternStr(Pattern):
    def to_regexp(self):
        return self._get_flags() + re.escape(self.value)
    priority = 0
    @property
    def min_width(self):
        return len(self.value)
    max_width = min_width
class PatternRE(Pattern):
    def to_regexp(self):
        return self._get_flags() + self.value
    priority = 1
    @property
    def min_width(self):
        return sre_parse.parse(self.to_regexp()).getwidth()[0]
    @property
    def max_width(self):
        return sre_parse.parse(self.to_regexp()).getwidth()[1]
class TokenDef(object):
    def __init__(self, name, pattern):
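(For context on the `min_width`/`max_width` properties above: a minimal sketch, assuming CPython's internal `sre_parse` module, of how `getwidth()` reports the shortest and longest strings a regexp can match. The pattern below is illustrative only.)

```python
import sre_parse

# getwidth() returns a (min, max) tuple of possible match lengths.
min_w, max_w = sre_parse.parse(r"ab+").getwidth()
print(min_w)   # 2 -- "ab" is the shortest possible match
print(max_w)   # a very large sentinel value, meaning "unbounded"
```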
@@ -39,6 +39,7 @@ class LarkOptions(object):
        postlex - Lexer post-processing (Default: None)
        start - The start symbol (Default: start)
        profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False)
        propagate_positions - Experimental. Don't use yet.
    """
    __doc__ += OPTIONS_DOC
    def __init__(self, options_dict):
@@ -55,14 +56,13 @@ class LarkOptions(object):
        self.start = o.pop('start', 'start')
        self.profile = o.pop('profile', False)
        self.ambiguity = o.pop('ambiguity', 'auto')
        self.propagate_positions = o.pop('propagate_positions', False)
        assert self.parser in ('earley', 'lalr', None)
        if self.parser == 'earley' and self.transformer:
            raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm. '
                             'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)')
        if self.keep_all_tokens:
            raise NotImplementedError("keep_all_tokens: Not implemented yet!")
        if o:
            raise ValueError("Unknown options: %s" % o.keys())
@@ -119,7 +119,7 @@ class Lark:
        assert isinstance(grammar, STRING_TYPE)
        if self.options.cache_grammar or self.options.keep_all_tokens:
        if self.options.cache_grammar:
            raise NotImplementedError("Not available yet")
        assert not self.options.profile, "Feature temporarily disabled"
@@ -142,8 +142,12 @@ class Lark:
            assert self.options.parser == 'earley'
            assert self.options.ambiguity in ('resolve', 'explicit', 'auto')
        # Parse the grammar file and compose the grammars (TODO)
        self.grammar = load_grammar(grammar, source)
        # Compile the EBNF grammar into BNF
        tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=bool(lexer), start=self.options.start)
        self.ignore_tokens = self.grammar.extra['ignore']
        self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex)
@@ -162,7 +166,7 @@ class Lark:
    def _build_parser(self):
        self.parser_class = get_frontend(self.options.parser, self.options.lexer)
        self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class)
        self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens)
        rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer)
        if self.profiler:
            for f in dir(callback):
@@ -1,7 +1,6 @@
## Lexer Implementation
import re
import sre_parse
from .utils import Str, classify
from .common import is_terminal, PatternStr, PatternRE, TokenDef
@@ -120,8 +119,7 @@ class Lexer(object):
            except:
                raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
            width = sre_parse.parse(t.pattern.to_regexp()).getwidth()
            if width[0] == 0:
            if t.pattern.min_width == 0:
                raise LexError("Lexer does not allow zero-width tokens. (%s: %s)" % (t.name, t.pattern))
        token_names = {t.name for t in tokens}
@@ -133,7 +131,7 @@ class Lexer(object):
        self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())]
        self.ignore_types = [t for t in ignore]
        tokens.sort(key=lambda x:(x.pattern.priority, len(x.pattern.value)), reverse=True)
        tokens.sort(key=lambda x:x.pattern.max_width, reverse=True)
        tokens, self.callback = _create_unless(tokens)
        assert all(self.callback.values())
@@ -155,17 +153,27 @@ class Lexer(object):
                if m:
                    value = m.group(0)
                    type_ = type_from_index[m.lastindex]
                    if type_ not in ignore_types:
                    to_yield = type_ not in ignore_types
                    if to_yield:
                        t = Token(type_, value, lex_pos, line, lex_pos - col_start_pos)
                        end_col = t.column + len(value)
                        if t.type in self.callback:
                            t = self.callback[t.type](t)
                        yield t
                    if type_ in newline_types:
                        newlines = value.count(self.newline_char)
                        if newlines:
                            line += newlines
                            col_start_pos = lex_pos + value.rindex(self.newline_char)
                            last_newline_index = value.rindex(self.newline_char) + 1
                            col_start_pos = lex_pos + last_newline_index
                            end_col = len(value) - last_newline_index
                    if to_yield:
                        t.end_line = line
                        t.end_col = end_col
                        yield t
                    lex_pos += len(value)
                    break
            else:
@@ -75,6 +75,7 @@ TOKENS = { | |||
'_TO': '->', | |||
'_IGNORE': r'%ignore', | |||
'_IMPORT': r'%import', | |||
'NUMBER': '\d+', | |||
} | |||
RULES = { | |||
@@ -82,7 +83,8 @@ RULES = { | |||
'_list': ['_item', '_list _item'], | |||
'_item': ['rule', 'token', 'statement', '_NL'], | |||
'rule': ['RULE _COLON expansions _NL'], | |||
'rule': ['RULE _COLON expansions _NL', | |||
'RULE _DOT NUMBER _COLON expansions _NL'], | |||
'expansions': ['alias', | |||
'expansions _OR alias', | |||
'expansions _NL _OR alias'], | |||
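(The extra `RULE _DOT NUMBER _COLON` alternative is what enables the new `rule_name.<priority>` syntax in grammars. A hedged usage sketch, mirroring the `test_earley_prioritization` test added at the end of this diff:)

```python
from lark import Lark

# A rule may carry a ".<priority>" suffix; when the Earley parser faces an
# ambiguity, the higher-priority rule wins (see _compare_rules further down).
parser = Lark("""
    start: a | b
    a.1: "a"
    b.2: "a"
""", parser='earley', lexer='standard')

print(parser.parse("a").children[0].data)   # expected: 'b'
```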
@@ -313,7 +315,7 @@ class PrepareLiterals(InlineTransformer):
class SplitLiterals(InlineTransformer):
    def pattern(self, p):
        if isinstance(p, PatternStr) and len(p.value)>1:
            return T('expansion', [T('pattern', [PatternStr(ch)]) for ch in p.value])
            return T('expansion', [T('pattern', [PatternStr(ch, flags=p.flags)]) for ch in p.value])
        return T('pattern', [p])
class TokenTreeToPattern(Transformer):
@@ -470,21 +472,29 @@ class Grammar:
class RuleOptions:
    def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False):
    def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False, priority=None):
        self.keep_all_tokens = keep_all_tokens
        self.expand1 = expand1
        self.create_token = create_token  # used for scanless postprocessing
        self.priority = priority
        self.filter_out = filter_out  # remove this rule from the tree
                                      # used for "token"-rules in scanless
    @classmethod
    def from_rule(cls, name, expansions):
    def from_rule(cls, name, *x):
        if len(x) > 1:
            priority, expansions = x
            priority = int(priority)
        else:
            expansions ,= x
            priority = None
        keep_all_tokens = name.startswith('!')
        name = name.lstrip('!')
        expand1 = name.startswith('?')
        name = name.lstrip('?')
        return name, expansions, cls(keep_all_tokens, expand1)
        return name, expansions, cls(keep_all_tokens, expand1, priority=priority)
@@ -605,7 +615,7 @@ class GrammarLoader:
                raise GrammarError("Token '%s' defined more than once" % name)
            token_names.add(name)
        rules = [RuleOptions.from_rule(name, x) for name, x in rule_defs]
        rules = [RuleOptions.from_rule(*x) for x in rule_defs]
        rule_names = set()
        for name, _x, _o in rules:
@@ -1,4 +1,5 @@ | |||
from .common import is_terminal, GrammarError | |||
from .utils import suppress | |||
from .lexer import Token | |||
class Callback(object): | |||
@@ -42,10 +43,32 @@ def create_rule_handler(expansion, usermethod, keep_all_tokens, filter_out): | |||
# else, if no filtering required.. | |||
return usermethod | |||
def propagate_positions_wrapper(f): | |||
def _f(args): | |||
res = f(args) | |||
if args: | |||
for a in args: | |||
with suppress(AttributeError): | |||
res.line = a.line | |||
res.column = a.column | |||
break | |||
for a in reversed(args): | |||
with suppress(AttributeError): | |||
res.end_line = a.end_line | |||
res.end_col = a.end_col | |||
break | |||
return res | |||
return _f | |||
class ParseTreeBuilder: | |||
def __init__(self, tree_class): | |||
def __init__(self, tree_class, propagate_positions=False, keep_all_tokens=False): | |||
self.tree_class = tree_class | |||
self.propagate_positions = propagate_positions | |||
self.always_keep_all_tokens = keep_all_tokens | |||
def _create_tree_builder_function(self, name): | |||
tree_class = self.tree_class | |||
@@ -66,7 +89,7 @@ class ParseTreeBuilder: | |||
filter_out.add(origin) | |||
for origin, (expansions, options) in rules.items(): | |||
keep_all_tokens = options.keep_all_tokens if options else False | |||
keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False) | |||
expand1 = options.expand1 if options else False | |||
create_token = options.create_token if options else False | |||
@@ -92,11 +115,14 @@ class ParseTreeBuilder: | |||
alias_handler = create_rule_handler(expansion, f, keep_all_tokens, filter_out) | |||
if self.propagate_positions: | |||
alias_handler = propagate_positions_wrapper(alias_handler) | |||
callback_name = 'autoalias_%s_%s' % (_origin, '_'.join(expansion)) | |||
if hasattr(callback, callback_name): | |||
raise GrammarError("Rule expansion '%s' already exists in rule %s" % (' '.join(expansion), origin)) | |||
setattr(callback, callback_name, alias_handler) | |||
new_rules.append(( _origin, expansion, callback_name )) | |||
new_rules.append(( _origin, expansion, callback_name, options )) | |||
return new_rules, callback |
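(A hedged sketch of what the new `propagate_positions` option is meant to enable. The option is marked experimental above ("Don't use yet"), so attribute names and behaviour may still change; the grammar and expected attributes below are assumptions based on the wrapper code in this diff, not documented API.)

```python
from lark import Lark

# With propagate_positions=True, the wrapper above copies line/column data
# from the first and last positioned children onto each tree node.
parser = Lark("""
    start: WORD
    WORD: /[a-z]+/
""", parser='lalr', propagate_positions=True)

tree = parser.parse("hello")
print(tree.line, tree.column)        # position of the first token
print(tree.end_line, tree.end_col)   # end position of the last token
```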
@@ -131,7 +131,7 @@ class Earley_NoLex:
    def __init__(self, lexer_conf, parser_conf, options=None):
        self.token_by_name = {t.name:t for t in lexer_conf.tokens}
        rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in parser_conf.rules]
        rules = [(n, list(self._prepare_expansion(x)), a, o) for n,x,a,o in parser_conf.rules]
        resolve_ambiguity = (options.ambiguity=='resolve') if options else True
        self.parser = earley.Parser(rules,
@@ -158,7 +158,7 @@ class Earley(WithLexer):
    def __init__(self, lexer_conf, parser_conf, options=None):
        WithLexer.__init__(self, lexer_conf)
        rules = [(n, self._prepare_expansion(x), a) for n,x,a in parser_conf.rules]
        rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules]
        resolve_ambiguity = (options.ambiguity=='resolve') if options else True
        self.parser = earley.Parser(rules,
@@ -29,6 +29,9 @@ class Derivation(Tree):
        Tree.__init__(self, 'drv', items or [])
        self.rule = rule
    def _pretty_label(self):   # Nicer pretty for debugging the parser
        return self.rule.origin if self.rule else self.data
END_TOKEN = EndToken()
class Item(object):
@@ -106,8 +109,11 @@ class Column:
                    new_tree = old_tree.copy()
                    new_tree.rule = old_tree.rule
                    old_tree.set('_ambig', [new_tree])
                    old_tree.rule = None   # No longer a 'drv' node
                    if item.tree.children[0] is old_tree:   # XXX a little hacky!
                        raise ParseError("Infinite recursion in grammar!")
                        raise ParseError("Infinite recursion in grammar! (Rule %s)" % item.rule)
                    old_tree.children.append(item.tree)
            else:
                self.completed[item] = item
@@ -218,7 +224,13 @@ class ApplyCallbacks(Transformer_NoRecurse):
            return Tree(rule.origin, children)
def _compare_rules(rule1, rule2):
    assert rule1.origin == rule2.origin
    if rule1.options and rule2.options:
        if rule1.options.priority is not None and rule2.options.priority is not None:
            assert rule1.options.priority != rule2.options.priority, "Priority is the same between both rules: %s == %s" % (rule1, rule2)
            return -compare(rule1.options.priority, rule2.options.priority)
    if rule1.origin != rule2.origin:
        return 0
    c = compare( len(rule1.expansion), len(rule2.expansion))
    if rule1.origin.startswith('__'):   # XXX hack! We need to set priority in parser, not here
        c = -c
@@ -228,6 +240,20 @@ def _compare_drv(tree1, tree2):
    if not (isinstance(tree1, Tree) and isinstance(tree2, Tree)):
        return compare(tree1, tree2)
    try:
        rule1, rule2 = tree1.rule, tree2.rule
    except AttributeError:
        # Probably trees that don't take part in this parse (better way to distinguish?)
        return compare(tree1, tree2)
    # XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse,
    # when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be
    # computationally inefficient. So we handle it here.
    if tree1.data == '_ambig':
        _resolve_ambig(tree1)
    if tree2.data == '_ambig':
        _resolve_ambig(tree2)
    c = _compare_rules(tree1.rule, tree2.rule)
    if c:
        return c
@@ -241,12 +267,19 @@ def _compare_drv(tree1, tree2):
    return compare(len(tree1.children), len(tree2.children))
def _resolve_ambig(tree):
    assert tree.data == '_ambig'
    best = min(tree.children, key=cmp_to_key(_compare_drv))
    assert best.data == 'drv'
    tree.set('drv', best.children)
    tree.rule = best.rule   # needed for applying callbacks
    assert tree.data != '_ambig'
class ResolveAmbig(Visitor_NoRecurse):
    def _ambig(self, tree):
        best = min(tree.children, key=cmp_to_key(_compare_drv))
        assert best.data == 'drv'
        tree.set('drv', best.children)
        tree.rule = best.rule   # needed for applying callbacks
        _resolve_ambig(tree)
# RULES = [
@@ -7,10 +7,11 @@ class Rule(object):
        origin : a symbol
        expansion : a list of symbols
    """
    def __init__(self, origin, expansion, alias=None):
    def __init__(self, origin, expansion, alias=None, options=None):
        self.origin = origin
        self.expansion = expansion
        self.alias = alias
        self.options = options
    def __repr__(self):
        return '<%s : %s>' % (self.origin, ' '.join(map(str,self.expansion)))
@@ -111,12 +112,12 @@ class GrammarAnalyzer(object):
        self.debug = debug
        rule_tuples = list(rule_tuples)
        rule_tuples.append(('$root', [start_symbol, '$end']))
        rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples]
        rule_tuples = [(t[0], t[1], None, None) if len(t)==2 else t for t in rule_tuples]
        self.rules = set()
        self.rules_by_origin = {o: [] for o, _x, _a in rule_tuples}
        for origin, exp, alias in rule_tuples:
            r = Rule( origin, exp, alias )
        self.rules_by_origin = {o: [] for o, _x, _a, _opt in rule_tuples}
        for origin, exp, alias, options in rule_tuples:
            r = Rule( origin, exp, alias, options )
            self.rules.add(r)
            self.rules_by_origin[origin].append(r)
@@ -9,6 +9,7 @@ from .lalr_analysis import LALR_Analyzer, ACTION_SHIFT
class Parser(object):
    def __init__(self, parser_conf):
        assert all(o is None or o.priority is None for n,x,a,o in parser_conf.rules), "LALR doesn't yet support prioritization"
        self.analysis = LALR_Analyzer(parser_conf.rules, parser_conf.start)
        self.analysis.compute_lookahead()
        self.callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
@@ -34,7 +35,7 @@ class Parser(object):
                raise UnexpectedToken(token, expected, seq, i)
        def reduce(rule, size):
        def reduce(rule, size, end=False):
            if size:
                s = value_stack[-size:]
                del state_stack[-size:]
@@ -44,7 +45,7 @@ class Parser(object):
            res = self.callbacks[rule](s)
            if len(state_stack) == 1 and rule.origin == self.analysis.start_symbol:
            if end and len(state_stack) == 1 and rule.origin == self.analysis.start_symbol:
                return res
            _action, new_state = get_action(rule.origin)
@@ -73,7 +74,7 @@ class Parser(object):
        while True:
            _action, rule = get_action('$end')
            assert _action == 'reduce'
            res = reduce(*rule)
            res = reduce(*rule, end=True)
            if res:
                assert state_stack == [self.analysis.init_state_idx] and not value_stack, len(state_stack)
                return res
@@ -10,11 +10,14 @@ class Tree(object):
    def __repr__(self):
        return 'Tree(%s, %s)' % (self.data, self.children)
    def _pretty_label(self):
        return self.data
    def _pretty(self, level, indent_str):
        if len(self.children) == 1 and not isinstance(self.children[0], Tree):
            return [ indent_str*level, self.data, '\t', '%s' % self.children[0], '\n']
        l = [ indent_str*level, self.data, '\n' ]
        l = [ indent_str*level, self._pretty_label(), '\n' ]
        for n in self.children:
            if isinstance(n, Tree):
                l += n._pretty(level+1, indent_str)
@@ -62,10 +65,14 @@ class Tree(object):
                yield c
    def iter_subtrees(self):
        visited = set()
        q = [self]
        while q:
            subtree = q.pop()
            if id(subtree) in visited:
                continue   # already been here from another branch
            visited.add(id(subtree))
            yield subtree
            q += [c for c in subtree.children if isinstance(c, Tree)]
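(The `visited` set guards against yielding the same node twice when a subtree is shared between branches, as can happen in ambiguous-derivation trees. A small illustration, assuming `Tree` is importable from `lark.tree` as in this codebase:)

```python
from lark.tree import Tree

# The same node appears under two parents; with the visited-set guard,
# iter_subtrees() yields it only once.
shared = Tree('leaf', [])
root = Tree('root', [shared, Tree('mid', [shared])])
print(sum(1 for _ in root.iter_subtrees()))   # 3 distinct subtrees, not 4
```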
@@ -1,6 +1,7 @@
import functools
import types
from collections import deque
from contextlib import contextmanager
class fzset(frozenset):
    def __repr__(self):
@@ -63,11 +64,17 @@ def inline_args(f):
        def _f_builtin(_self, args):
            return f(*args)
        return _f_builtin
    else:
        @functools.wraps(f)
    elif isinstance(f, types.MethodType):
        @functools.wraps(f.__func__)
        def _f(self, args):
            return f.__func__(self, *args)
        return _f
    else:
        @functools.wraps(f.__call__.__func__)
        def _f(self, args):
            return f.__call__.__func__(self, *args)
        return _f
try:
@@ -82,5 +89,24 @@ except NameError:
            return -1
try:
    from contextlib import suppress     # Python 3
except ImportError:
    @contextmanager
    def suppress(*excs):
        '''Catch and dismiss the provided exception
        >>> x = 'hello'
        >>> with suppress(IndexError):
        ...     x = x[10]
        >>> x
        'hello'
        '''
        try:
            yield
        except excs:
            pass
@@ -380,6 +380,20 @@ def _make_parser_test(LEXER, PARSER):
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])
        def test_token_collision2(self):
            # NOTE: This test reveals a bug in token reconstruction in Scanless Earley
            # I probably need to re-write grammar transformation
            g = _Lark("""
                    !start: "starts"
                    %import common.LCASE_LETTER
                    """)
            x = g.parse("starts")
            self.assertSequenceEqual(x.children, ['starts'])
        # def test_string_priority(self):
        #     g = _Lark("""start: (A | /a?bb/)+
        #                  A: "a" """)
@@ -539,6 +553,12 @@ def _make_parser_test(LEXER, PARSER):
            g.parse("+2e-9")
            self.assertRaises(ParseError, g.parse, "+2e-9e")
        def test_keep_all_tokens(self):
            l = _Lark("""start: "a"+ """, keep_all_tokens=True)
            tree = l.parse('aaa')
            self.assertEqual(tree.children, ['a', 'a', 'a'])
        def test_token_flags(self):
            l = _Lark("""!start: "a"i+
                      """
@@ -569,6 +589,14 @@ def _make_parser_test(LEXER, PARSER):
            tree = l.parse('AB,a')
            self.assertEqual(tree.children, ['AB'])
        def test_token_flags3(self):
            l = _Lark("""!start: ABC+
                         ABC: "abc"i
                      """
                      )
            tree = l.parse('aBcAbC')
            self.assertEqual(tree.children, ['aBc', 'AbC'])
        def test_token_flags2(self):
            g = """!start: ("a"i | /a/ /b/?)+
                """
@@ -577,6 +605,46 @@ def _make_parser_test(LEXER, PARSER):
            self.assertEqual(tree.children, ['a', 'A'])
        def test_reduce_cycle(self):
            """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state.
            It seems that the correct solution is to explicitly distinguish finalization in the reduce() function.
            """
            l = _Lark("""
                term: A
                    | term term
                A: "a"
                """, start='term')
            tree = l.parse("aa")
            self.assertEqual(len(tree.children), 2)
        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization(self):
            "Tests effect of priority on result"
            grammar = """
            start: a | b
            a.1: "a"
            b.2: "a"
            """
            l = Lark(grammar, parser='earley', lexer='standard')
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'b')
            grammar = """
            start: a | b
            a.2: "a"
            b.1: "a"
            """
            l = Lark(grammar, parser='earley', lexer='standard')
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'a')