@@ -125,7 +125,8 @@ Lark has no dependencies.
### Projects using Lark
- [mappyfile](https://github.com/geographika/mappyfile) - A pure Python MapFile parser for working with MapServer
- [mappyfile](https://github.com/geographika/mappyfile) - a MapFile parser for working with MapServer configuration
- [pytreeview](https://gitlab.com/parmenti/pytreeview) - a lightweight tree-based grammar explorer

Using Lark? Send me a message and I'll add your project!

@@ -251,6 +252,22 @@ Lark offers both Earley and LALR(1), which means you can choose between the most
Lark uses the [MIT license](LICENSE).

## Contribute
Lark is currently accepting pull-requests.

There are many ways you can help the project:
* Improve the performance of Lark's parsing algorithm
* Implement macros for grammars (important for grammar composition)
* Write new grammars for Lark's library
* Write & improve the documentation
* Write a blog post introducing Lark to your audience

If you're interested in taking one of these on, let me know and I will provide more details and assist you in the process.

## Contact
If you have any questions or want to contribute, you can email me at erezshin at gmail com.
If you have any questions or want my assistance, you can email me at erezshin at gmail com.
I'm also available for contract work.

@@ -116,11 +116,12 @@ AWAIT: "await"
| atom_expr "." NAME -> getattr
| atom
?atom: "(" [yield_expr|testlist_comp] ")"
| "[" [testlist_comp] "]"
| "{" [dictorsetmaker] "}"
?atom: "(" [yield_expr|testlist_comp] ")" -> tuple
| "[" [testlist_comp] "]" -> list
| "{" [dictorsetmaker] "}" -> dict
| NAME -> var
| number | string+ | "..."
| number | string+
| "..." -> ellipsis
| "None" -> const_none
| "True" -> const_true
| "False" -> const_false

@@ -3,4 +3,4 @@ from .common import ParseError, GrammarError
from .lark import Lark
from .utils import inline_args
__version__ = "0.2.7"
__version__ = "0.2.10"

@@ -1,4 +1,5 @@
import re
import sre_parse
class GrammarError(Exception):
pass

@@ -40,7 +41,7 @@ class LexerConf:
class ParserConf:
def __init__(self, rules, callback, start):
assert all(len(r)==3 for r in rules)
assert all(len(r) == 4 for r in rules)
self.rules = rules
self.callback = callback
self.start = start
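The tightened assert reflects that the rule tuples passed around internally now carry a fourth element, the rule's options (see the `new_rules.append(...)` change in the tree-builder hunk further down). A purely illustrative sketch of the shape; this is internal plumbing, not a public API, and the names below are made up:

```python
# Assumed internal shape after this change: (origin, expansion, callback_name, options)
rules = [
    ('start', ['NAME'],         'start_cb', None),
    ('pair',  ['NAME', 'NAME'], 'pair_cb',  None),
]
assert all(len(r) == 4 for r in rules)
```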

@@ -57,9 +58,9 @@ class Pattern(object):
# Pattern Hashing assumes all subclasses have a different priority!
def __hash__(self):
return hash((self.priority, self.value))
return hash((type(self), self.value))
def __eq__(self, other):
return self.priority == other.priority and self.value == other.value
return type(self) == type(other) and self.value == other.value
def _get_flags(self):
if self.flags:

@@ -71,13 +72,21 @@ class PatternStr(Pattern):
def to_regexp(self):
return self._get_flags() + re.escape(self.value)
priority = 0
@property
def min_width(self):
return len(self.value)
max_width = min_width

class PatternRE(Pattern):
def to_regexp(self):
return self._get_flags() + self.value
priority = 1
@property
def min_width(self):
return sre_parse.parse(self.to_regexp()).getwidth()[0]
@property
def max_width(self):
return sre_parse.parse(self.to_regexp()).getwidth()[1]

class TokenDef(object):
def __init__(self, name, pattern):
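The new `min_width`/`max_width` properties lean on `sre_parse.parse(...).getwidth()`, which returns the minimum and maximum possible match length of a regexp. A standalone sketch of what that call produces (`sre_parse` is the stdlib module imported above; it is deprecated in recent Python versions but still available):

```python
import re
import sre_parse  # stdlib regex parser; deprecated in newer Pythons but still works

print(sre_parse.parse(re.escape("print")).getwidth())  # (5, 5)  - fixed-width literal
print(sre_parse.parse(r"\d+").getwidth())              # (1, <MAXREPEAT sentinel>) - unbounded max
print(sre_parse.parse(r"a?").getwidth())               # (0, 1)  - zero minimum width
```

This is the same quantity the lexer hunk below now uses to reject zero-width tokens and to sort tokens by `max_width`.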

@@ -39,6 +39,7 @@ class LarkOptions(object):
postlex - Lexer post-processing (Default: None)
start - The start symbol (Default: start)
profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False)
propagate_positions - Experimental. Don't use yet.
"""
__doc__ += OPTIONS_DOC
def __init__(self, options_dict):

@@ -55,14 +56,13 @@ class LarkOptions(object):
self.start = o.pop('start', 'start')
self.profile = o.pop('profile', False)
self.ambiguity = o.pop('ambiguity', 'auto')
self.propagate_positions = o.pop('propagate_positions', False)
assert self.parser in ('earley', 'lalr', None)
if self.parser == 'earley' and self.transformer:
raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm.'
'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)')
if self.keep_all_tokens:
raise NotImplementedError("keep_all_tokens: Not implemented yet!")
if o:
raise ValueError("Unknown options: %s" % o.keys())

@@ -119,7 +119,7 @@ class Lark:
assert isinstance(grammar, STRING_TYPE)
if self.options.cache_grammar or self.options.keep_all_tokens:
if self.options.cache_grammar:
raise NotImplementedError("Not available yet")
assert not self.options.profile, "Feature temporarily disabled"

@@ -142,8 +142,12 @@ class Lark:
assert self.options.parser == 'earley'
assert self.options.ambiguity in ('resolve', 'explicit', 'auto')
# Parse the grammar file and compose the grammars (TODO)
self.grammar = load_grammar(grammar, source)
# Compile the EBNF grammar into BNF
tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=bool(lexer), start=self.options.start)
self.ignore_tokens = self.grammar.extra['ignore']
self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex)

@@ -162,7 +166,7 @@ class Lark:
def _build_parser(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer)
self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class)
self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens)
rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer)
if self.profiler:
for f in dir(callback):

@@ -1,7 +1,6 @@
## Lexer Implementation
import re
import sre_parse
from .utils import Str, classify
from .common import is_terminal, PatternStr, PatternRE, TokenDef

@@ -120,8 +119,7 @@ class Lexer(object):
except:
raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
width = sre_parse.parse(t.pattern.to_regexp()).getwidth()
if width[0] == 0:
if t.pattern.min_width == 0:
raise LexError("Lexer does not allow zero-width tokens. (%s: %s)" % (t.name, t.pattern))
token_names = {t.name for t in tokens}

@@ -133,7 +131,7 @@ class Lexer(object):
self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())]
self.ignore_types = [t for t in ignore]
tokens.sort(key=lambda x:(x.pattern.priority, len(x.pattern.value)), reverse=True)
tokens.sort(key=lambda x:x.pattern.max_width, reverse=True)
tokens, self.callback = _create_unless(tokens)
assert all(self.callback.values())

@@ -155,17 +153,27 @@ class Lexer(object):
if m:
value = m.group(0)
type_ = type_from_index[m.lastindex]
if type_ not in ignore_types:
to_yield = type_ not in ignore_types
if to_yield:
t = Token(type_, value, lex_pos, line, lex_pos - col_start_pos)
end_col = t.column + len(value)
if t.type in self.callback:
t = self.callback[t.type](t)
yield t
if type_ in newline_types:
newlines = value.count(self.newline_char)
if newlines:
line += newlines
col_start_pos = lex_pos + value.rindex(self.newline_char)
last_newline_index = value.rindex(self.newline_char) + 1
col_start_pos = lex_pos + last_newline_index
end_col = len(value) - last_newline_index
if to_yield:
t.end_line = line
t.end_col = end_col
yield t
lex_pos += len(value)
break
else:
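Beyond tracking `line`/`col_start_pos` across embedded newlines, the loop above now stamps `end_line` and `end_col` onto every token it yields. A sketch of reading them back; named terminals end up in the tree, so their positions can be inspected directly (exact column numbering is an assumption, hence no hard-coded values):

```python
from lark import Lark

# Illustrative grammar; WORD and NL are named terminals, so they stay in the tree.
parser = Lark(r"""
    start: WORD NL WORD
    WORD: /[a-z]+/
    NL: /\n/
""", parser='lalr')

tok1, _nl, tok2 = parser.parse("hello\nworld").children
print(tok1.line, tok1.column, tok1.end_line, tok1.end_col)  # positions of "hello"
print(tok2.line)                                            # "world" starts on line 2
```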

@@ -75,6 +75,7 @@ TOKENS = {
'_TO': '->',
'_IGNORE': r'%ignore',
'_IMPORT': r'%import',
'NUMBER': '\d+',
}
RULES = {

@@ -82,7 +83,8 @@ RULES = {
'_list': ['_item', '_list _item'],
'_item': ['rule', 'token', 'statement', '_NL'],
'rule': ['RULE _COLON expansions _NL'],
'rule': ['RULE _COLON expansions _NL',
'RULE _DOT NUMBER _COLON expansions _NL'],
'expansions': ['alias',
'expansions _OR alias',
'expansions _NL _OR alias'],
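The extra `rule` alternative above adds the `name . NUMBER :` header form, i.e. rule priorities. An illustrative grammar using the new syntax; with Earley, the higher-priority rule is expected to win an ambiguity, which is exactly what the new `test_earley_prioritization` test at the bottom of this diff asserts:

```python
from lark import Lark

# Sketch: ".1" / ".2" are the new rule priorities enabled by the RULES change above.
parser = Lark("""
    start: low | high
    low.1:  "a"
    high.2: "a"
""", parser='earley', lexer='standard')

print(parser.parse("a").children[0].data)  # expected: high
```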

@@ -313,7 +315,7 @@ class PrepareLiterals(InlineTransformer):
class SplitLiterals(InlineTransformer):
def pattern(self, p):
if isinstance(p, PatternStr) and len(p.value)>1:
return T('expansion', [T('pattern', [PatternStr(ch)]) for ch in p.value])
return T('expansion', [T('pattern', [PatternStr(ch, flags=p.flags)]) for ch in p.value])
return T('pattern', [p])
class TokenTreeToPattern(Transformer):

@@ -470,21 +472,29 @@ class Grammar:
class RuleOptions:
def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False):
def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False, priority=None):
self.keep_all_tokens = keep_all_tokens
self.expand1 = expand1
self.create_token = create_token # used for scanless postprocessing
self.priority = priority
self.filter_out = filter_out # remove this rule from the tree
# used for "token"-rules in scanless
@classmethod
def from_rule(cls, name, expansions):
def from_rule(cls, name, *x):
if len(x) > 1:
priority, expansions = x
priority = int(priority)
else:
expansions ,= x
priority = None
keep_all_tokens = name.startswith('!')
name = name.lstrip('!')
expand1 = name.startswith('?')
name = name.lstrip('?')
return name, expansions, cls(keep_all_tokens, expand1)
return name, expansions, cls(keep_all_tokens, expand1, priority=priority)

@@ -605,7 +615,7 @@ class GrammarLoader:
raise GrammarError("Token '%s' defined more than once" % name)
token_names.add(name)
rules = [RuleOptions.from_rule(name, x) for name, x in rule_defs]
rules = [RuleOptions.from_rule(*x) for x in rule_defs]
rule_names = set()
for name, _x, _o in rules:

@@ -1,4 +1,5 @@
from .common import is_terminal, GrammarError
from .utils import suppress
from .lexer import Token
class Callback(object):

@@ -42,10 +43,32 @@ def create_rule_handler(expansion, usermethod, keep_all_tokens, filter_out):
# else, if no filtering required..
return usermethod
def propagate_positions_wrapper(f):
def _f(args):
res = f(args)
if args:
for a in args:
with suppress(AttributeError):
res.line = a.line
res.column = a.column
break
for a in reversed(args):
with suppress(AttributeError):
res.end_line = a.end_line
res.end_col = a.end_col
break
return res
return _f

class ParseTreeBuilder:
def __init__(self, tree_class):
def __init__(self, tree_class, propagate_positions=False, keep_all_tokens=False):
self.tree_class = tree_class
self.propagate_positions = propagate_positions
self.always_keep_all_tokens = keep_all_tokens
def _create_tree_builder_function(self, name):
tree_class = self.tree_class

@@ -66,7 +89,7 @@ class ParseTreeBuilder:
filter_out.add(origin)
for origin, (expansions, options) in rules.items():
keep_all_tokens = options.keep_all_tokens if options else False
keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False)
expand1 = options.expand1 if options else False
create_token = options.create_token if options else False

@@ -92,11 +115,14 @@ class ParseTreeBuilder:
alias_handler = create_rule_handler(expansion, f, keep_all_tokens, filter_out)
if self.propagate_positions:
alias_handler = propagate_positions_wrapper(alias_handler)
callback_name = 'autoalias_%s_%s' % (_origin, '_'.join(expansion))
if hasattr(callback, callback_name):
raise GrammarError("Rule expansion '%s' already exists in rule %s" % (' '.join(expansion), origin))
setattr(callback, callback_name, alias_handler)
new_rules.append(( _origin, expansion, callback_name ))
new_rules.append(( _origin, expansion, callback_name, options ))
return new_rules, callback

@@ -131,7 +131,7 @@ class Earley_NoLex:
def __init__(self, lexer_conf, parser_conf, options=None):
self.token_by_name = {t.name:t for t in lexer_conf.tokens}
rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in parser_conf.rules]
rules = [(n, list(self._prepare_expansion(x)), a, o) for n,x,a,o in parser_conf.rules]
resolve_ambiguity = (options.ambiguity=='resolve') if options else True
self.parser = earley.Parser(rules,

@@ -158,7 +158,7 @@ class Earley(WithLexer):
def __init__(self, lexer_conf, parser_conf, options=None):
WithLexer.__init__(self, lexer_conf)
rules = [(n, self._prepare_expansion(x), a) for n,x,a in parser_conf.rules]
rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules]
resolve_ambiguity = (options.ambiguity=='resolve') if options else True
self.parser = earley.Parser(rules,

@@ -29,6 +29,9 @@ class Derivation(Tree):
Tree.__init__(self, 'drv', items or [])
self.rule = rule
def _pretty_label(self): # Nicer pretty for debugging the parser
return self.rule.origin if self.rule else self.data
END_TOKEN = EndToken()
class Item(object):

@@ -106,8 +109,11 @@ class Column:
new_tree = old_tree.copy()
new_tree.rule = old_tree.rule
old_tree.set('_ambig', [new_tree])
old_tree.rule = None # No longer a 'drv' node
if item.tree.children[0] is old_tree: # XXX a little hacky!
raise ParseError("Infinite recursion in grammar!")
raise ParseError("Infinite recursion in grammar! (Rule %s)" % item.rule)
old_tree.children.append(item.tree)
else:
self.completed[item] = item

@@ -218,7 +224,13 @@ class ApplyCallbacks(Transformer_NoRecurse):
return Tree(rule.origin, children)
def _compare_rules(rule1, rule2):
assert rule1.origin == rule2.origin
if rule1.options and rule2.options:
if rule1.options.priority is not None and rule2.options.priority is not None:
assert rule1.options.priority != rule2.options.priority, "Priority is the same between both rules: %s == %s" % (rule1, rule2)
return -compare(rule1.options.priority, rule2.options.priority)
if rule1.origin != rule2.origin:
return 0
c = compare( len(rule1.expansion), len(rule2.expansion))
if rule1.origin.startswith('__'): # XXX hack! We need to set priority in parser, not here
c = -c

@@ -228,6 +240,20 @@ def _compare_drv(tree1, tree2):
if not (isinstance(tree1, Tree) and isinstance(tree2, Tree)):
return compare(tree1, tree2)
try:
rule1, rule2 = tree1.rule, tree2.rule
except AttributeError:
# Probably trees that don't take part in this parse (better way to distinguish?)
return compare(tree1, tree2)
# XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse,
# when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be
# computationally inefficient. So we handle it here.
if tree1.data == '_ambig':
_resolve_ambig(tree1)
if tree2.data == '_ambig':
_resolve_ambig(tree2)
c = _compare_rules(tree1.rule, tree2.rule)
if c:
return c

@@ -241,12 +267,19 @@ def _compare_drv(tree1, tree2):
return compare(len(tree1.children), len(tree2.children))
def _resolve_ambig(tree):
assert tree.data == '_ambig'
best = min(tree.children, key=cmp_to_key(_compare_drv))
assert best.data == 'drv'
tree.set('drv', best.children)
tree.rule = best.rule # needed for applying callbacks
assert tree.data != '_ambig'
class ResolveAmbig(Visitor_NoRecurse):
def _ambig(self, tree):
best = min(tree.children, key=cmp_to_key(_compare_drv))
assert best.data == 'drv'
tree.set('drv', best.children)
tree.rule = best.rule # needed for applying callbacks
_resolve_ambig(tree)
# RULES = [

@@ -7,10 +7,11 @@ class Rule(object):
origin : a symbol
expansion : a list of symbols
"""
def __init__(self, origin, expansion, alias=None):
def __init__(self, origin, expansion, alias=None, options=None):
self.origin = origin
self.expansion = expansion
self.alias = alias
self.options = options
def __repr__(self):
return '<%s : %s>' % (self.origin, ' '.join(map(str,self.expansion)))

@@ -111,12 +112,12 @@ class GrammarAnalyzer(object):
self.debug = debug
rule_tuples = list(rule_tuples)
rule_tuples.append(('$root', [start_symbol, '$end']))
rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples]
rule_tuples = [(t[0], t[1], None, None) if len(t)==2 else t for t in rule_tuples]
self.rules = set()
self.rules_by_origin = {o: [] for o, _x, _a in rule_tuples}
for origin, exp, alias in rule_tuples:
r = Rule( origin, exp, alias )
self.rules_by_origin = {o: [] for o, _x, _a, _opt in rule_tuples}
for origin, exp, alias, options in rule_tuples:
r = Rule( origin, exp, alias, options )
self.rules.add(r)
self.rules_by_origin[origin].append(r)

@@ -9,6 +9,7 @@ from .lalr_analysis import LALR_Analyzer, ACTION_SHIFT
class Parser(object):
def __init__(self, parser_conf):
assert all(o is None or o.priority is None for n,x,a,o in parser_conf.rules), "LALR doesn't yet support prioritization"
self.analysis = LALR_Analyzer(parser_conf.rules, parser_conf.start)
self.analysis.compute_lookahead()
self.callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)

@@ -34,7 +35,7 @@ class Parser(object):
raise UnexpectedToken(token, expected, seq, i)
def reduce(rule, size):
def reduce(rule, size, end=False):
if size:
s = value_stack[-size:]
del state_stack[-size:]

@@ -44,7 +45,7 @@ class Parser(object):
res = self.callbacks[rule](s)
if len(state_stack) == 1 and rule.origin == self.analysis.start_symbol:
if end and len(state_stack) == 1 and rule.origin == self.analysis.start_symbol:
return res
_action, new_state = get_action(rule.origin)

@@ -73,7 +74,7 @@ class Parser(object):
while True:
_action, rule = get_action('$end')
assert _action == 'reduce'
res = reduce(*rule)
res = reduce(*rule, end=True)
if res:
assert state_stack == [self.analysis.init_state_idx] and not value_stack, len(state_stack)
return res

@@ -10,11 +10,14 @@ class Tree(object):
def __repr__(self):
return 'Tree(%s, %s)' % (self.data, self.children)
def _pretty_label(self):
return self.data
def _pretty(self, level, indent_str):
if len(self.children) == 1 and not isinstance(self.children[0], Tree):
return [ indent_str*level, self.data, '\t', '%s' % self.children[0], '\n']
l = [ indent_str*level, self.data, '\n' ]
l = [ indent_str*level, self._pretty_label(), '\n' ]
for n in self.children:
if isinstance(n, Tree):
l += n._pretty(level+1, indent_str)

@@ -62,10 +65,14 @@ class Tree(object):
yield c
def iter_subtrees(self):
visited = set()
q = [self]
while q:
subtree = q.pop()
if id(subtree) in visited:
continue # already been here from another branch
visited.add(id(subtree))
yield subtree
q += [c for c in subtree.children if isinstance(c, Tree)]
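With the `visited` set, a subtree reachable through more than one parent (which can happen in ambiguous or packed parse results) is yielded only once. A small sketch:

```python
from lark.tree import Tree

# The same Tree object appears under two parents; after this change it is
# yielded a single time by iter_subtrees().
shared = Tree('leaf', [])
root = Tree('root', [Tree('a', [shared]), Tree('b', [shared])])

print(sum(1 for t in root.iter_subtrees() if t is shared))  # expected: 1
```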

@@ -1,6 +1,7 @@
import functools
import types
from collections import deque
from contextlib import contextmanager
class fzset(frozenset):
def __repr__(self):

@@ -63,11 +64,17 @@ def inline_args(f):
def _f_builtin(_self, args):
return f(*args)
return _f_builtin
else:
@functools.wraps(f)
elif isinstance(f, types.MethodType):
@functools.wraps(f.__func__)
def _f(self, args):
return f.__func__(self, *args)
return _f
else:
@functools.wraps(f.__call__.__func__)
def _f(self, args):
return f.__call__.__func__(self, *args)
return _f
try:
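`inline_args` wraps a handler that would otherwise receive a single list of children so that it gets them as separate arguments; the change above splits the old catch-all branch into one for bound methods and one for callable objects (alongside the existing builtin case). A minimal sketch of the bound-method path, which goes through the `types.MethodType` branch shown:

```python
from lark import inline_args

class Handler:
    def add(self, a, b):
        return a + b

h = Handler()
wrapped = inline_args(h.add)  # bound method -> MethodType branch above
# The wrapper keeps Lark's internal (self, args) calling convention and
# unpacks the args list for the original method:
print(wrapped(h, [1, 2]))     # expected: 3
```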

@@ -82,5 +89,24 @@ except NameError:
return -1
try:
from contextlib import suppress # Python 3
except ImportError:
@contextmanager
def suppress(*excs):
'''Catch and dismiss the provided exception
>>> x = 'hello'
>>> with suppress(IndexError):
... x = x[10]
>>> x
'hello'
'''
try:
yield
except excs:
pass

@@ -380,6 +380,20 @@ def _make_parser_test(LEXER, PARSER):
x = g.parse('Hello HelloWorld')
self.assertSequenceEqual(x.children, ['HelloWorld'])
def test_token_collision2(self):
# NOTE: This test reveals a bug in token reconstruction in Scanless Earley
# I probably need to re-write grammar transformation
g = _Lark("""
!start: "starts"
%import common.LCASE_LETTER
""")
x = g.parse("starts")
self.assertSequenceEqual(x.children, ['starts'])
# def test_string_priority(self):
# g = _Lark("""start: (A | /a?bb/)+
# A: "a" """)

@@ -539,6 +553,12 @@ def _make_parser_test(LEXER, PARSER):
g.parse("+2e-9")
self.assertRaises(ParseError, g.parse, "+2e-9e")
def test_keep_all_tokens(self):
l = _Lark("""start: "a"+ """, keep_all_tokens=True)
tree = l.parse('aaa')
self.assertEqual(tree.children, ['a', 'a', 'a'])
def test_token_flags(self):
l = _Lark("""!start: "a"i+
"""

@@ -569,6 +589,14 @@ def _make_parser_test(LEXER, PARSER):
tree = l.parse('AB,a')
self.assertEqual(tree.children, ['AB'])
def test_token_flags3(self):
l = _Lark("""!start: ABC+
ABC: "abc"i
"""
)
tree = l.parse('aBcAbC')
self.assertEqual(tree.children, ['aBc', 'AbC'])
def test_token_flags2(self):
g = """!start: ("a"i | /a/ /b/?)+
"""

@@ -577,6 +605,46 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(tree.children, ['a', 'A'])
def test_reduce_cycle(self):
| """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state. | |||
| It seems that the correct solution is to explicitely distinguish finalization in the reduce() function. | |||
| """ | |||
| l = _Lark(""" | |||
| term: A | |||
| | term term | |||
| A: "a" | |||
| """, start='term') | |||
| tree = l.parse("aa") | |||
| self.assertEqual(len(tree.children), 2) | |||
| @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") | |||
| def test_earley_prioritization(self): | |||
| "Tests effect of priority on result" | |||
| grammar = """ | |||
| start: a | b | |||
| a.1: "a" | |||
| b.2: "a" | |||
| """ | |||
| l = Lark(grammar, parser='earley', lexer='standard') | |||
| res = l.parse("a") | |||
| self.assertEqual(res.children[0].data, 'b') | |||
| grammar = """ | |||
| start: a | b | |||
| a.2: "a" | |||
| b.1: "a" | |||
| """ | |||
| l = Lark(grammar, parser='earley', lexer='standard') | |||
| res = l.parse("a") | |||
| self.assertEqual(res.children[0].data, 'a') | |||