From 5b30ba484105b66f4596ada259a9d96d679d8d8c Mon Sep 17 00:00:00 2001
From: Erez Sh
Date: Thu, 12 Nov 2020 10:30:00 +0200
Subject: [PATCH] Cleanup, and a few PEP8 changes

---
 lark-stubs/lexer.pyi       | 26 ++++++++----
 lark/common.py             |  4 +-
 lark/exceptions.py         |  4 ++
 lark/grammar.py            |  3 --
 lark/lark.py               |  9 +++--
 lark/lexer.py              | 30 +++++++++-----
 lark/load_grammar.py       | 82 ++++++++++++++++++++------------------
 lark/parse_tree_builder.py | 25 ++++++++----
 lark/parsers/earley.py     |  4 +-
 lark/tree.py               | 16 +++++---
 lark/utils.py              | 22 +++++-----
 lark/visitors.py           | 23 +++++------
 12 files changed, 141 insertions(+), 107 deletions(-)

diff --git a/lark-stubs/lexer.pyi b/lark-stubs/lexer.pyi
index ae7d68a..12d3dfe 100644
--- a/lark-stubs/lexer.pyi
+++ b/lark-stubs/lexer.pyi
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from types import ModuleType
 from typing import (
-    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional,
+    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet,
     Any, Pattern as REPattern,
 )
 from abc import abstractmethod, ABC
@@ -100,10 +100,22 @@ class Lexer(ABC):
     lex: Callable[..., Iterator[Token]]
 
 
+class LexerConf:
+    tokens: Collection[TerminalDef]
+    re_module: ModuleType
+    ignore: Collection[str] = ()
+    postlex: Any = None
+    callbacks: Optional[Dict[str, _Callback]] = None
+    g_regex_flags: int = 0
+    skip_validation: bool = False
+    use_bytes: bool = False
+
+
 class TraditionalLexer(Lexer):
     terminals: Collection[TerminalDef]
-    ignore_types: List[str]
-    newline_types: List[str]
+    ignore_types: FrozenSet[str]
+    newline_types: FrozenSet[str]
     user_callbacks: Dict[str, _Callback]
     callback: Dict[str, _Callback]
     mres: List[Tuple[REPattern, Dict[int, str]]]
@@ -111,11 +123,7 @@
 
     def __init__(
         self,
-        terminals: Collection[TerminalDef],
-        re_: ModuleType,
-        ignore: Collection[str] = ...,
-        user_callbacks: Dict[str, _Callback] = ...,
-        g_regex_flags: int = ...
+        conf: LexerConf
    ):
         ...
 
@@ -128,6 +136,8 @@ class TraditionalLexer(Lexer):
     def lex(self, stream: str) -> Iterator[Token]:
         ...
 
+    def next_token(self, lex_state: Any) -> Token:
+        ...
 
class ContextualLexer(Lexer): lexers: Dict[str, TraditionalLexer] diff --git a/lark/common.py b/lark/common.py index 714399a..4bf04ec 100644 --- a/lark/common.py +++ b/lark/common.py @@ -3,6 +3,7 @@ from .lexer import TerminalDef ###{standalone + class LexerConf(Serialize): __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes' __serialize_namespace__ = TerminalDef, @@ -19,11 +20,10 @@ class LexerConf(Serialize): ###} + class ParserConf: def __init__(self, rules, callbacks, start): assert isinstance(start, list) self.rules = rules self.callbacks = callbacks self.start = start - - diff --git a/lark/exceptions.py b/lark/exceptions.py index 79629e6..72af526 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -6,15 +6,19 @@ from .utils import STRING_TYPE, logger class LarkError(Exception): pass + class GrammarError(LarkError): pass + class ParseError(LarkError): pass + class LexError(LarkError): pass + class UnexpectedEOF(ParseError): def __init__(self, expected): self.expected = expected diff --git a/lark/grammar.py b/lark/grammar.py index bb84351..405086a 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -40,14 +40,12 @@ class Terminal(Symbol): return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out) - class NonTerminal(Symbol): __serialize_fields__ = 'name', is_term = False - class RuleOptions(Serialize): __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices' @@ -104,5 +102,4 @@ class Rule(Serialize): return self.origin == other.origin and self.expansion == other.expansion - ###} diff --git a/lark/lark.py b/lark/lark.py index b8a1231..cc431ca 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -24,6 +24,7 @@ except ImportError: ###{standalone + class LarkOptions(Serialize): """Specifies the options for Lark @@ -36,7 +37,7 @@ class LarkOptions(Serialize): debug Display debug information, such as warnings (default: False) transformer - Applies the transformer to every parse tree (equivlent to applying it after the parse, but faster) + Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) propagate_positions Propagates (line, column, end_line, end_column) attributes into all tree branches. maybe_placeholders @@ -320,7 +321,7 @@ class Lark(Serialize): # Else, if the user asked to disable priorities, strip them from the # rules. This allows the Earley parsers to skip an extra forest walk # for improved performance, if you don't need them (or didn't specify any). 
- elif self.options.priority == None: + elif self.options.priority is None: for rule in self.rules: if rule.options.priority is not None: rule.options.priority = None @@ -360,7 +361,7 @@ class Lark(Serialize): self.rules, self.options.tree_class or Tree, self.options.propagate_positions, - self.options.parser!='lalr' and self.options.ambiguity=='explicit', + self.options.parser != 'lalr' and self.options.ambiguity == 'explicit', self.options.maybe_placeholders ) self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) @@ -410,7 +411,7 @@ class Lark(Serialize): data['parser'], memo, self._callbacks, - self.options, # Not all, but multiple attributes are used + self.options, # Not all, but multiple attributes are used ) self.terminals = self.parser.lexer_conf.tokens self._terminals_dict = {t.name: t for t in self.terminals} diff --git a/lark/lexer.py b/lark/lexer.py index b080921..4c420e7 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -1,4 +1,4 @@ -## Lexer Implementation +# Lexer Implementation import re @@ -8,6 +8,7 @@ from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken ###{standalone from copy import copy + class Pattern(Serialize): def __init__(self, value, flags=()): @@ -20,6 +21,7 @@ class Pattern(Serialize): # Pattern Hashing assumes all subclasses have a different priority! def __hash__(self): return hash((type(self), self.value, self.flags)) + def __eq__(self, other): return type(self) == type(other) and self.value == other.value and self.flags == other.flags @@ -53,6 +55,7 @@ class PatternStr(Pattern): return len(self.value) max_width = min_width + class PatternRE(Pattern): __serialize_fields__ = 'value', 'flags', '_width' @@ -70,6 +73,7 @@ class PatternRE(Pattern): @property def min_width(self): return self._get_width()[0] + @property def max_width(self): return self._get_width()[1] @@ -139,7 +143,7 @@ class Token(Str): return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) def __reduce__(self): - return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, )) + return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column)) def __repr__(self): return 'Token(%r, %r)' % (self.type, self.value) @@ -193,6 +197,7 @@ class UnlessCallback: break return t + class CallChain: def __init__(self, callback1, callback2, cond): self.callback1 = callback1 @@ -204,16 +209,13 @@ class CallChain: return self.callback2(t) if self.cond(t2) else t2 - - - def _create_unless(terminals, g_regex_flags, re_, use_bytes): tokens_by_type = classify(terminals, lambda t: type(t.pattern)) assert len(tokens_by_type) <= 2, tokens_by_type.keys() embedded_strs = set() callback = {} for retok in tokens_by_type.get(PatternRE, []): - unless = [] # {} + unless = [] for strtok in tokens_by_type.get(PatternStr, []): if strtok.priority > retok.priority: continue @@ -245,13 +247,15 @@ def _build_mres(terminals, max_size, g_regex_flags, match_whole, re_, use_bytes) except AssertionError: # Yes, this is what Python provides us.. 
:/ return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_, use_bytes) - mres.append((mre, {i:n for n,i in mre.groupindex.items()} )) + mres.append((mre, {i: n for n, i in mre.groupindex.items()})) terminals = terminals[max_size:] return mres + def build_mres(terminals, g_regex_flags, re_, use_bytes, match_whole=False): return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_, use_bytes) + def _regexp_has_newline(r): r"""Expressions that may indicate newlines in a regexp: - newlines (\n) @@ -262,6 +266,7 @@ def _regexp_has_newline(r): """ return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r) + class Lexer(object): """Lexer interface @@ -300,7 +305,7 @@ class TraditionalLexer(Lexer): self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())) self.ignore_types = frozenset(conf.ignore) - terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name)) + terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name)) self.terminals = terminals self.user_callbacks = conf.callbacks self.g_regex_flags = conf.g_regex_flags @@ -309,7 +314,7 @@ class TraditionalLexer(Lexer): self._mres = None def _build(self): - terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, re_=self.re, use_bytes=self.use_bytes) + terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes) assert all(self.callback.values()) for type_, f in self.user_callbacks.items(): @@ -333,7 +338,7 @@ class TraditionalLexer(Lexer): if m: return m.group(0), type_from_index[m.lastindex] - def lex(self, state, parser_state): + def lex(self, state, _parser_state): with suppress(EOFError): while True: yield self.next_token(state) @@ -372,6 +377,7 @@ class TraditionalLexer(Lexer): # EOF raise EOFError(self) + class LexerState: __slots__ = 'text', 'line_ctr', 'last_token' @@ -383,6 +389,7 @@ class LexerState: def __copy__(self): return type(self)(self.text, copy(self.line_ctr), self.last_token) + class ContextualLexer(Lexer): def __init__(self, conf, states, always_accept=()): @@ -430,8 +437,9 @@ class ContextualLexer(Lexer): token = self.root_lexer.next_token(lexer_state) raise UnexpectedToken(token, e.allowed, state=parser_state.position) + class LexerThread: - "A thread that ties a lexer instance and a lexer state, to be used by the parser" + """A thread that ties a lexer instance and a lexer state, to be used by the parser""" def __init__(self, lexer, text): self.lexer = lexer diff --git a/lark/load_grammar.py b/lark/load_grammar.py index eb0273c..4b962fe 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -1,4 +1,4 @@ -"Parses and creates Grammar objects" +"""Parses and creates Grammar objects""" import os.path import sys @@ -166,6 +166,7 @@ RULES = { 'literal': ['REGEXP', 'STRING'], } + @inline_args class EBNF_to_BNF(Transformer_InPlace): def __init__(self): @@ -259,9 +260,9 @@ class SimplifyRule_Visitor(Visitor): for i, child in enumerate(tree.children): if isinstance(child, Tree) and child.data == 'expansions': tree.data = 'expansions' - tree.children = [self.visit(ST('expansion', [option if i==j else other - for j, other in enumerate(tree.children)])) - for option in dedup_list(child.children)] + tree.children = [self.visit(ST('expansion', [option if i == j else other + for j, other in enumerate(tree.children)])) + for option in dedup_list(child.children)] self._flatten(tree) 
break @@ -284,8 +285,10 @@ class SimplifyRule_Visitor(Visitor): class RuleTreeToText(Transformer): def expansions(self, x): return x + def expansion(self, symbols): return symbols, None + def alias(self, x): (expansion, _alias), alias = x assert _alias is None, (alias, expansion, '-', _alias) # Double alias not allowed @@ -300,8 +303,9 @@ class CanonizeTree(Transformer_InPlace): tokenmods, value = args return tokenmods + [value] + class PrepareAnonTerminals(Transformer_InPlace): - "Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them" + """Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them""" def __init__(self, terminals): self.terminals = terminals @@ -310,7 +314,6 @@ class PrepareAnonTerminals(Transformer_InPlace): self.i = 0 self.rule_options = None - @inline_args def pattern(self, p): value = p.value @@ -330,14 +333,14 @@ class PrepareAnonTerminals(Transformer_InPlace): except KeyError: if value.isalnum() and value[0].isalpha() and value.upper() not in self.term_set: with suppress(UnicodeEncodeError): - value.upper().encode('ascii') # Make sure we don't have unicode in our terminal names + value.upper().encode('ascii') # Make sure we don't have unicode in our terminal names term_name = value.upper() if term_name in self.term_set: term_name = None elif isinstance(p, PatternRE): - if p in self.term_reverse: # Kind of a weird placement.name + if p in self.term_reverse: # Kind of a weird placement.name term_name = self.term_reverse[p].name else: assert False, p @@ -359,7 +362,7 @@ class PrepareAnonTerminals(Transformer_InPlace): class _ReplaceSymbols(Transformer_InPlace): - " Helper for ApplyTemplates " + """Helper for ApplyTemplates""" def __init__(self): self.names = {} @@ -374,8 +377,9 @@ class _ReplaceSymbols(Transformer_InPlace): return self.__default__('template_usage', [self.names[c[0]].name] + c[1:], None) return self.__default__('template_usage', c, None) + class ApplyTemplates(Transformer_InPlace): - " Apply the templates, creating new rules that represent the used templates " + """Apply the templates, creating new rules that represent the used templates""" def __init__(self, rule_defs): self.rule_defs = rule_defs @@ -401,8 +405,6 @@ def _rfind(s, choices): return max(s.rfind(c) for c in choices) - - def _literal_to_pattern(literal): v = literal.value flag_start = _rfind(v, '/"')+1 @@ -441,7 +443,7 @@ class PrepareLiterals(Transformer_InPlace): assert start.type == end.type == 'STRING' start = start.value[1:-1] end = end.value[1:-1] - assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1, (start, end, len(eval_escaping(start)), len(eval_escaping(end))) + assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1 regexp = '[%s-%s]' % (start, end) return ST('pattern', [PatternRE(regexp)]) @@ -460,6 +462,7 @@ def _make_joined_pattern(regexp, flags_set): return PatternRE(regexp, flags) + class TerminalTreeToPattern(Transformer): def pattern(self, ps): p ,= ps @@ -503,6 +506,7 @@ class TerminalTreeToPattern(Transformer): def value(self, v): return v[0] + class PrepareSymbols(Transformer_InPlace): def value(self, v): v ,= v @@ -514,13 +518,16 @@ class PrepareSymbols(Transformer_InPlace): return Terminal(Str(v.value), filter_out=v.startswith('_')) assert False + def _choice_of_rules(rules): return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules]) + def nr_deepcopy_tree(t): - "Deepcopy tree `t` without recursion" + """Deepcopy tree `t` without 
recursion""" return Transformer_NonRecursive(False).transform(t) + class Grammar: def __init__(self, rule_defs, term_defs, ignore): self.term_defs = term_defs @@ -547,7 +554,7 @@ class Grammar: raise GrammarError("Terminals cannot be empty (%s)" % name) transformer = PrepareLiterals() * TerminalTreeToPattern() - terminals = [TerminalDef(name, transformer.transform( term_tree ), priority) + terminals = [TerminalDef(name, transformer.transform(term_tree), priority) for name, (term_tree, priority) in term_defs if term_tree] # ================= @@ -566,10 +573,10 @@ class Grammar: ebnf_to_bnf = EBNF_to_BNF() rules = [] i = 0 - while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates + while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates name, params, rule_tree, options = rule_defs[i] i += 1 - if len(params) != 0: # Dont transform templates + if len(params) != 0: # Dont transform templates continue rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None ebnf_to_bnf.rule_options = rule_options @@ -594,7 +601,7 @@ class Grammar: for i, (expansion, alias) in enumerate(expansions): if alias and name.startswith('_'): - raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) + raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)"% (name, alias)) empty_indices = [x==_EMPTY for x in expansion] if any(empty_indices): @@ -623,14 +630,13 @@ class Grammar: # Remove duplicates compiled_rules = list(set(compiled_rules)) - # Filter out unused rules while True: c = len(compiled_rules) used_rules = {s for r in compiled_rules - for s in r.expansion - if isinstance(s, NonTerminal) - and s != r.origin} + for s in r.expansion + if isinstance(s, NonTerminal) + and s != r.origin} used_rules |= {NonTerminal(s) for s in start} compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules) for r in unused: @@ -663,6 +669,7 @@ class PackageResource(object): def __repr__(self): return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.path) + class FromPackageLoader(object): """ Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`. @@ -699,11 +706,12 @@ class FromPackageLoader(object): return PackageResource(self.pkg_name, full_path), text.decode() raise IOError() -stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS) +stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS) _imported_grammars = {} + def import_from_grammar_into_namespace(grammar, namespace, aliases): """Returns all rules and terminals of grammar, prepended with a 'namespace' prefix, except for those which are aliased. 
@@ -724,8 +732,6 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace)) return _find_used_symbols(tree) - set(params) - - def get_namespace_name(name, params): if params is not None: try: @@ -746,19 +752,17 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): else: assert symbol.type == 'RULE' _, params, tree, options = imported_rules[symbol] - params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s' ) % (namespace, p) for p in params} + params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s') % (namespace, p) for p in params} for t in tree.iter_subtrees(): for i, c in enumerate(t.children): if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'): t.children[i] = Token(c.type, get_namespace_name(c, params_map)) - params = [params_map[p] for p in params] # We can not rely on ordered dictionaries + params = [params_map[p] for p in params] # We can not rely on ordered dictionaries rule_defs.append((get_namespace_name(symbol, params_map), params, tree, options)) - return term_defs, rule_defs - def resolve_term_references(term_defs): # TODO Solve with transitive closure (maybe) @@ -798,7 +802,7 @@ def options_from_rule(name, params, *x): else: expansions ,= x priority = None - params = [t.value for t in params.children] if params is not None else [] # For the grammar parser + params = [t.value for t in params.children] if params is not None else [] # For the grammar parser keep_all_tokens = name.startswith('!') name = name.lstrip('!') @@ -812,10 +816,12 @@ def options_from_rule(name, params, *x): def symbols_from_strcase(expansion): return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion] + @inline_args class PrepareGrammar(Transformer_InPlace): def terminal(self, name): return name + def nonterminal(self, name): return name @@ -825,10 +831,11 @@ def _find_used_symbols(tree): return {t for x in tree.find_data('expansion') for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))} + class GrammarLoader: ERRORS = [ ('Unclosed parenthesis', ['a: (\n']), - ('Umatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']), + ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']), ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']), ('Illegal name for rules or terminals', ['Aa:\n']), ('Alias expects lowercase name', ['a: -> "a"\n']), @@ -843,8 +850,9 @@ class GrammarLoader: def __init__(self, global_keep_all_tokens): terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] - rules = [options_from_rule(name, None, x) for name, x in RULES.items()] - rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, _p, xs, o in rules for i, x in enumerate(xs)] + rules = [options_from_rule(name, None, x) for name, x in RULES.items()] + rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) + for r, _p, xs, o in rules for i, x in enumerate(xs)] callback = ParseTreeBuilder(rules, ST).create_callback() import re lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT']) @@ -881,10 +889,10 @@ class GrammarLoader: return _imported_grammars[grammar_path] def load_grammar(self, grammar_text, grammar_name='', import_paths=[]): - "Parse grammar_text, verify, and create Grammar object. Display nice messages on error." + """Parse grammar_text, verify, and create Grammar object. 
Display nice messages on error.""" try: - tree = self.canonize_tree.transform( self.parser.parse(grammar_text+'\n') ) + tree = self.canonize_tree.transform(self.parser.parse(grammar_text+'\n')) except UnexpectedCharacters as e: context = e.get_context(grammar_text) raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" % @@ -1037,7 +1045,7 @@ class GrammarLoader: raise GrammarError("Template '%s' used but not defined (in rule %s)" % (sym, name)) if len(args) != rule_names[sym]: raise GrammarError("Wrong number of template arguments used for %s " - "(expected %s, got %s) (in rule %s)"%(sym, rule_names[sym], len(args), name)) + "(expected %s, got %s) (in rule %s)" % (sym, rule_names[sym], len(args), name)) for sym in _find_used_symbols(expansions): if sym.type == 'TERMINAL': if sym not in terminal_names: @@ -1046,10 +1054,8 @@ class GrammarLoader: if sym not in rule_names and sym not in params: raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name)) - return Grammar(rules, term_defs, ignore_names) - def load_grammar(grammar, source, import_paths, global_keep_all_tokens): return GrammarLoader(global_keep_all_tokens).load_grammar(grammar, source, import_paths) diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index 46bc601..569761a 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -1,7 +1,7 @@ from .exceptions import GrammarError from .lexer import Token from .tree import Tree -from .visitors import InlineTransformer # XXX Deprecated +from .visitors import InlineTransformer # XXX Deprecated from .visitors import Transformer_InPlace from .visitors import _vargs_meta, _vargs_meta_inline @@ -20,6 +20,7 @@ class ExpandSingleChild: else: return self.node_builder(children) + class PropagatePositions: def __init__(self, node_builder): self.node_builder = node_builder @@ -87,8 +88,9 @@ class ChildFilter: return self.node_builder(filtered) + class ChildFilterLALR(ChildFilter): - "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" + """Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)""" def __call__(self, children): filtered = [] @@ -108,6 +110,7 @@ class ChildFilterLALR(ChildFilter): return self.node_builder(filtered) + class ChildFilterLALR_NoPlaceholders(ChildFilter): "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" def __init__(self, to_include, node_builder): @@ -126,9 +129,11 @@ class ChildFilterLALR_NoPlaceholders(ChildFilter): filtered.append(children[i]) return self.node_builder(filtered) + def _should_expand(sym): return not sym.is_term and sym.name.startswith('_') + def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices): # Prepare empty_indices as: How many Nones to insert at each index? if _empty_indices: @@ -156,6 +161,7 @@ def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indi # LALR without placeholders return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include]) + class AmbiguousExpander: """Deal with the case where we're expanding children ('_rule') into a parent but the children are ambiguous. i.e. (parent->_ambig->_expand_this_rule). 
In this case, make the parent itself @@ -167,10 +173,10 @@ class AmbiguousExpander: self.to_expand = to_expand def __call__(self, children): - def _is_ambig_tree(child): - return hasattr(child, 'data') and child.data == '_ambig' + def _is_ambig_tree(t): + return hasattr(t, 'data') and t.data == '_ambig' - #### When we're repeatedly expanding ambiguities we can end up with nested ambiguities. + # -- When we're repeatedly expanding ambiguities we can end up with nested ambiguities. # All children of an _ambig node should be a derivation of that ambig node, hence # it is safe to assume that if we see an _ambig node nested within an ambig node # it is safe to simply expand it into the parent _ambig node as an alternative derivation. @@ -186,15 +192,17 @@ class AmbiguousExpander: if not ambiguous: return self.node_builder(children) - expand = [ iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children) ] + expand = [iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children)] return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))]) + def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens): to_expand = [i for i, sym in enumerate(expansion) if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))] if to_expand: return partial(AmbiguousExpander, to_expand, tree_class) + class AmbiguousIntermediateExpander: """ Propagate ambiguous intermediate nodes and their derivations up to the @@ -275,12 +283,14 @@ class AmbiguousIntermediateExpander: return self.node_builder(children) + def ptb_inline_args(func): @wraps(func) def f(children): return func(*children) return f + def inplace_transformer(func): @wraps(func) def f(children): @@ -289,9 +299,11 @@ def inplace_transformer(func): return func(tree) return f + def apply_visit_wrapper(func, name, wrapper): if wrapper is _vargs_meta or wrapper is _vargs_meta_inline: raise NotImplementedError("Meta args not supported for internal transformer") + @wraps(func) def f(children): return wrapper(func, name, children, None) @@ -323,7 +335,6 @@ class ParseTreeBuilder: yield rule, wrapper_chain - def create_callback(self, transformer=None): callbacks = {} diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index 3012e4c..f0bb7f5 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -298,8 +298,8 @@ class Parser: # this column. Find the item for the start_symbol, which is the root of the SPPF tree. 
solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0] if not solutions: - expected_tokens = [t.expect for t in to_scan] - raise UnexpectedEOF(expected_tokens) + expected_terminals = [t.expect for t in to_scan] + raise UnexpectedEOF(expected_terminals) if self.debug: from .earley_forest import ForestToPyDotVisitor diff --git a/lark/tree.py b/lark/tree.py index 0b7114b..9d95015 100644 --- a/lark/tree.py +++ b/lark/tree.py @@ -46,14 +46,14 @@ class Tree(object): def _pretty(self, level, indent_str): if len(self.children) == 1 and not isinstance(self.children[0], Tree): - return [ indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n'] + return [indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n'] - l = [ indent_str*level, self._pretty_label(), '\n' ] + l = [indent_str*level, self._pretty_label(), '\n'] for n in self.children: if isinstance(n, Tree): l += n._pretty(level+1, indent_str) else: - l += [ indent_str*(level+1), '%s' % (n,), '\n' ] + l += [indent_str*(level+1), '%s' % (n,), '\n'] return l @@ -102,8 +102,8 @@ class Tree(object): ###} def expand_kids_by_index(self, *indices): - "Expand (inline) children at the given indices" - for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices + """Expand (inline) children at the given indices""" + for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices kid = self.children[i] self.children[i:i+1] = kid.children @@ -144,12 +144,15 @@ class Tree(object): @property def line(self): return self.meta.line + @property def column(self): return self.meta.column + @property def end_line(self): return self.meta.end_line + @property def end_column(self): return self.meta.end_column @@ -168,6 +171,7 @@ def pydot__tree_to_dot(tree, filename, rankdir="LR", **kwargs): graph = pydot__tree_to_graph(tree, rankdir, **kwargs) graph.write(filename) + def pydot__tree_to_graph(tree, rankdir="LR", **kwargs): """Creates a colorful image that represents the tree (data+children, without meta) @@ -196,7 +200,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs): subnodes = [_to_pydot(child) if isinstance(child, Tree) else new_leaf(child) for child in subtree.children] - node = pydot.Node(i[0], style="filled", fillcolor="#%x"%color, label=subtree.data) + node = pydot.Node(i[0], style="filled", fillcolor="#%x" % color, label=subtree.data) i[0] += 1 graph.add_node(node) diff --git a/lark/utils.py b/lark/utils.py index cfd4306..b1c3535 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -1,10 +1,10 @@ -import sys import os from functools import reduce from ast import literal_eval from collections import deque ###{standalone +import sys, re import logging logger = logging.getLogger("lark") logger.addHandler(logging.StreamHandler()) @@ -12,6 +12,8 @@ logger.addHandler(logging.StreamHandler()) # By default, we should not output any log messages logger.setLevel(logging.CRITICAL) +Py36 = (sys.version_info[:2] >= (3, 6)) + def classify(seq, key=None, value=None): d = {} @@ -27,7 +29,7 @@ def classify(seq, key=None, value=None): def _deserialize(data, namespace, memo): if isinstance(data, dict): - if '__type__' in data: # Object + if '__type__' in data: # Object class_ = namespace[data['__type__']] return class_.deserialize(data, memo) elif '@' in data: @@ -105,7 +107,6 @@ class SerializeMemoizer(Serialize): return _deserialize(data, namespace, memo) - try: STRING_TYPE = basestring except 
NameError: # Python 3 @@ -118,10 +119,11 @@ from contextlib import contextmanager Str = type(u'') try: - classtype = types.ClassType # Python2 + classtype = types.ClassType # Python2 except AttributeError: classtype = type # Python3 + def smart_decorator(f, create_decorator): if isinstance(f, types.FunctionType): return wraps(f)(create_decorator(f, True)) @@ -139,17 +141,16 @@ def smart_decorator(f, create_decorator): else: return create_decorator(f.__func__.__call__, True) + try: import regex except ImportError: regex = None -import sys, re -Py36 = (sys.version_info[:2] >= (3, 6)) - import sre_parse import sre_constants categ_pattern = re.compile(r'\\p{[A-Za-z_]+}') + def get_regexp_width(expr): if regex: # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with @@ -173,9 +174,7 @@ def dedup_list(l): preserving the original order of the list. Assumes that the list entries are hashable.""" dedup = set() - return [ x for x in l if not (x in dedup or dedup.add(x))] - - + return [x for x in l if not (x in dedup or dedup.add(x))] try: @@ -197,8 +196,6 @@ except ImportError: pass - - try: compare = cmp except NameError: @@ -210,7 +207,6 @@ except NameError: return -1 - class Enumerator(Serialize): def __init__(self): self.enums = {} diff --git a/lark/visitors.py b/lark/visitors.py index 751afac..7e3bae4 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -8,6 +8,7 @@ from .lexer import Token ###{standalone from inspect import getmembers, getmro + class Discard(Exception): """When raising the Discard exception in a transformer callback, that node is discarded and won't appear in the parent. @@ -16,6 +17,7 @@ class Discard(Exception): # Transformers + class _Decoratable: "Provides support for decorating methods with @v_args" @@ -107,7 +109,6 @@ class Transformer(_Decoratable): except Exception as e: raise VisitError(token.type, token, e) - def _transform_children(self, children): for c in children: try: @@ -148,7 +149,6 @@ class Transformer(_Decoratable): return token - class InlineTransformer(Transformer): # XXX Deprecated def _call_userfunc(self, tree, new_children=None): # Assumes tree is already transformed @@ -203,7 +203,7 @@ class Transformer_NonRecursive(Transformer): q = [tree] while q: t = q.pop() - rev_postfix.append( t ) + rev_postfix.append(t) if isinstance(t, Tree): q += t.children @@ -225,7 +225,6 @@ class Transformer_NonRecursive(Transformer): return t - class Transformer_InPlaceRecursive(Transformer): "Same as Transformer, recursive, but changes the tree in-place instead of returning new instances" def _transform_tree(self, tree): @@ -297,7 +296,6 @@ class Visitor_Recursive(VisitorBase): return tree - def visit_children_decor(func): "See Interpreter" @wraps(func) @@ -338,8 +336,6 @@ class Interpreter(_Decoratable): return self.visit_children(tree) - - # Decorators def _apply_decorator(obj, decorator, **kwargs): @@ -351,7 +347,6 @@ def _apply_decorator(obj, decorator, **kwargs): return _apply(decorator, **kwargs) - def _inline_args__func(func): @wraps(func) def create_decorator(_f, with_self): @@ -370,7 +365,6 @@ def inline_args(obj): # XXX Deprecated return _apply_decorator(obj, _inline_args__func) - def _visitor_args_func_dec(func, visit_wrapper=None, static=False): def create_decorator(_f, with_self): if with_self: @@ -390,11 +384,11 @@ def _visitor_args_func_dec(func, visit_wrapper=None, static=False): return f -def _vargs_inline(f, data, children, meta): +def _vargs_inline(f, _data, children, _meta): return f(*children) -def 
_vargs_meta_inline(f, data, children, meta): +def _vargs_meta_inline(f, _data, children, meta): return f(meta, *children) -def _vargs_meta(f, data, children, meta): +def _vargs_meta(f, _data, children, meta): return f(children, meta) # TODO swap these for consistency? Backwards incompatible! def _vargs_tree(f, data, children, meta): return f(Tree(data, children, meta)) @@ -415,6 +409,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None): inline (bool, optional): Children are provided as ``*args`` instead of a list argument (not recommended for very long lists). meta (bool, optional): Provides two arguments: ``children`` and ``meta`` (instead of just the first) tree (bool, optional): Provides the entire tree as the argument, instead of the children. + wrapper (function, optional): Provide a function to decorate all methods. Example: :: @@ -457,7 +452,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None): ###} -#--- Visitor Utilities --- +# --- Visitor Utilities --- class CollapseAmbiguities(Transformer): """ @@ -471,7 +466,9 @@ class CollapseAmbiguities(Transformer): """ def _ambig(self, options): return sum(options, []) + def __default__(self, data, children_lists, meta): return [Tree(data, children, meta) for children in combine_alternatives(children_lists)] + def __default_token__(self, t): return [t]
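
Usage note (not part of the diff): the stub change above re-types TraditionalLexer.__init__ to take a single LexerConf instead of five separate arguments, matching the runtime lexer, which now reads conf.ignore, conf.callbacks and conf.g_regex_flags. A minimal sketch of the new construction path, modelled on the LexerConf(terminals, re, ['WS', 'COMMENT']) call in GrammarLoader above; the terminal definitions here are invented for illustration:

    import re
    from lark.common import LexerConf
    from lark.lexer import PatternRE, TerminalDef, TraditionalLexer

    # Invented terminals, built with TerminalDef(name, pattern) as elsewhere in this patch.
    terminals = [
        TerminalDef('NUMBER', PatternRE(r'[0-9]+')),
        TerminalDef('WS', PatternRE(r'[ \t]+')),
    ]

    # Positional arguments as in GrammarLoader: tokens, re_module, ignore.
    conf = LexerConf(terminals, re, ['WS'])
    lexer = TraditionalLexer(conf)  # previously: TraditionalLexer(terminals, re_, ignore=..., ...)

Callers that built the lexer from keyword arguments now populate a LexerConf once and hand the same object to whichever lexer class is selected.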
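Usage note (not part of the diff): FromPackageLoader lets %import resolve grammars from package data instead of the filesystem; stdlib_loader above is the instance used for lark's own bundled grammars. A sketch under the assumption that the import_paths option is wired through Lark() to the load_grammar() entry point shown above; the package name and path are invented:

    from lark import Lark
    from lark.load_grammar import FromPackageLoader

    # Invented layout: resolve %import'ed grammars from my_package/grammars/
    # via pkgutil.get_data, falling back to lark's stdlib grammars.
    custom_loader = FromPackageLoader('my_package', ('grammars',))
    parser = Lark('start: "hello"', parser='lalr', import_paths=[custom_loader])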
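Usage note (not part of the diff): the v_args docstring now documents the wrapper parameter. Going by the _vargs_* helpers in this patch, a wrapper is called as wrapper(f, data, children, meta) and decides how the user callback f is invoked. A minimal sketch under that convention; the wrapper, grammar and transformer are invented for illustration:

    from lark import Lark, Transformer, v_args

    # Invented wrapper, same calling convention as _vargs_inline/_vargs_meta above:
    # trace the callback, then use the plain convention f(children).
    def traced(f, data, children, meta):
        print('visiting %s with %d children' % (data, len(children)))
        return f(children)

    @v_args(wrapper=traced)
    class ToInt(Transformer):
        def number(self, children):
            return int(children[0])

    parser = Lark('start: number\nnumber: NUMBER\n%import common.NUMBER', parser='lalr')
    print(ToInt().transform(parser.parse('42')))  # prints the trace line, then the transformed tree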