@@ -1,7 +1,7 @@ | |||
# -*- coding: utf-8 -*- | |||
from types import ModuleType | |||
from typing import ( | |||
TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, | |||
TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, | |||
Pattern as REPattern, | |||
) | |||
from abc import abstractmethod, ABC | |||
@@ -100,10 +100,22 @@ class Lexer(ABC): | |||
lex: Callable[..., Iterator[Token]] | |||
class LexerConf: | |||
tokens: Collection[TerminalDef] | |||
re_module: ModuleType | |||
ignore: Collection[str] = () | |||
    postlex: Any = None
callbacks: Optional[Dict[str, _Callback]] = None | |||
g_regex_flags: int = 0 | |||
skip_validation: bool = False | |||
use_bytes: bool = False | |||
class TraditionalLexer(Lexer): | |||
terminals: Collection[TerminalDef] | |||
ignore_types: List[str] | |||
newline_types: List[str] | |||
ignore_types: FrozenSet[str] | |||
newline_types: FrozenSet[str] | |||
user_callbacks: Dict[str, _Callback] | |||
callback: Dict[str, _Callback] | |||
mres: List[Tuple[REPattern, Dict[int, str]]] | |||
@@ -111,11 +123,7 @@ class TraditionalLexer(Lexer): | |||
def __init__( | |||
self, | |||
terminals: Collection[TerminalDef], | |||
re_: ModuleType, | |||
ignore: Collection[str] = ..., | |||
user_callbacks: Dict[str, _Callback] = ..., | |||
g_regex_flags: int = ... | |||
conf: LexerConf | |||
): | |||
... | |||
@@ -128,6 +136,8 @@ class TraditionalLexer(Lexer): | |||
def lex(self, stream: str) -> Iterator[Token]: | |||
... | |||
def next_token(self, lex_state: Any) -> Token: | |||
... | |||
class ContextualLexer(Lexer): | |||
lexers: Dict[str, TraditionalLexer] | |||
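The stub changes above fold the lexer's loose keyword arguments into a single LexerConf object. A minimal usage sketch under that assumption (the terminal definitions below are invented for illustration):

import re
from lark.lexer import TerminalDef, PatternRE, TraditionalLexer
from lark.common import LexerConf

terminals = [TerminalDef('INT', PatternRE(r'[0-9]+')), TerminalDef('WS', PatternRE(r'[ \t]+'))]
conf = LexerConf(terminals, re, ignore=['WS'])   # mirrors LexerConf(terminals, re, ['WS', 'COMMENT']) used later in this diff
lexer = TraditionalLexer(conf)                   # previously: TraditionalLexer(terminals, re_, ignore=..., user_callbacks=..., g_regex_flags=...)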
@@ -3,6 +3,7 @@ from .lexer import TerminalDef | |||
###{standalone | |||
class LexerConf(Serialize): | |||
__serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes' | |||
__serialize_namespace__ = TerminalDef, | |||
@@ -19,11 +20,10 @@ class LexerConf(Serialize): | |||
###} | |||
class ParserConf: | |||
def __init__(self, rules, callbacks, start): | |||
assert isinstance(start, list) | |||
self.rules = rules | |||
self.callbacks = callbacks | |||
self.start = start | |||
@@ -6,15 +6,19 @@ from .utils import STRING_TYPE, logger | |||
class LarkError(Exception): | |||
pass | |||
class GrammarError(LarkError): | |||
pass | |||
class ParseError(LarkError): | |||
pass | |||
class LexError(LarkError): | |||
pass | |||
class UnexpectedEOF(ParseError): | |||
def __init__(self, expected): | |||
self.expected = expected | |||
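A hedged sketch of how this exception surfaces to users (grammar and input invented; the Earley parser raises it when the input ends before a rule can complete):

from lark import Lark
from lark.exceptions import UnexpectedEOF

parser = Lark('start: "a" "b"', parser='earley')
try:
    parser.parse("a")        # input ends before the "b" terminal is seen
except UnexpectedEOF as e:
    print(e.expected)        # the terminals that could still have matched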
@@ -40,14 +40,12 @@ class Terminal(Symbol): | |||
return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out) | |||
class NonTerminal(Symbol): | |||
__serialize_fields__ = 'name', | |||
is_term = False | |||
class RuleOptions(Serialize): | |||
__serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices' | |||
@@ -104,5 +102,4 @@ class Rule(Serialize): | |||
return self.origin == other.origin and self.expansion == other.expansion | |||
###} |
@@ -24,6 +24,7 @@ except ImportError: | |||
###{standalone | |||
class LarkOptions(Serialize): | |||
"""Specifies the options for Lark | |||
@@ -36,7 +37,7 @@ class LarkOptions(Serialize): | |||
debug | |||
Display debug information, such as warnings (default: False) | |||
transformer | |||
Applies the transformer to every parse tree (equivlent to applying it after the parse, but faster) | |||
Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) | |||
propagate_positions | |||
Propagates (line, column, end_line, end_column) attributes into all tree branches. | |||
maybe_placeholders | |||
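These options are ordinary keyword arguments of the Lark constructor. An illustrative sketch (the grammar is made up for the example):

from lark import Lark

parser = Lark(r'''
    start: WORD+
    WORD: /[a-z]+/
    %ignore " "
''', parser='lalr', propagate_positions=True)

tree = parser.parse("hello world")
print(tree.children[0].line, tree.children[0].column)   # 1 1; with propagate_positions these attributes also appear on tree branches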
@@ -320,7 +321,7 @@ class Lark(Serialize): | |||
# Else, if the user asked to disable priorities, strip them from the | |||
# rules. This allows the Earley parsers to skip an extra forest walk | |||
# for improved performance, if you don't need them (or didn't specify any). | |||
elif self.options.priority == None: | |||
elif self.options.priority is None: | |||
for rule in self.rules: | |||
if rule.options.priority is not None: | |||
rule.options.priority = None | |||
@@ -360,7 +361,7 @@ class Lark(Serialize): | |||
self.rules, | |||
self.options.tree_class or Tree, | |||
self.options.propagate_positions, | |||
self.options.parser!='lalr' and self.options.ambiguity=='explicit', | |||
self.options.parser != 'lalr' and self.options.ambiguity == 'explicit', | |||
self.options.maybe_placeholders | |||
) | |||
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) | |||
@@ -410,7 +411,7 @@ class Lark(Serialize): | |||
data['parser'], | |||
memo, | |||
self._callbacks, | |||
self.options, # Not all, but multiple attributes are used | |||
self.options, # Not all, but multiple attributes are used | |||
) | |||
self.terminals = self.parser.lexer_conf.tokens | |||
self._terminals_dict = {t.name: t for t in self.terminals} | |||
@@ -1,4 +1,4 @@ | |||
## Lexer Implementation | |||
# Lexer Implementation | |||
import re | |||
@@ -8,6 +8,7 @@ from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken | |||
###{standalone | |||
from copy import copy | |||
class Pattern(Serialize): | |||
def __init__(self, value, flags=()): | |||
@@ -20,6 +21,7 @@ class Pattern(Serialize): | |||
# Pattern Hashing assumes all subclasses have a different priority! | |||
def __hash__(self): | |||
return hash((type(self), self.value, self.flags)) | |||
def __eq__(self, other): | |||
return type(self) == type(other) and self.value == other.value and self.flags == other.flags | |||
@@ -53,6 +55,7 @@ class PatternStr(Pattern): | |||
return len(self.value) | |||
max_width = min_width | |||
class PatternRE(Pattern): | |||
__serialize_fields__ = 'value', 'flags', '_width' | |||
@@ -70,6 +73,7 @@ class PatternRE(Pattern): | |||
@property | |||
def min_width(self): | |||
return self._get_width()[0] | |||
@property | |||
def max_width(self): | |||
return self._get_width()[1] | |||
@@ -139,7 +143,7 @@ class Token(Str): | |||
return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) | |||
def __reduce__(self): | |||
return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, )) | |||
return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column)) | |||
def __repr__(self): | |||
return 'Token(%r, %r)' % (self.type, self.value) | |||
@@ -193,6 +197,7 @@ class UnlessCallback: | |||
break | |||
return t | |||
class CallChain: | |||
def __init__(self, callback1, callback2, cond): | |||
self.callback1 = callback1 | |||
@@ -204,16 +209,13 @@ class CallChain: | |||
return self.callback2(t) if self.cond(t2) else t2 | |||
def _create_unless(terminals, g_regex_flags, re_, use_bytes): | |||
tokens_by_type = classify(terminals, lambda t: type(t.pattern)) | |||
assert len(tokens_by_type) <= 2, tokens_by_type.keys() | |||
embedded_strs = set() | |||
callback = {} | |||
for retok in tokens_by_type.get(PatternRE, []): | |||
unless = [] # {} | |||
unless = [] | |||
for strtok in tokens_by_type.get(PatternStr, []): | |||
if strtok.priority > retok.priority: | |||
continue | |||
@@ -245,13 +247,15 @@ def _build_mres(terminals, max_size, g_regex_flags, match_whole, re_, use_bytes) | |||
except AssertionError: # Yes, this is what Python provides us.. :/ | |||
return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_, use_bytes) | |||
mres.append((mre, {i:n for n,i in mre.groupindex.items()} )) | |||
mres.append((mre, {i: n for n, i in mre.groupindex.items()})) | |||
terminals = terminals[max_size:] | |||
return mres | |||
def build_mres(terminals, g_regex_flags, re_, use_bytes, match_whole=False): | |||
return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_, use_bytes) | |||
def _regexp_has_newline(r): | |||
r"""Expressions that may indicate newlines in a regexp: | |||
- newlines (\n) | |||
@@ -262,6 +266,7 @@ def _regexp_has_newline(r): | |||
""" | |||
return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r) | |||
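A few spot checks of this heuristic, for illustration:

from lark.lexer import _regexp_has_newline

assert _regexp_has_newline(r'(\n|;)')        # contains a literal \n escape
assert _regexp_has_newline(r'[^"]+')         # a negated character class can match a newline
assert not _regexp_has_newline(r'[a-z]+')    # cannot match a newline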
class Lexer(object): | |||
"""Lexer interface | |||
@@ -300,7 +305,7 @@ class TraditionalLexer(Lexer): | |||
self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())) | |||
self.ignore_types = frozenset(conf.ignore) | |||
terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name)) | |||
terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name)) | |||
self.terminals = terminals | |||
self.user_callbacks = conf.callbacks | |||
self.g_regex_flags = conf.g_regex_flags | |||
@@ -309,7 +314,7 @@ class TraditionalLexer(Lexer): | |||
self._mres = None | |||
def _build(self): | |||
terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, re_=self.re, use_bytes=self.use_bytes) | |||
terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes) | |||
assert all(self.callback.values()) | |||
for type_, f in self.user_callbacks.items(): | |||
@@ -333,7 +338,7 @@ class TraditionalLexer(Lexer): | |||
if m: | |||
return m.group(0), type_from_index[m.lastindex] | |||
def lex(self, state, parser_state): | |||
def lex(self, state, _parser_state): | |||
with suppress(EOFError): | |||
while True: | |||
yield self.next_token(state) | |||
@@ -372,6 +377,7 @@ class TraditionalLexer(Lexer): | |||
# EOF | |||
raise EOFError(self) | |||
class LexerState: | |||
__slots__ = 'text', 'line_ctr', 'last_token' | |||
@@ -383,6 +389,7 @@ class LexerState: | |||
def __copy__(self): | |||
return type(self)(self.text, copy(self.line_ctr), self.last_token) | |||
class ContextualLexer(Lexer): | |||
def __init__(self, conf, states, always_accept=()): | |||
@@ -430,8 +437,9 @@ class ContextualLexer(Lexer): | |||
token = self.root_lexer.next_token(lexer_state) | |||
raise UnexpectedToken(token, e.allowed, state=parser_state.position) | |||
class LexerThread: | |||
"A thread that ties a lexer instance and a lexer state, to be used by the parser" | |||
"""A thread that ties a lexer instance and a lexer state, to be used by the parser""" | |||
def __init__(self, lexer, text): | |||
self.lexer = lexer | |||
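Roughly how these pieces compose (internal API; a hedged sketch that reuses the `lexer` from the LexerConf example above):

# The lexer itself is now effectively stateless; the mutable cursor lives in LexerState,
# and LexerThread pairs the two on behalf of the parser.
thread = LexerThread(lexer, "12 34")
for tok in thread.lex(None):     # TraditionalLexer.lex ignores the parser-state argument
    print(tok.type, repr(tok))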
@@ -1,4 +1,4 @@ | |||
"Parses and creates Grammar objects" | |||
"""Parses and creates Grammar objects""" | |||
import os.path | |||
import sys | |||
@@ -166,6 +166,7 @@ RULES = { | |||
'literal': ['REGEXP', 'STRING'], | |||
} | |||
@inline_args | |||
class EBNF_to_BNF(Transformer_InPlace): | |||
def __init__(self): | |||
@@ -259,9 +260,9 @@ class SimplifyRule_Visitor(Visitor): | |||
for i, child in enumerate(tree.children): | |||
if isinstance(child, Tree) and child.data == 'expansions': | |||
tree.data = 'expansions' | |||
tree.children = [self.visit(ST('expansion', [option if i==j else other | |||
for j, other in enumerate(tree.children)])) | |||
for option in dedup_list(child.children)] | |||
tree.children = [self.visit(ST('expansion', [option if i == j else other | |||
for j, other in enumerate(tree.children)])) | |||
for option in dedup_list(child.children)] | |||
self._flatten(tree) | |||
break | |||
@@ -284,8 +285,10 @@ class SimplifyRule_Visitor(Visitor): | |||
class RuleTreeToText(Transformer): | |||
def expansions(self, x): | |||
return x | |||
def expansion(self, symbols): | |||
return symbols, None | |||
def alias(self, x): | |||
(expansion, _alias), alias = x | |||
assert _alias is None, (alias, expansion, '-', _alias) # Double alias not allowed | |||
@@ -300,8 +303,9 @@ class CanonizeTree(Transformer_InPlace): | |||
tokenmods, value = args | |||
return tokenmods + [value] | |||
class PrepareAnonTerminals(Transformer_InPlace): | |||
"Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them" | |||
"""Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them""" | |||
def __init__(self, terminals): | |||
self.terminals = terminals | |||
@@ -310,7 +314,6 @@ class PrepareAnonTerminals(Transformer_InPlace): | |||
self.i = 0 | |||
self.rule_options = None | |||
@inline_args | |||
def pattern(self, p): | |||
value = p.value | |||
@@ -330,14 +333,14 @@ class PrepareAnonTerminals(Transformer_InPlace): | |||
except KeyError: | |||
if value.isalnum() and value[0].isalpha() and value.upper() not in self.term_set: | |||
with suppress(UnicodeEncodeError): | |||
value.upper().encode('ascii') # Make sure we don't have unicode in our terminal names | |||
value.upper().encode('ascii') # Make sure we don't have unicode in our terminal names | |||
term_name = value.upper() | |||
if term_name in self.term_set: | |||
term_name = None | |||
elif isinstance(p, PatternRE): | |||
if p in self.term_reverse: # Kind of a weird placement.name | |||
if p in self.term_reverse: # Kind of a weird placement.name | |||
term_name = self.term_reverse[p].name | |||
else: | |||
assert False, p | |||
@@ -359,7 +362,7 @@ class PrepareAnonTerminals(Transformer_InPlace): | |||
class _ReplaceSymbols(Transformer_InPlace): | |||
" Helper for ApplyTemplates " | |||
"""Helper for ApplyTemplates""" | |||
def __init__(self): | |||
self.names = {} | |||
@@ -374,8 +377,9 @@ class _ReplaceSymbols(Transformer_InPlace): | |||
return self.__default__('template_usage', [self.names[c[0]].name] + c[1:], None) | |||
return self.__default__('template_usage', c, None) | |||
class ApplyTemplates(Transformer_InPlace): | |||
" Apply the templates, creating new rules that represent the used templates " | |||
"""Apply the templates, creating new rules that represent the used templates""" | |||
def __init__(self, rule_defs): | |||
self.rule_defs = rule_defs | |||
@@ -401,8 +405,6 @@ def _rfind(s, choices): | |||
return max(s.rfind(c) for c in choices) | |||
def _literal_to_pattern(literal): | |||
v = literal.value | |||
flag_start = _rfind(v, '/"')+1 | |||
@@ -441,7 +443,7 @@ class PrepareLiterals(Transformer_InPlace): | |||
assert start.type == end.type == 'STRING' | |||
start = start.value[1:-1] | |||
end = end.value[1:-1] | |||
assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1, (start, end, len(eval_escaping(start)), len(eval_escaping(end))) | |||
assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1 | |||
regexp = '[%s-%s]' % (start, end) | |||
return ST('pattern', [PatternRE(regexp)]) | |||
@@ -460,6 +462,7 @@ def _make_joined_pattern(regexp, flags_set): | |||
return PatternRE(regexp, flags) | |||
class TerminalTreeToPattern(Transformer): | |||
def pattern(self, ps): | |||
p ,= ps | |||
@@ -503,6 +506,7 @@ class TerminalTreeToPattern(Transformer): | |||
def value(self, v): | |||
return v[0] | |||
class PrepareSymbols(Transformer_InPlace): | |||
def value(self, v): | |||
v ,= v | |||
@@ -514,13 +518,16 @@ class PrepareSymbols(Transformer_InPlace): | |||
return Terminal(Str(v.value), filter_out=v.startswith('_')) | |||
assert False | |||
def _choice_of_rules(rules): | |||
return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules]) | |||
def nr_deepcopy_tree(t): | |||
"Deepcopy tree `t` without recursion" | |||
"""Deepcopy tree `t` without recursion""" | |||
return Transformer_NonRecursive(False).transform(t) | |||
class Grammar: | |||
def __init__(self, rule_defs, term_defs, ignore): | |||
self.term_defs = term_defs | |||
@@ -547,7 +554,7 @@ class Grammar: | |||
raise GrammarError("Terminals cannot be empty (%s)" % name) | |||
transformer = PrepareLiterals() * TerminalTreeToPattern() | |||
terminals = [TerminalDef(name, transformer.transform( term_tree ), priority) | |||
terminals = [TerminalDef(name, transformer.transform(term_tree), priority) | |||
for name, (term_tree, priority) in term_defs if term_tree] | |||
# ================= | |||
@@ -566,10 +573,10 @@ class Grammar: | |||
ebnf_to_bnf = EBNF_to_BNF() | |||
rules = [] | |||
i = 0 | |||
while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates | |||
while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates | |||
name, params, rule_tree, options = rule_defs[i] | |||
i += 1 | |||
if len(params) != 0: # Dont transform templates | |||
if len(params) != 0: # Dont transform templates | |||
continue | |||
rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None | |||
ebnf_to_bnf.rule_options = rule_options | |||
@@ -594,7 +601,7 @@ class Grammar: | |||
for i, (expansion, alias) in enumerate(expansions): | |||
if alias and name.startswith('_'): | |||
raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) | |||
raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)"% (name, alias)) | |||
empty_indices = [x==_EMPTY for x in expansion] | |||
if any(empty_indices): | |||
@@ -623,14 +630,13 @@ class Grammar: | |||
# Remove duplicates | |||
compiled_rules = list(set(compiled_rules)) | |||
# Filter out unused rules | |||
while True: | |||
c = len(compiled_rules) | |||
used_rules = {s for r in compiled_rules | |||
for s in r.expansion | |||
if isinstance(s, NonTerminal) | |||
and s != r.origin} | |||
for s in r.expansion | |||
if isinstance(s, NonTerminal) | |||
and s != r.origin} | |||
used_rules |= {NonTerminal(s) for s in start} | |||
compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules) | |||
for r in unused: | |||
@@ -663,6 +669,7 @@ class PackageResource(object): | |||
def __repr__(self): | |||
return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.path) | |||
class FromPackageLoader(object): | |||
""" | |||
Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`. | |||
@@ -699,11 +706,12 @@ class FromPackageLoader(object): | |||
return PackageResource(self.pkg_name, full_path), text.decode() | |||
raise IOError() | |||
stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS) | |||
stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS) | |||
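A custom loader for grammars bundled in your own package could look like this (the package and directory names are invented):

my_loader = FromPackageLoader('my_package', ('grammars',))   # analogous to stdlib_loader above
# It is then supplied through the import_paths parameter of load_grammar() further below,
# so that %import statements resolve against files under my_package/grammars/.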
_imported_grammars = {} | |||
def import_from_grammar_into_namespace(grammar, namespace, aliases): | |||
"""Returns all rules and terminals of grammar, prepended | |||
with a 'namespace' prefix, except for those which are aliased. | |||
@@ -724,8 +732,6 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): | |||
raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace)) | |||
return _find_used_symbols(tree) - set(params) | |||
def get_namespace_name(name, params): | |||
if params is not None: | |||
try: | |||
@@ -746,19 +752,17 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): | |||
else: | |||
assert symbol.type == 'RULE' | |||
_, params, tree, options = imported_rules[symbol] | |||
params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s' ) % (namespace, p) for p in params} | |||
params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s') % (namespace, p) for p in params} | |||
for t in tree.iter_subtrees(): | |||
for i, c in enumerate(t.children): | |||
if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'): | |||
t.children[i] = Token(c.type, get_namespace_name(c, params_map)) | |||
params = [params_map[p] for p in params] # We can not rely on ordered dictionaries | |||
params = [params_map[p] for p in params] # We can not rely on ordered dictionaries | |||
rule_defs.append((get_namespace_name(symbol, params_map), params, tree, options)) | |||
return term_defs, rule_defs | |||
def resolve_term_references(term_defs): | |||
# TODO Solve with transitive closure (maybe) | |||
@@ -798,7 +802,7 @@ def options_from_rule(name, params, *x): | |||
else: | |||
expansions ,= x | |||
priority = None | |||
params = [t.value for t in params.children] if params is not None else [] # For the grammar parser | |||
params = [t.value for t in params.children] if params is not None else [] # For the grammar parser | |||
keep_all_tokens = name.startswith('!') | |||
name = name.lstrip('!') | |||
@@ -812,10 +816,12 @@ def options_from_rule(name, params, *x): | |||
def symbols_from_strcase(expansion): | |||
return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion] | |||
@inline_args | |||
class PrepareGrammar(Transformer_InPlace): | |||
def terminal(self, name): | |||
return name | |||
def nonterminal(self, name): | |||
return name | |||
@@ -825,10 +831,11 @@ def _find_used_symbols(tree): | |||
return {t for x in tree.find_data('expansion') | |||
for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))} | |||
class GrammarLoader: | |||
ERRORS = [ | |||
('Unclosed parenthesis', ['a: (\n']), | |||
('Umatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']), | |||
('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']), | |||
('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']), | |||
('Illegal name for rules or terminals', ['Aa:\n']), | |||
('Alias expects lowercase name', ['a: -> "a"\n']), | |||
@@ -843,8 +850,9 @@ class GrammarLoader: | |||
def __init__(self, global_keep_all_tokens): | |||
terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] | |||
rules = [options_from_rule(name, None, x) for name, x in RULES.items()] | |||
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, _p, xs, o in rules for i, x in enumerate(xs)] | |||
rules = [options_from_rule(name, None, x) for name, x in RULES.items()] | |||
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) | |||
for r, _p, xs, o in rules for i, x in enumerate(xs)] | |||
callback = ParseTreeBuilder(rules, ST).create_callback() | |||
import re | |||
lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT']) | |||
@@ -881,10 +889,10 @@ class GrammarLoader: | |||
return _imported_grammars[grammar_path] | |||
def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]): | |||
"Parse grammar_text, verify, and create Grammar object. Display nice messages on error." | |||
"""Parse grammar_text, verify, and create Grammar object. Display nice messages on error.""" | |||
try: | |||
tree = self.canonize_tree.transform( self.parser.parse(grammar_text+'\n') ) | |||
tree = self.canonize_tree.transform(self.parser.parse(grammar_text+'\n')) | |||
except UnexpectedCharacters as e: | |||
context = e.get_context(grammar_text) | |||
raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" % | |||
@@ -1037,7 +1045,7 @@ class GrammarLoader: | |||
raise GrammarError("Template '%s' used but not defined (in rule %s)" % (sym, name)) | |||
if len(args) != rule_names[sym]: | |||
raise GrammarError("Wrong number of template arguments used for %s " | |||
"(expected %s, got %s) (in rule %s)"%(sym, rule_names[sym], len(args), name)) | |||
"(expected %s, got %s) (in rule %s)" % (sym, rule_names[sym], len(args), name)) | |||
for sym in _find_used_symbols(expansions): | |||
if sym.type == 'TERMINAL': | |||
if sym not in terminal_names: | |||
@@ -1046,10 +1054,8 @@ class GrammarLoader: | |||
if sym not in rule_names and sym not in params: | |||
raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name)) | |||
return Grammar(rules, term_defs, ignore_names) | |||
def load_grammar(grammar, source, import_paths, global_keep_all_tokens): | |||
return GrammarLoader(global_keep_all_tokens).load_grammar(grammar, source, import_paths) |
@@ -1,7 +1,7 @@ | |||
from .exceptions import GrammarError | |||
from .lexer import Token | |||
from .tree import Tree | |||
from .visitors import InlineTransformer # XXX Deprecated | |||
from .visitors import InlineTransformer # XXX Deprecated | |||
from .visitors import Transformer_InPlace | |||
from .visitors import _vargs_meta, _vargs_meta_inline | |||
@@ -20,6 +20,7 @@ class ExpandSingleChild: | |||
else: | |||
return self.node_builder(children) | |||
class PropagatePositions: | |||
def __init__(self, node_builder): | |||
self.node_builder = node_builder | |||
@@ -87,8 +88,9 @@ class ChildFilter: | |||
return self.node_builder(filtered) | |||
class ChildFilterLALR(ChildFilter): | |||
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" | |||
"""Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)""" | |||
def __call__(self, children): | |||
filtered = [] | |||
@@ -108,6 +110,7 @@ class ChildFilterLALR(ChildFilter): | |||
return self.node_builder(filtered) | |||
class ChildFilterLALR_NoPlaceholders(ChildFilter): | |||
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" | |||
def __init__(self, to_include, node_builder): | |||
@@ -126,9 +129,11 @@ class ChildFilterLALR_NoPlaceholders(ChildFilter): | |||
filtered.append(children[i]) | |||
return self.node_builder(filtered) | |||
def _should_expand(sym): | |||
return not sym.is_term and sym.name.startswith('_') | |||
def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices): | |||
# Prepare empty_indices as: How many Nones to insert at each index? | |||
if _empty_indices: | |||
@@ -156,6 +161,7 @@ def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indi | |||
# LALR without placeholders | |||
return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include]) | |||
class AmbiguousExpander: | |||
"""Deal with the case where we're expanding children ('_rule') into a parent but the children | |||
are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself | |||
@@ -167,10 +173,10 @@ class AmbiguousExpander: | |||
self.to_expand = to_expand | |||
def __call__(self, children): | |||
def _is_ambig_tree(child): | |||
return hasattr(child, 'data') and child.data == '_ambig' | |||
def _is_ambig_tree(t): | |||
return hasattr(t, 'data') and t.data == '_ambig' | |||
#### When we're repeatedly expanding ambiguities we can end up with nested ambiguities. | |||
# -- When we're repeatedly expanding ambiguities we can end up with nested ambiguities. | |||
# All children of an _ambig node should be a derivation of that ambig node, hence | |||
# it is safe to assume that if we see an _ambig node nested within an ambig node | |||
# it is safe to simply expand it into the parent _ambig node as an alternative derivation. | |||
@@ -186,15 +192,17 @@ class AmbiguousExpander: | |||
if not ambiguous: | |||
return self.node_builder(children) | |||
expand = [ iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children) ] | |||
expand = [iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children)] | |||
return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))]) | |||
def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens): | |||
to_expand = [i for i, sym in enumerate(expansion) | |||
if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))] | |||
if to_expand: | |||
return partial(AmbiguousExpander, to_expand, tree_class) | |||
class AmbiguousIntermediateExpander: | |||
""" | |||
Propagate ambiguous intermediate nodes and their derivations up to the | |||
@@ -275,12 +283,14 @@ class AmbiguousIntermediateExpander: | |||
return self.node_builder(children) | |||
def ptb_inline_args(func): | |||
@wraps(func) | |||
def f(children): | |||
return func(*children) | |||
return f | |||
def inplace_transformer(func): | |||
@wraps(func) | |||
def f(children): | |||
@@ -289,9 +299,11 @@ def inplace_transformer(func): | |||
return func(tree) | |||
return f | |||
def apply_visit_wrapper(func, name, wrapper): | |||
if wrapper is _vargs_meta or wrapper is _vargs_meta_inline: | |||
raise NotImplementedError("Meta args not supported for internal transformer") | |||
@wraps(func) | |||
def f(children): | |||
return wrapper(func, name, children, None) | |||
@@ -323,7 +335,6 @@ class ParseTreeBuilder: | |||
yield rule, wrapper_chain | |||
def create_callback(self, transformer=None): | |||
callbacks = {} | |||
@@ -298,8 +298,8 @@ class Parser: | |||
# this column. Find the item for the start_symbol, which is the root of the SPPF tree. | |||
solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0] | |||
if not solutions: | |||
expected_tokens = [t.expect for t in to_scan] | |||
raise UnexpectedEOF(expected_tokens) | |||
expected_terminals = [t.expect for t in to_scan] | |||
raise UnexpectedEOF(expected_terminals) | |||
if self.debug: | |||
from .earley_forest import ForestToPyDotVisitor | |||
@@ -46,14 +46,14 @@ class Tree(object): | |||
def _pretty(self, level, indent_str): | |||
if len(self.children) == 1 and not isinstance(self.children[0], Tree): | |||
return [ indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n'] | |||
return [indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n'] | |||
l = [ indent_str*level, self._pretty_label(), '\n' ] | |||
l = [indent_str*level, self._pretty_label(), '\n'] | |||
for n in self.children: | |||
if isinstance(n, Tree): | |||
l += n._pretty(level+1, indent_str) | |||
else: | |||
l += [ indent_str*(level+1), '%s' % (n,), '\n' ] | |||
l += [indent_str*(level+1), '%s' % (n,), '\n'] | |||
return l | |||
@@ -102,8 +102,8 @@ class Tree(object): | |||
###} | |||
def expand_kids_by_index(self, *indices): | |||
"Expand (inline) children at the given indices" | |||
for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices | |||
"""Expand (inline) children at the given indices""" | |||
for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices | |||
kid = self.children[i] | |||
self.children[i:i+1] = kid.children | |||
@@ -144,12 +144,15 @@ class Tree(object): | |||
@property | |||
def line(self): | |||
return self.meta.line | |||
@property | |||
def column(self): | |||
return self.meta.column | |||
@property | |||
def end_line(self): | |||
return self.meta.end_line | |||
@property | |||
def end_column(self): | |||
return self.meta.end_column | |||
@@ -168,6 +171,7 @@ def pydot__tree_to_dot(tree, filename, rankdir="LR", **kwargs): | |||
graph = pydot__tree_to_graph(tree, rankdir, **kwargs) | |||
graph.write(filename) | |||
def pydot__tree_to_graph(tree, rankdir="LR", **kwargs): | |||
"""Creates a colorful image that represents the tree (data+children, without meta) | |||
@@ -196,7 +200,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs): | |||
subnodes = [_to_pydot(child) if isinstance(child, Tree) else new_leaf(child) | |||
for child in subtree.children] | |||
node = pydot.Node(i[0], style="filled", fillcolor="#%x"%color, label=subtree.data) | |||
node = pydot.Node(i[0], style="filled", fillcolor="#%x" % color, label=subtree.data) | |||
i[0] += 1 | |||
graph.add_node(node) | |||
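Typical entry point for these helpers, given a previously parsed `tree` (requires the optional pydot dependency; the filename is an example):

from lark.tree import pydot__tree_to_dot

pydot__tree_to_dot(tree, 'parse_tree.dot', rankdir='TB')   # writes Graphviz DOT source; render with e.g. `dot -Tpng`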
@@ -1,10 +1,10 @@ | |||
import sys | |||
import os | |||
from functools import reduce | |||
from ast import literal_eval | |||
from collections import deque | |||
###{standalone | |||
import sys, re | |||
import logging | |||
logger = logging.getLogger("lark") | |||
logger.addHandler(logging.StreamHandler()) | |||
@@ -12,6 +12,8 @@ logger.addHandler(logging.StreamHandler()) | |||
# By default, we should not output any log messages | |||
logger.setLevel(logging.CRITICAL) | |||
Py36 = (sys.version_info[:2] >= (3, 6)) | |||
def classify(seq, key=None, value=None): | |||
d = {} | |||
@@ -27,7 +29,7 @@ def classify(seq, key=None, value=None): | |||
def _deserialize(data, namespace, memo): | |||
if isinstance(data, dict): | |||
if '__type__' in data: # Object | |||
if '__type__' in data: # Object | |||
class_ = namespace[data['__type__']] | |||
return class_.deserialize(data, memo) | |||
elif '@' in data: | |||
@@ -105,7 +107,6 @@ class SerializeMemoizer(Serialize): | |||
return _deserialize(data, namespace, memo) | |||
try: | |||
STRING_TYPE = basestring | |||
except NameError: # Python 3 | |||
@@ -118,10 +119,11 @@ from contextlib import contextmanager | |||
Str = type(u'') | |||
try: | |||
classtype = types.ClassType # Python2 | |||
classtype = types.ClassType # Python2 | |||
except AttributeError: | |||
classtype = type # Python3 | |||
def smart_decorator(f, create_decorator): | |||
if isinstance(f, types.FunctionType): | |||
return wraps(f)(create_decorator(f, True)) | |||
@@ -139,17 +141,16 @@ def smart_decorator(f, create_decorator): | |||
else: | |||
return create_decorator(f.__func__.__call__, True) | |||
try: | |||
import regex | |||
except ImportError: | |||
regex = None | |||
import sys, re | |||
Py36 = (sys.version_info[:2] >= (3, 6)) | |||
import sre_parse | |||
import sre_constants | |||
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}') | |||
def get_regexp_width(expr): | |||
if regex: | |||
# Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with | |||
@@ -173,9 +174,7 @@ def dedup_list(l): | |||
preserving the original order of the list. Assumes that | |||
the list entries are hashable.""" | |||
dedup = set() | |||
return [ x for x in l if not (x in dedup or dedup.add(x))] | |||
return [x for x in l if not (x in dedup or dedup.add(x))] | |||
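For instance:

from lark.utils import dedup_list

assert dedup_list([3, 1, 3, 2, 1]) == [3, 1, 2]   # first occurrences win, original order preserved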
try: | |||
@@ -197,8 +196,6 @@ except ImportError: | |||
pass | |||
try: | |||
compare = cmp | |||
except NameError: | |||
@@ -210,7 +207,6 @@ except NameError: | |||
return -1 | |||
class Enumerator(Serialize): | |||
def __init__(self): | |||
self.enums = {} | |||
@@ -8,6 +8,7 @@ from .lexer import Token | |||
###{standalone | |||
from inspect import getmembers, getmro | |||
class Discard(Exception): | |||
"""When raising the Discard exception in a transformer callback, | |||
that node is discarded and won't appear in the parent. | |||
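A hedged example of the pattern (the rule name `comment` is invented):

from lark import Transformer, Discard

class StripComments(Transformer):
    def comment(self, children):
        raise Discard        # this `comment` subtree will not appear in its parent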
@@ -16,6 +17,7 @@ class Discard(Exception): | |||
# Transformers | |||
class _Decoratable: | |||
"Provides support for decorating methods with @v_args" | |||
@@ -107,7 +109,6 @@ class Transformer(_Decoratable): | |||
except Exception as e: | |||
raise VisitError(token.type, token, e) | |||
def _transform_children(self, children): | |||
for c in children: | |||
try: | |||
@@ -148,7 +149,6 @@ class Transformer(_Decoratable): | |||
return token | |||
class InlineTransformer(Transformer): # XXX Deprecated | |||
def _call_userfunc(self, tree, new_children=None): | |||
# Assumes tree is already transformed | |||
@@ -203,7 +203,7 @@ class Transformer_NonRecursive(Transformer): | |||
q = [tree] | |||
while q: | |||
t = q.pop() | |||
rev_postfix.append( t ) | |||
rev_postfix.append(t) | |||
if isinstance(t, Tree): | |||
q += t.children | |||
@@ -225,7 +225,6 @@ class Transformer_NonRecursive(Transformer): | |||
return t | |||
class Transformer_InPlaceRecursive(Transformer): | |||
"Same as Transformer, recursive, but changes the tree in-place instead of returning new instances" | |||
def _transform_tree(self, tree): | |||
@@ -297,7 +296,6 @@ class Visitor_Recursive(VisitorBase): | |||
return tree | |||
def visit_children_decor(func): | |||
"See Interpreter" | |||
@wraps(func) | |||
@@ -338,8 +336,6 @@ class Interpreter(_Decoratable): | |||
return self.visit_children(tree) | |||
# Decorators | |||
def _apply_decorator(obj, decorator, **kwargs): | |||
@@ -351,7 +347,6 @@ def _apply_decorator(obj, decorator, **kwargs): | |||
return _apply(decorator, **kwargs) | |||
def _inline_args__func(func): | |||
@wraps(func) | |||
def create_decorator(_f, with_self): | |||
@@ -370,7 +365,6 @@ def inline_args(obj): # XXX Deprecated | |||
return _apply_decorator(obj, _inline_args__func) | |||
def _visitor_args_func_dec(func, visit_wrapper=None, static=False): | |||
def create_decorator(_f, with_self): | |||
if with_self: | |||
@@ -390,11 +384,11 @@ def _visitor_args_func_dec(func, visit_wrapper=None, static=False): | |||
return f | |||
def _vargs_inline(f, data, children, meta): | |||
def _vargs_inline(f, _data, children, _meta): | |||
return f(*children) | |||
def _vargs_meta_inline(f, data, children, meta): | |||
def _vargs_meta_inline(f, _data, children, meta): | |||
return f(meta, *children) | |||
def _vargs_meta(f, data, children, meta): | |||
def _vargs_meta(f, _data, children, meta): | |||
return f(children, meta) # TODO swap these for consistency? Backwards incompatible! | |||
def _vargs_tree(f, data, children, meta): | |||
return f(Tree(data, children, meta)) | |||
@@ -415,6 +409,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None): | |||
inline (bool, optional): Children are provided as ``*args`` instead of a list argument (not recommended for very long lists). | |||
meta (bool, optional): Provides two arguments: ``children`` and ``meta`` (instead of just the first) | |||
tree (bool, optional): Provides the entire tree as the argument, instead of the children. | |||
wrapper (function, optional): Provide a function to decorate all methods. | |||
Example: | |||
:: | |||
@@ -457,7 +452,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None): | |||
###} | |||
#--- Visitor Utilities --- | |||
# --- Visitor Utilities --- | |||
class CollapseAmbiguities(Transformer): | |||
""" | |||
@@ -471,7 +466,9 @@ class CollapseAmbiguities(Transformer): | |||
""" | |||
def _ambig(self, options): | |||
return sum(options, []) | |||
def __default__(self, data, children_lists, meta): | |||
return [Tree(data, children, meta) for children in combine_alternatives(children_lists)] | |||
def __default_token__(self, t): | |||
return [t] |
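Sketch of the intended use (assumes an Earley parse with ambiguity='explicit'; the grammar is a toy example):

from lark import Lark
from lark.visitors import CollapseAmbiguities

parser = Lark('start: "a" "a" | "a"+', parser='earley', ambiguity='explicit')
for option in CollapseAmbiguities().transform(parser.parse("aa")):
    print(option.pretty())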