@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from types import ModuleType
 from typing import (
-    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional,
+    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
     Pattern as REPattern,
 )
 from abc import abstractmethod, ABC
@@ -100,10 +100,22 @@ class Lexer(ABC):
     lex: Callable[..., Iterator[Token]]
+class LexerConf:
+    tokens: Collection[TerminalDef]
+    re_module: ModuleType
+    ignore: Collection[str] = ()
+    postlex: Any =None
+    callbacks: Optional[Dict[str, _Callback]] = None
+    g_regex_flags: int = 0
+    skip_validation: bool = False
+    use_bytes: bool = False
 class TraditionalLexer(Lexer):
     terminals: Collection[TerminalDef]
-    ignore_types: List[str]
-    newline_types: List[str]
+    ignore_types: FrozenSet[str]
+    newline_types: FrozenSet[str]
     user_callbacks: Dict[str, _Callback]
     callback: Dict[str, _Callback]
     mres: List[Tuple[REPattern, Dict[int, str]]]
@@ -111,11 +123,7 @@ class TraditionalLexer(Lexer):
     def __init__(
         self,
-        terminals: Collection[TerminalDef],
-        re_: ModuleType,
-        ignore: Collection[str] = ...,
-        user_callbacks: Dict[str, _Callback] = ...,
-        g_regex_flags: int = ...
+        conf: LexerConf
     ):
         ...
@@ -128,6 +136,8 @@ class TraditionalLexer(Lexer):
     def lex(self, stream: str) -> Iterator[Token]:
         ...
+    def next_token(self, lex_state: Any) -> Token:
+        ...
 class ContextualLexer(Lexer):
     lexers: Dict[str, TraditionalLexer]
@@ -3,6 +3,7 @@ from .lexer import TerminalDef
 ###{standalone
 class LexerConf(Serialize):
     __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes'
     __serialize_namespace__ = TerminalDef,
@@ -19,11 +20,10 @@ class LexerConf(Serialize):
 ###}
 class ParserConf:
     def __init__(self, rules, callbacks, start):
         assert isinstance(start, list)
         self.rules = rules
         self.callbacks = callbacks
         self.start = start
@@ -6,15 +6,19 @@ from .utils import STRING_TYPE, logger
 class LarkError(Exception):
     pass
 class GrammarError(LarkError):
     pass
 class ParseError(LarkError):
     pass
 class LexError(LarkError):
     pass
 class UnexpectedEOF(ParseError):
     def __init__(self, expected):
         self.expected = expected
@@ -40,14 +40,12 @@ class Terminal(Symbol):
         return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)
 class NonTerminal(Symbol):
     __serialize_fields__ = 'name',
     is_term = False
 class RuleOptions(Serialize):
     __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'
@@ -104,5 +102,4 @@ class Rule(Serialize):
         return self.origin == other.origin and self.expansion == other.expansion
 ###}
@@ -24,6 +24,7 @@ except ImportError:
 ###{standalone
 class LarkOptions(Serialize):
     """Specifies the options for Lark
@@ -36,7 +37,7 @@ class LarkOptions(Serialize):
     debug
         Display debug information, such as warnings (default: False)
     transformer
-        Applies the transformer to every parse tree (equivlent to applying it after the parse, but faster)
+        Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
     propagate_positions
         Propagates (line, column, end_line, end_column) attributes into all tree branches.
     maybe_placeholders
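For reference, the options documented in this docstring are keyword arguments of the `Lark` constructor. A minimal sketch of how they are passed; the grammar below is invented purely for illustration:

```python
from lark import Lark

# Hypothetical grammar, used only to show where the documented options go.
parser = Lark(r"""
    start: NAME "=" NUMBER
    NAME: /[a-z]+/
    NUMBER: /[0-9]+/
    %ignore " "
""", parser='lalr', debug=False, propagate_positions=True, maybe_placeholders=False)

print(parser.parse("x = 42").pretty())
```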
@@ -320,7 +321,7 @@ class Lark(Serialize):
         # Else, if the user asked to disable priorities, strip them from the
         # rules. This allows the Earley parsers to skip an extra forest walk
         # for improved performance, if you don't need them (or didn't specify any).
-        elif self.options.priority == None:
+        elif self.options.priority is None:
             for rule in self.rules:
                 if rule.options.priority is not None:
                     rule.options.priority = None
@@ -360,7 +361,7 @@ class Lark(Serialize):
             self.rules,
             self.options.tree_class or Tree,
             self.options.propagate_positions,
-            self.options.parser!='lalr' and self.options.ambiguity=='explicit',
+            self.options.parser != 'lalr' and self.options.ambiguity == 'explicit',
             self.options.maybe_placeholders
         )
         self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
@@ -410,7 +411,7 @@ class Lark(Serialize):
             data['parser'],
             memo,
             self._callbacks,
-            self.options, # Not all, but multiple attributes are used
+            self.options,  # Not all, but multiple attributes are used
         )
         self.terminals = self.parser.lexer_conf.tokens
         self._terminals_dict = {t.name: t for t in self.terminals}
@@ -1,4 +1,4 @@
-## Lexer Implementation
+# Lexer Implementation
 import re
@@ -8,6 +8,7 @@ from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
 ###{standalone
 from copy import copy
 class Pattern(Serialize):
     def __init__(self, value, flags=()):
@@ -20,6 +21,7 @@ class Pattern(Serialize):
     # Pattern Hashing assumes all subclasses have a different priority!
     def __hash__(self):
         return hash((type(self), self.value, self.flags))
     def __eq__(self, other):
         return type(self) == type(other) and self.value == other.value and self.flags == other.flags
@@ -53,6 +55,7 @@ class PatternStr(Pattern):
         return len(self.value)
     max_width = min_width
 class PatternRE(Pattern):
     __serialize_fields__ = 'value', 'flags', '_width'
@@ -70,6 +73,7 @@ class PatternRE(Pattern):
     @property
     def min_width(self):
         return self._get_width()[0]
     @property
     def max_width(self):
         return self._get_width()[1]
@@ -139,7 +143,7 @@ class Token(Str):
         return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
     def __reduce__(self):
-        return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, ))
+        return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column))
     def __repr__(self):
         return 'Token(%r, %r)' % (self.type, self.value)
@@ -193,6 +197,7 @@ class UnlessCallback:
                 break
         return t
 class CallChain:
     def __init__(self, callback1, callback2, cond):
         self.callback1 = callback1
@@ -204,16 +209,13 @@ class CallChain:
         return self.callback2(t) if self.cond(t2) else t2
 def _create_unless(terminals, g_regex_flags, re_, use_bytes):
     tokens_by_type = classify(terminals, lambda t: type(t.pattern))
     assert len(tokens_by_type) <= 2, tokens_by_type.keys()
     embedded_strs = set()
     callback = {}
     for retok in tokens_by_type.get(PatternRE, []):
-        unless = [] # {}
+        unless = []
         for strtok in tokens_by_type.get(PatternStr, []):
             if strtok.priority > retok.priority:
                 continue
@@ -245,13 +247,15 @@ def _build_mres(terminals, max_size, g_regex_flags, match_whole, re_, use_bytes)
         except AssertionError: # Yes, this is what Python provides us.. :/
             return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_, use_bytes)
-        mres.append((mre, {i:n for n,i in mre.groupindex.items()} ))
+        mres.append((mre, {i: n for n, i in mre.groupindex.items()}))
         terminals = terminals[max_size:]
     return mres
 def build_mres(terminals, g_regex_flags, re_, use_bytes, match_whole=False):
     return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_, use_bytes)
 def _regexp_has_newline(r):
     r"""Expressions that may indicate newlines in a regexp:
         - newlines (\n)
@@ -262,6 +266,7 @@ def _regexp_has_newline(r):
     """
     return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
 class Lexer(object):
     """Lexer interface
@@ -300,7 +305,7 @@ class TraditionalLexer(Lexer):
         self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp()))
         self.ignore_types = frozenset(conf.ignore)
-        terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
+        terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
         self.terminals = terminals
         self.user_callbacks = conf.callbacks
         self.g_regex_flags = conf.g_regex_flags
@@ -309,7 +314,7 @@ class TraditionalLexer(Lexer):
         self._mres = None
     def _build(self):
-        terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, re_=self.re, use_bytes=self.use_bytes)
+        terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes)
         assert all(self.callback.values())
         for type_, f in self.user_callbacks.items():
@@ -333,7 +338,7 @@ class TraditionalLexer(Lexer):
             if m:
                 return m.group(0), type_from_index[m.lastindex]
-    def lex(self, state, parser_state):
+    def lex(self, state, _parser_state):
         with suppress(EOFError):
             while True:
                 yield self.next_token(state)
@@ -372,6 +377,7 @@ class TraditionalLexer(Lexer):
         # EOF
         raise EOFError(self)
 class LexerState:
     __slots__ = 'text', 'line_ctr', 'last_token'
@@ -383,6 +389,7 @@ class LexerState:
     def __copy__(self):
         return type(self)(self.text, copy(self.line_ctr), self.last_token)
 class ContextualLexer(Lexer):
     def __init__(self, conf, states, always_accept=()):
@@ -430,8 +437,9 @@ class ContextualLexer(Lexer):
             token = self.root_lexer.next_token(lexer_state)
             raise UnexpectedToken(token, e.allowed, state=parser_state.position)
 class LexerThread:
-    "A thread that ties a lexer instance and a lexer state, to be used by the parser"
+    """A thread that ties a lexer instance and a lexer state, to be used by the parser"""
     def __init__(self, lexer, text):
         self.lexer = lexer
@@ -1,4 +1,4 @@
-"Parses and creates Grammar objects"
+"""Parses and creates Grammar objects"""
 import os.path
 import sys
@@ -166,6 +166,7 @@ RULES = {
     'literal': ['REGEXP', 'STRING'],
 }
 @inline_args
 class EBNF_to_BNF(Transformer_InPlace):
     def __init__(self):
@@ -259,9 +260,9 @@ class SimplifyRule_Visitor(Visitor):
             for i, child in enumerate(tree.children):
                 if isinstance(child, Tree) and child.data == 'expansions':
                     tree.data = 'expansions'
-                    tree.children = [self.visit(ST('expansion', [option if i==j else other
-                                              for j, other in enumerate(tree.children)]))
-                                         for option in dedup_list(child.children)]
+                    tree.children = [self.visit(ST('expansion', [option if i == j else other
+                                                                 for j, other in enumerate(tree.children)]))
+                                     for option in dedup_list(child.children)]
                     self._flatten(tree)
                     break
@@ -284,8 +285,10 @@ class SimplifyRule_Visitor(Visitor):
 class RuleTreeToText(Transformer):
     def expansions(self, x):
         return x
     def expansion(self, symbols):
         return symbols, None
     def alias(self, x):
         (expansion, _alias), alias = x
         assert _alias is None, (alias, expansion, '-', _alias) # Double alias not allowed
@@ -300,8 +303,9 @@ class CanonizeTree(Transformer_InPlace):
         tokenmods, value = args
         return tokenmods + [value]
 class PrepareAnonTerminals(Transformer_InPlace):
-    "Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"
+    """Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"""
     def __init__(self, terminals):
         self.terminals = terminals
@@ -310,7 +314,6 @@ class PrepareAnonTerminals(Transformer_InPlace):
         self.i = 0
         self.rule_options = None
     @inline_args
     def pattern(self, p):
         value = p.value
@@ -330,14 +333,14 @@ class PrepareAnonTerminals(Transformer_InPlace):
             except KeyError:
                 if value.isalnum() and value[0].isalpha() and value.upper() not in self.term_set:
                     with suppress(UnicodeEncodeError):
-                        value.upper().encode('ascii') # Make sure we don't have unicode in our terminal names
+                        value.upper().encode('ascii')  # Make sure we don't have unicode in our terminal names
                         term_name = value.upper()
                 if term_name in self.term_set:
                     term_name = None
         elif isinstance(p, PatternRE):
-            if p in self.term_reverse: # Kind of a weird placement.name
+            if p in self.term_reverse:  # Kind of a weird placement.name
                 term_name = self.term_reverse[p].name
             else:
                 assert False, p
@@ -359,7 +362,7 @@ class PrepareAnonTerminals(Transformer_InPlace):
 class _ReplaceSymbols(Transformer_InPlace):
-    " Helper for ApplyTemplates "
+    """Helper for ApplyTemplates"""
     def __init__(self):
         self.names = {}
@@ -374,8 +377,9 @@ class _ReplaceSymbols(Transformer_InPlace):
             return self.__default__('template_usage', [self.names[c[0]].name] + c[1:], None)
         return self.__default__('template_usage', c, None)
 class ApplyTemplates(Transformer_InPlace):
-    " Apply the templates, creating new rules that represent the used templates "
+    """Apply the templates, creating new rules that represent the used templates"""
     def __init__(self, rule_defs):
         self.rule_defs = rule_defs
@@ -401,8 +405,6 @@ def _rfind(s, choices):
     return max(s.rfind(c) for c in choices)
 def _literal_to_pattern(literal):
     v = literal.value
     flag_start = _rfind(v, '/"')+1
@@ -441,7 +443,7 @@ class PrepareLiterals(Transformer_InPlace):
         assert start.type == end.type == 'STRING'
         start = start.value[1:-1]
         end = end.value[1:-1]
-        assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1, (start, end, len(eval_escaping(start)), len(eval_escaping(end)))
+        assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1
         regexp = '[%s-%s]' % (start, end)
         return ST('pattern', [PatternRE(regexp)])
@@ -460,6 +462,7 @@ def _make_joined_pattern(regexp, flags_set):
     return PatternRE(regexp, flags)
 class TerminalTreeToPattern(Transformer):
     def pattern(self, ps):
         p ,= ps
@@ -503,6 +506,7 @@ class TerminalTreeToPattern(Transformer):
     def value(self, v):
         return v[0]
 class PrepareSymbols(Transformer_InPlace):
     def value(self, v):
         v ,= v
@@ -514,13 +518,16 @@ class PrepareSymbols(Transformer_InPlace):
             return Terminal(Str(v.value), filter_out=v.startswith('_'))
         assert False
 def _choice_of_rules(rules):
     return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules])
 def nr_deepcopy_tree(t):
-    "Deepcopy tree `t` without recursion"
+    """Deepcopy tree `t` without recursion"""
     return Transformer_NonRecursive(False).transform(t)
 class Grammar:
     def __init__(self, rule_defs, term_defs, ignore):
         self.term_defs = term_defs
@@ -547,7 +554,7 @@ class Grammar:
                 raise GrammarError("Terminals cannot be empty (%s)" % name)
         transformer = PrepareLiterals() * TerminalTreeToPattern()
-        terminals = [TerminalDef(name, transformer.transform( term_tree ), priority)
+        terminals = [TerminalDef(name, transformer.transform(term_tree), priority)
                      for name, (term_tree, priority) in term_defs if term_tree]
         # =================
@@ -566,10 +573,10 @@ class Grammar:
         ebnf_to_bnf = EBNF_to_BNF()
         rules = []
         i = 0
-        while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates
+        while i < len(rule_defs):  # We have to do it like this because rule_defs might grow due to templates
             name, params, rule_tree, options = rule_defs[i]
             i += 1
-            if len(params) != 0: # Dont transform templates
+            if len(params) != 0:  # Dont transform templates
                 continue
             rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
             ebnf_to_bnf.rule_options = rule_options
@@ -594,7 +601,7 @@ class Grammar:
             for i, (expansion, alias) in enumerate(expansions):
                 if alias and name.startswith('_'):
-                    raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))
+                    raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)"% (name, alias))
                 empty_indices = [x==_EMPTY for x in expansion]
                 if any(empty_indices):
@@ -623,14 +630,13 @@ class Grammar:
         # Remove duplicates
         compiled_rules = list(set(compiled_rules))
         # Filter out unused rules
         while True:
             c = len(compiled_rules)
             used_rules = {s for r in compiled_rules
-                            for s in r.expansion
-                            if isinstance(s, NonTerminal)
-                            and s != r.origin}
+                          for s in r.expansion
+                          if isinstance(s, NonTerminal)
+                          and s != r.origin}
             used_rules |= {NonTerminal(s) for s in start}
             compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules)
             for r in unused:
@@ -663,6 +669,7 @@ class PackageResource(object):
     def __repr__(self):
         return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.path)
 class FromPackageLoader(object):
     """
     Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`.
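A hedged usage sketch for FromPackageLoader. The package and directory names are invented, and wiring the loader in through `import_paths` is assumed from the `load_grammar(..., import_paths)` signatures shown later in this diff:

```python
from lark import Lark
from lark.load_grammar import FromPackageLoader

# Hypothetical layout: my_package/grammars/*.lark shipped inside the package.
custom_loader = FromPackageLoader('my_package', ('grammars',))
parser = Lark('start: "a"', import_paths=[custom_loader])
```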
@@ -699,11 +706,12 @@ class FromPackageLoader(object):
                 return PackageResource(self.pkg_name, full_path), text.decode()
         raise IOError()
-stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)
+stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)
 _imported_grammars = {}
 def import_from_grammar_into_namespace(grammar, namespace, aliases):
     """Returns all rules and terminals of grammar, prepended
     with a 'namespace' prefix, except for those which are aliased.
@@ -724,8 +732,6 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases):
             raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace))
         return _find_used_symbols(tree) - set(params)
     def get_namespace_name(name, params):
         if params is not None:
             try:
@@ -746,19 +752,17 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases):
         else:
             assert symbol.type == 'RULE'
             _, params, tree, options = imported_rules[symbol]
-            params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s' ) % (namespace, p) for p in params}
+            params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s') % (namespace, p) for p in params}
             for t in tree.iter_subtrees():
                 for i, c in enumerate(t.children):
                     if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'):
                         t.children[i] = Token(c.type, get_namespace_name(c, params_map))
-            params = [params_map[p] for p in params] # We can not rely on ordered dictionaries
+            params = [params_map[p] for p in params]  # We can not rely on ordered dictionaries
             rule_defs.append((get_namespace_name(symbol, params_map), params, tree, options))
     return term_defs, rule_defs
 def resolve_term_references(term_defs):
     # TODO Solve with transitive closure (maybe)
@@ -798,7 +802,7 @@ def options_from_rule(name, params, *x):
     else:
         expansions ,= x
         priority = None
-    params = [t.value for t in params.children] if params is not None else [] # For the grammar parser
+    params = [t.value for t in params.children] if params is not None else []  # For the grammar parser
     keep_all_tokens = name.startswith('!')
     name = name.lstrip('!')
@@ -812,10 +816,12 @@ def options_from_rule(name, params, *x):
 def symbols_from_strcase(expansion):
     return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion]
 @inline_args
 class PrepareGrammar(Transformer_InPlace):
     def terminal(self, name):
         return name
     def nonterminal(self, name):
         return name
@@ -825,10 +831,11 @@ def _find_used_symbols(tree):
     return {t for x in tree.find_data('expansion')
             for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}
 class GrammarLoader:
     ERRORS = [
         ('Unclosed parenthesis', ['a: (\n']),
-        ('Umatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
+        ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
         ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
         ('Illegal name for rules or terminals', ['Aa:\n']),
         ('Alias expects lowercase name', ['a: -> "a"\n']),
@@ -843,8 +850,9 @@ class GrammarLoader:
     def __init__(self, global_keep_all_tokens):
         terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]
-        rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
-        rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, _p, xs, o in rules for i, x in enumerate(xs)]
+        rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
+        rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o)
+                 for r, _p, xs, o in rules for i, x in enumerate(xs)]
         callback = ParseTreeBuilder(rules, ST).create_callback()
         import re
         lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT'])
@@ -881,10 +889,10 @@ class GrammarLoader:
         return _imported_grammars[grammar_path]
     def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]):
-        "Parse grammar_text, verify, and create Grammar object. Display nice messages on error."
+        """Parse grammar_text, verify, and create Grammar object. Display nice messages on error."""
         try:
-            tree = self.canonize_tree.transform( self.parser.parse(grammar_text+'\n') )
+            tree = self.canonize_tree.transform(self.parser.parse(grammar_text+'\n'))
         except UnexpectedCharacters as e:
             context = e.get_context(grammar_text)
             raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
@@ -1037,7 +1045,7 @@ class GrammarLoader:
                         raise GrammarError("Template '%s' used but not defined (in rule %s)" % (sym, name))
                     if len(args) != rule_names[sym]:
                         raise GrammarError("Wrong number of template arguments used for %s "
-                            "(expected %s, got %s) (in rule %s)"%(sym, rule_names[sym], len(args), name))
+                            "(expected %s, got %s) (in rule %s)" % (sym, rule_names[sym], len(args), name))
             for sym in _find_used_symbols(expansions):
                 if sym.type == 'TERMINAL':
                     if sym not in terminal_names:
@@ -1046,10 +1054,8 @@ class GrammarLoader:
                     if sym not in rule_names and sym not in params:
                         raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name))
         return Grammar(rules, term_defs, ignore_names)
 def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
     return GrammarLoader(global_keep_all_tokens).load_grammar(grammar, source, import_paths)
@@ -1,7 +1,7 @@
 from .exceptions import GrammarError
 from .lexer import Token
 from .tree import Tree
-from .visitors import InlineTransformer # XXX Deprecated
+from .visitors import InlineTransformer  # XXX Deprecated
 from .visitors import Transformer_InPlace
 from .visitors import _vargs_meta, _vargs_meta_inline
@@ -20,6 +20,7 @@ class ExpandSingleChild:
         else:
             return self.node_builder(children)
 class PropagatePositions:
     def __init__(self, node_builder):
         self.node_builder = node_builder
@@ -87,8 +88,9 @@ class ChildFilter:
         return self.node_builder(filtered)
 class ChildFilterLALR(ChildFilter):
-    "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
+    """Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"""
     def __call__(self, children):
         filtered = []
@@ -108,6 +110,7 @@ class ChildFilterLALR(ChildFilter):
         return self.node_builder(filtered)
 class ChildFilterLALR_NoPlaceholders(ChildFilter):
     "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
     def __init__(self, to_include, node_builder):
@@ -126,9 +129,11 @@ class ChildFilterLALR_NoPlaceholders(ChildFilter):
                 filtered.append(children[i])
         return self.node_builder(filtered)
 def _should_expand(sym):
     return not sym.is_term and sym.name.startswith('_')
 def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
     # Prepare empty_indices as: How many Nones to insert at each index?
     if _empty_indices:
@@ -156,6 +161,7 @@ def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indi
             # LALR without placeholders
             return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])
 class AmbiguousExpander:
     """Deal with the case where we're expanding children ('_rule') into a parent but the children
       are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself
@@ -167,10 +173,10 @@ class AmbiguousExpander:
         self.to_expand = to_expand
     def __call__(self, children):
-        def _is_ambig_tree(child):
-            return hasattr(child, 'data') and child.data == '_ambig'
+        def _is_ambig_tree(t):
+            return hasattr(t, 'data') and t.data == '_ambig'
-        #### When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
+        # -- When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
         # All children of an _ambig node should be a derivation of that ambig node, hence
         # it is safe to assume that if we see an _ambig node nested within an ambig node
         # it is safe to simply expand it into the parent _ambig node as an alternative derivation.
@@ -186,15 +192,17 @@ class AmbiguousExpander:
         if not ambiguous:
             return self.node_builder(children)
-        expand = [ iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children) ]
+        expand = [iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children)]
         return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))])
 def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
     to_expand = [i for i, sym in enumerate(expansion)
                  if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
     if to_expand:
         return partial(AmbiguousExpander, to_expand, tree_class)
 class AmbiguousIntermediateExpander:
     """
     Propagate ambiguous intermediate nodes and their derivations up to the
@@ -275,12 +283,14 @@ class AmbiguousIntermediateExpander:
         return self.node_builder(children)
 def ptb_inline_args(func):
     @wraps(func)
     def f(children):
         return func(*children)
     return f
 def inplace_transformer(func):
     @wraps(func)
     def f(children):
@@ -289,9 +299,11 @@ def inplace_transformer(func):
         return func(tree)
     return f
 def apply_visit_wrapper(func, name, wrapper):
     if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
         raise NotImplementedError("Meta args not supported for internal transformer")
     @wraps(func)
     def f(children):
         return wrapper(func, name, children, None)
@@ -323,7 +335,6 @@ class ParseTreeBuilder:
             yield rule, wrapper_chain
     def create_callback(self, transformer=None):
         callbacks = {}
@@ -298,8 +298,8 @@ class Parser:
         # this column. Find the item for the start_symbol, which is the root of the SPPF tree.
         solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
         if not solutions:
-            expected_tokens = [t.expect for t in to_scan]
-            raise UnexpectedEOF(expected_tokens)
+            expected_terminals = [t.expect for t in to_scan]
+            raise UnexpectedEOF(expected_terminals)
         if self.debug:
             from .earley_forest import ForestToPyDotVisitor
@@ -46,14 +46,14 @@ class Tree(object):
     def _pretty(self, level, indent_str):
         if len(self.children) == 1 and not isinstance(self.children[0], Tree):
-            return [ indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']
+            return [indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']
-        l = [ indent_str*level, self._pretty_label(), '\n' ]
+        l = [indent_str*level, self._pretty_label(), '\n']
         for n in self.children:
             if isinstance(n, Tree):
                 l += n._pretty(level+1, indent_str)
             else:
-                l += [ indent_str*(level+1), '%s' % (n,), '\n' ]
+                l += [indent_str*(level+1), '%s' % (n,), '\n']
         return l
@@ -102,8 +102,8 @@ class Tree(object):
     ###}
     def expand_kids_by_index(self, *indices):
-        "Expand (inline) children at the given indices"
-        for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices
+        """Expand (inline) children at the given indices"""
+        for i in sorted(indices, reverse=True):  # reverse so that changing tail won't affect indices
             kid = self.children[i]
             self.children[i:i+1] = kid.children
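A small worked example of why the reverse iteration above matters (illustrative; not taken from the library's tests):

```python
from lark import Tree

# Expanding the higher index first keeps the lower index valid.
t = Tree('start', [Tree('_a', ['x']), 'y', Tree('_b', ['z'])])
t.expand_kids_by_index(0, 2)
assert t.children == ['x', 'y', 'z']
```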
@@ -144,12 +144,15 @@ class Tree(object):
     @property
     def line(self):
         return self.meta.line
     @property
     def column(self):
         return self.meta.column
     @property
     def end_line(self):
         return self.meta.end_line
     @property
     def end_column(self):
         return self.meta.end_column
@@ -168,6 +171,7 @@ def pydot__tree_to_dot(tree, filename, rankdir="LR", **kwargs):
     graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
     graph.write(filename)
 def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):
     """Creates a colorful image that represents the tree (data+children, without meta)
@@ -196,7 +200,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):
         subnodes = [_to_pydot(child) if isinstance(child, Tree) else new_leaf(child)
                     for child in subtree.children]
-        node = pydot.Node(i[0], style="filled", fillcolor="#%x"%color, label=subtree.data)
+        node = pydot.Node(i[0], style="filled", fillcolor="#%x" % color, label=subtree.data)
         i[0] += 1
         graph.add_node(node)
@@ -1,10 +1,10 @@
-import sys
 import os
 from functools import reduce
 from ast import literal_eval
 from collections import deque
 ###{standalone
+import sys, re
 import logging
 logger = logging.getLogger("lark")
 logger.addHandler(logging.StreamHandler())
@@ -12,6 +12,8 @@ logger.addHandler(logging.StreamHandler())
 # By default, we should not output any log messages
 logger.setLevel(logging.CRITICAL)
+Py36 = (sys.version_info[:2] >= (3, 6))
 def classify(seq, key=None, value=None):
     d = {}
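Since the library logger is silenced by default (the `setLevel(logging.CRITICAL)` call in the hunk above), a consumer who wants lark's warnings back can raise the level of the `"lark"` logger:

```python
import logging

# The logger name "lark" comes from the getLogger call shown above.
logging.getLogger("lark").setLevel(logging.DEBUG)
```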
@@ -27,7 +29,7 @@ def classify(seq, key=None, value=None):
 def _deserialize(data, namespace, memo):
     if isinstance(data, dict):
-        if '__type__' in data: # Object
+        if '__type__' in data:  # Object
             class_ = namespace[data['__type__']]
             return class_.deserialize(data, memo)
         elif '@' in data:
@@ -105,7 +107,6 @@ class SerializeMemoizer(Serialize):
         return _deserialize(data, namespace, memo)
 try:
     STRING_TYPE = basestring
 except NameError: # Python 3
@@ -118,10 +119,11 @@ from contextlib import contextmanager
 Str = type(u'')
 try:
-    classtype = types.ClassType # Python2
+    classtype = types.ClassType  # Python2
 except AttributeError:
     classtype = type # Python3
 def smart_decorator(f, create_decorator):
     if isinstance(f, types.FunctionType):
         return wraps(f)(create_decorator(f, True))
@@ -139,17 +141,16 @@ def smart_decorator(f, create_decorator):
     else:
         return create_decorator(f.__func__.__call__, True)
 try:
     import regex
 except ImportError:
     regex = None
-import sys, re
-Py36 = (sys.version_info[:2] >= (3, 6))
 import sre_parse
 import sre_constants
 categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
 def get_regexp_width(expr):
     if regex:
         # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
@@ -173,9 +174,7 @@ def dedup_list(l):
     preserving the original order of the list. Assumes that
     the list entries are hashable."""
     dedup = set()
-    return [ x for x in l if not (x in dedup or dedup.add(x))]
+    return [x for x in l if not (x in dedup or dedup.add(x))]
 try:
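Behaviour of `dedup_list`, for reference; this follows directly from its docstring and the set-based filter above:

```python
from lark.utils import dedup_list

assert dedup_list([3, 1, 3, 2, 1]) == [3, 1, 2]   # order preserved, duplicates dropped
```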
@@ -197,8 +196,6 @@ except ImportError:
     pass
 try:
     compare = cmp
 except NameError:
@@ -210,7 +207,6 @@ except NameError:
             return -1
 class Enumerator(Serialize):
     def __init__(self):
         self.enums = {}
@@ -8,6 +8,7 @@ from .lexer import Token
 ###{standalone
 from inspect import getmembers, getmro
 class Discard(Exception):
     """When raising the Discard exception in a transformer callback,
     that node is discarded and won't appear in the parent.
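A hedged sketch of the pattern this docstring describes; the rule name is invented, and in the version this diff targets Discard is raised from the callback:

```python
from lark import Transformer
from lark.visitors import Discard

class DropComments(Transformer):
    def comment(self, children):   # hypothetical rule name
        raise Discard              # the 'comment' node never reaches its parent
```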
@@ -16,6 +17,7 @@ class Discard(Exception):
 # Transformers
 class _Decoratable:
     "Provides support for decorating methods with @v_args"
@@ -107,7 +109,6 @@ class Transformer(_Decoratable):
             except Exception as e:
                 raise VisitError(token.type, token, e)
     def _transform_children(self, children):
         for c in children:
             try:
@@ -148,7 +149,6 @@ class Transformer(_Decoratable):
         return token
 class InlineTransformer(Transformer): # XXX Deprecated
     def _call_userfunc(self, tree, new_children=None):
         # Assumes tree is already transformed
@@ -203,7 +203,7 @@ class Transformer_NonRecursive(Transformer):
         q = [tree]
         while q:
             t = q.pop()
-            rev_postfix.append( t )
+            rev_postfix.append(t)
             if isinstance(t, Tree):
                 q += t.children
@@ -225,7 +225,6 @@ class Transformer_NonRecursive(Transformer):
         return t
 class Transformer_InPlaceRecursive(Transformer):
     "Same as Transformer, recursive, but changes the tree in-place instead of returning new instances"
     def _transform_tree(self, tree):
@@ -297,7 +296,6 @@ class Visitor_Recursive(VisitorBase):
         return tree
 def visit_children_decor(func):
     "See Interpreter"
     @wraps(func)
@@ -338,8 +336,6 @@ class Interpreter(_Decoratable):
         return self.visit_children(tree)
 # Decorators
 def _apply_decorator(obj, decorator, **kwargs):
@@ -351,7 +347,6 @@ def _apply_decorator(obj, decorator, **kwargs):
     return _apply(decorator, **kwargs)
 def _inline_args__func(func):
     @wraps(func)
     def create_decorator(_f, with_self):
@@ -370,7 +365,6 @@ def inline_args(obj): # XXX Deprecated
     return _apply_decorator(obj, _inline_args__func)
 def _visitor_args_func_dec(func, visit_wrapper=None, static=False):
     def create_decorator(_f, with_self):
         if with_self:
@@ -390,11 +384,11 @@ def _visitor_args_func_dec(func, visit_wrapper=None, static=False):
     return f
-def _vargs_inline(f, data, children, meta):
+def _vargs_inline(f, _data, children, _meta):
     return f(*children)
-def _vargs_meta_inline(f, data, children, meta):
+def _vargs_meta_inline(f, _data, children, meta):
     return f(meta, *children)
-def _vargs_meta(f, data, children, meta):
+def _vargs_meta(f, _data, children, meta):
     return f(children, meta) # TODO swap these for consistency? Backwards incompatible!
 def _vargs_tree(f, data, children, meta):
     return f(Tree(data, children, meta))
@@ -415,6 +409,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None):
         inline (bool, optional): Children are provided as ``*args`` instead of a list argument (not recommended for very long lists).
         meta (bool, optional): Provides two arguments: ``children`` and ``meta`` (instead of just the first)
         tree (bool, optional): Provides the entire tree as the argument, instead of the children.
+        wrapper (function, optional): Provide a function to decorate all methods.
     Example:
         ::
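The docstring's example block falls outside this hunk; a typical use of the parameters documented above looks roughly like this (illustrative):

```python
from lark import Transformer, v_args

@v_args(inline=True)
class CalculateTree(Transformer):
    def add(self, left, right):   # children arrive as positional arguments
        return left + right
```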
@@ -457,7 +452,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None):
 ###}
-#--- Visitor Utilities ---
+# --- Visitor Utilities ---
 class CollapseAmbiguities(Transformer):
     """
@@ -471,7 +466,9 @@ class CollapseAmbiguities(Transformer):
     """
     def _ambig(self, options):
         return sum(options, [])
     def __default__(self, data, children_lists, meta):
         return [Tree(data, children, meta) for children in combine_alternatives(children_lists)]
     def __default_token__(self, t):
         return [t]