
Cleanup, and a few PEP8 changes

tag: 0.11.0
Erez Sh 4 years ago
parent commit 5b30ba4841
12 changed files with 141 additions and 107 deletions
  1. lark-stubs/lexer.pyi (+18 / -8)
  2. lark/common.py (+2 / -2)
  3. lark/exceptions.py (+4 / -0)
  4. lark/grammar.py (+0 / -3)
  5. lark/lark.py (+5 / -4)
  6. lark/lexer.py (+19 / -11)
  7. lark/load_grammar.py (+44 / -38)
  8. lark/parse_tree_builder.py (+18 / -7)
  9. lark/parsers/earley.py (+2 / -2)
  10. lark/tree.py (+10 / -6)
  11. lark/utils.py (+9 / -13)
  12. lark/visitors.py (+10 / -13)

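The hunks below mostly apply a few recurring PEP8 conventions: `is None` instead of `== None`, spaces around operators and after commas/colons, two blank lines between top-level definitions, two spaces before inline comments, and docstrings in triple quotes. A minimal standalone sketch of those idioms (hypothetical names, not taken from the diff):

# Illustrative only -- these names are not part of the lark codebase.


class Options:
    def __init__(self, priority=None):
        self.priority = priority


def strip_priority(options):
    """Return the priority, or None -- compared with 'is', not '=='."""
    if options.priority is None:  # two spaces before an inline comment
        return None
    return options.priority


def invert(d):
    """Dict comprehension with PEP8 spacing: {i: n for n, i in ...}."""
    return {i: n for n, i in d.items()}


print(strip_priority(Options()))          # -> None
print(invert({'NAME': 0, 'NUMBER': 1}))   # -> {0: 'NAME', 1: 'NUMBER'}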
lark-stubs/lexer.pyi (+18 / -8)

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from types import ModuleType
from typing import (
TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional,
TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
Pattern as REPattern,
)
from abc import abstractmethod, ABC
@@ -100,10 +100,22 @@ class Lexer(ABC):
lex: Callable[..., Iterator[Token]]


class LexerConf:
tokens: Collection[TerminalDef]
re_module: ModuleType
ignore: Collection[str] = ()
postlex: Any =None
callbacks: Optional[Dict[str, _Callback]] = None
g_regex_flags: int = 0
skip_validation: bool = False
use_bytes: bool = False



class TraditionalLexer(Lexer):
terminals: Collection[TerminalDef]
ignore_types: List[str]
newline_types: List[str]
ignore_types: FrozenSet[str]
newline_types: FrozenSet[str]
user_callbacks: Dict[str, _Callback]
callback: Dict[str, _Callback]
mres: List[Tuple[REPattern, Dict[int, str]]]
@@ -111,11 +123,7 @@ class TraditionalLexer(Lexer):

def __init__(
self,
terminals: Collection[TerminalDef],
re_: ModuleType,
ignore: Collection[str] = ...,
user_callbacks: Dict[str, _Callback] = ...,
g_regex_flags: int = ...
conf: LexerConf
):
...

@@ -128,6 +136,8 @@ class TraditionalLexer(Lexer):
def lex(self, stream: str) -> Iterator[Token]:
...

def next_token(self, lex_state: Any) -> Token:
...

class ContextualLexer(Lexer):
lexers: Dict[str, TraditionalLexer]


lark/common.py (+2 / -2)

@@ -3,6 +3,7 @@ from .lexer import TerminalDef

###{standalone


class LexerConf(Serialize):
__serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes'
__serialize_namespace__ = TerminalDef,
@@ -19,11 +20,10 @@ class LexerConf(Serialize):

###}


class ParserConf:
def __init__(self, rules, callbacks, start):
assert isinstance(start, list)
self.rules = rules
self.callbacks = callbacks
self.start = start



lark/exceptions.py (+4 / -0)

@@ -6,15 +6,19 @@ from .utils import STRING_TYPE, logger
class LarkError(Exception):
pass


class GrammarError(LarkError):
pass


class ParseError(LarkError):
pass


class LexError(LarkError):
pass


class UnexpectedEOF(ParseError):
def __init__(self, expected):
self.expected = expected


lark/grammar.py (+0 / -3)

@@ -40,14 +40,12 @@ class Terminal(Symbol):
return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)



class NonTerminal(Symbol):
__serialize_fields__ = 'name',

is_term = False



class RuleOptions(Serialize):
__serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'

@@ -104,5 +102,4 @@ class Rule(Serialize):
return self.origin == other.origin and self.expansion == other.expansion



###}

lark/lark.py (+5 / -4)

@@ -24,6 +24,7 @@ except ImportError:

###{standalone


class LarkOptions(Serialize):
"""Specifies the options for Lark

@@ -36,7 +37,7 @@ class LarkOptions(Serialize):
debug
Display debug information, such as warnings (default: False)
transformer
Applies the transformer to every parse tree (equivlent to applying it after the parse, but faster)
Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
propagate_positions
Propagates (line, column, end_line, end_column) attributes into all tree branches.
maybe_placeholders
@@ -320,7 +321,7 @@ class Lark(Serialize):
# Else, if the user asked to disable priorities, strip them from the
# rules. This allows the Earley parsers to skip an extra forest walk
# for improved performance, if you don't need them (or didn't specify any).
elif self.options.priority == None:
elif self.options.priority is None:
for rule in self.rules:
if rule.options.priority is not None:
rule.options.priority = None
@@ -360,7 +361,7 @@ class Lark(Serialize):
self.rules,
self.options.tree_class or Tree,
self.options.propagate_positions,
self.options.parser!='lalr' and self.options.ambiguity=='explicit',
self.options.parser != 'lalr' and self.options.ambiguity == 'explicit',
self.options.maybe_placeholders
)
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
@@ -410,7 +411,7 @@ class Lark(Serialize):
data['parser'],
memo,
self._callbacks,
self.options, # Not all, but multiple attributes are used
self.options, # Not all, but multiple attributes are used
)
self.terminals = self.parser.lexer_conf.tokens
self._terminals_dict = {t.name: t for t in self.terminals}


lark/lexer.py (+19 / -11)

@@ -1,4 +1,4 @@
## Lexer Implementation
# Lexer Implementation

import re

@@ -8,6 +8,7 @@ from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
###{standalone
from copy import copy


class Pattern(Serialize):

def __init__(self, value, flags=()):
@@ -20,6 +21,7 @@ class Pattern(Serialize):
# Pattern Hashing assumes all subclasses have a different priority!
def __hash__(self):
return hash((type(self), self.value, self.flags))

def __eq__(self, other):
return type(self) == type(other) and self.value == other.value and self.flags == other.flags

@@ -53,6 +55,7 @@ class PatternStr(Pattern):
return len(self.value)
max_width = min_width


class PatternRE(Pattern):
__serialize_fields__ = 'value', 'flags', '_width'

@@ -70,6 +73,7 @@ class PatternRE(Pattern):
@property
def min_width(self):
return self._get_width()[0]

@property
def max_width(self):
return self._get_width()[1]
@@ -139,7 +143,7 @@ class Token(Str):
return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)

def __reduce__(self):
return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, ))
return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column))

def __repr__(self):
return 'Token(%r, %r)' % (self.type, self.value)
@@ -193,6 +197,7 @@ class UnlessCallback:
break
return t


class CallChain:
def __init__(self, callback1, callback2, cond):
self.callback1 = callback1
@@ -204,16 +209,13 @@ class CallChain:
return self.callback2(t) if self.cond(t2) else t2





def _create_unless(terminals, g_regex_flags, re_, use_bytes):
tokens_by_type = classify(terminals, lambda t: type(t.pattern))
assert len(tokens_by_type) <= 2, tokens_by_type.keys()
embedded_strs = set()
callback = {}
for retok in tokens_by_type.get(PatternRE, []):
unless = [] # {}
unless = []
for strtok in tokens_by_type.get(PatternStr, []):
if strtok.priority > retok.priority:
continue
@@ -245,13 +247,15 @@ def _build_mres(terminals, max_size, g_regex_flags, match_whole, re_, use_bytes)
except AssertionError: # Yes, this is what Python provides us.. :/
return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_, use_bytes)

mres.append((mre, {i:n for n,i in mre.groupindex.items()} ))
mres.append((mre, {i: n for n, i in mre.groupindex.items()}))
terminals = terminals[max_size:]
return mres


def build_mres(terminals, g_regex_flags, re_, use_bytes, match_whole=False):
return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_, use_bytes)


def _regexp_has_newline(r):
r"""Expressions that may indicate newlines in a regexp:
- newlines (\n)
@@ -262,6 +266,7 @@ def _regexp_has_newline(r):
"""
return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)


class Lexer(object):
"""Lexer interface

@@ -300,7 +305,7 @@ class TraditionalLexer(Lexer):
self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp()))
self.ignore_types = frozenset(conf.ignore)

terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
self.terminals = terminals
self.user_callbacks = conf.callbacks
self.g_regex_flags = conf.g_regex_flags
@@ -309,7 +314,7 @@ class TraditionalLexer(Lexer):
self._mres = None

def _build(self):
terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, re_=self.re, use_bytes=self.use_bytes)
terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes)
assert all(self.callback.values())

for type_, f in self.user_callbacks.items():
@@ -333,7 +338,7 @@ class TraditionalLexer(Lexer):
if m:
return m.group(0), type_from_index[m.lastindex]

def lex(self, state, parser_state):
def lex(self, state, _parser_state):
with suppress(EOFError):
while True:
yield self.next_token(state)
@@ -372,6 +377,7 @@ class TraditionalLexer(Lexer):
# EOF
raise EOFError(self)


class LexerState:
__slots__ = 'text', 'line_ctr', 'last_token'

@@ -383,6 +389,7 @@ class LexerState:
def __copy__(self):
return type(self)(self.text, copy(self.line_ctr), self.last_token)


class ContextualLexer(Lexer):

def __init__(self, conf, states, always_accept=()):
@@ -430,8 +437,9 @@ class ContextualLexer(Lexer):
token = self.root_lexer.next_token(lexer_state)
raise UnexpectedToken(token, e.allowed, state=parser_state.position)


class LexerThread:
"A thread that ties a lexer instance and a lexer state, to be used by the parser"
"""A thread that ties a lexer instance and a lexer state, to be used by the parser"""

def __init__(self, lexer, text):
self.lexer = lexer


lark/load_grammar.py (+44 / -38)

@@ -1,4 +1,4 @@
"Parses and creates Grammar objects"
"""Parses and creates Grammar objects"""

import os.path
import sys
@@ -166,6 +166,7 @@ RULES = {
'literal': ['REGEXP', 'STRING'],
}


@inline_args
class EBNF_to_BNF(Transformer_InPlace):
def __init__(self):
@@ -259,9 +260,9 @@ class SimplifyRule_Visitor(Visitor):
for i, child in enumerate(tree.children):
if isinstance(child, Tree) and child.data == 'expansions':
tree.data = 'expansions'
tree.children = [self.visit(ST('expansion', [option if i==j else other
for j, other in enumerate(tree.children)]))
for option in dedup_list(child.children)]
tree.children = [self.visit(ST('expansion', [option if i == j else other
for j, other in enumerate(tree.children)]))
for option in dedup_list(child.children)]
self._flatten(tree)
break

@@ -284,8 +285,10 @@ class SimplifyRule_Visitor(Visitor):
class RuleTreeToText(Transformer):
def expansions(self, x):
return x

def expansion(self, symbols):
return symbols, None

def alias(self, x):
(expansion, _alias), alias = x
assert _alias is None, (alias, expansion, '-', _alias) # Double alias not allowed
@@ -300,8 +303,9 @@ class CanonizeTree(Transformer_InPlace):
tokenmods, value = args
return tokenmods + [value]


class PrepareAnonTerminals(Transformer_InPlace):
"Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"
"""Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"""

def __init__(self, terminals):
self.terminals = terminals
@@ -310,7 +314,6 @@ class PrepareAnonTerminals(Transformer_InPlace):
self.i = 0
self.rule_options = None


@inline_args
def pattern(self, p):
value = p.value
@@ -330,14 +333,14 @@ class PrepareAnonTerminals(Transformer_InPlace):
except KeyError:
if value.isalnum() and value[0].isalpha() and value.upper() not in self.term_set:
with suppress(UnicodeEncodeError):
value.upper().encode('ascii') # Make sure we don't have unicode in our terminal names
value.upper().encode('ascii') # Make sure we don't have unicode in our terminal names
term_name = value.upper()

if term_name in self.term_set:
term_name = None

elif isinstance(p, PatternRE):
if p in self.term_reverse: # Kind of a weird placement.name
if p in self.term_reverse: # Kind of a weird placement.name
term_name = self.term_reverse[p].name
else:
assert False, p
@@ -359,7 +362,7 @@ class PrepareAnonTerminals(Transformer_InPlace):


class _ReplaceSymbols(Transformer_InPlace):
" Helper for ApplyTemplates "
"""Helper for ApplyTemplates"""

def __init__(self):
self.names = {}
@@ -374,8 +377,9 @@ class _ReplaceSymbols(Transformer_InPlace):
return self.__default__('template_usage', [self.names[c[0]].name] + c[1:], None)
return self.__default__('template_usage', c, None)


class ApplyTemplates(Transformer_InPlace):
" Apply the templates, creating new rules that represent the used templates "
"""Apply the templates, creating new rules that represent the used templates"""

def __init__(self, rule_defs):
self.rule_defs = rule_defs
@@ -401,8 +405,6 @@ def _rfind(s, choices):
return max(s.rfind(c) for c in choices)




def _literal_to_pattern(literal):
v = literal.value
flag_start = _rfind(v, '/"')+1
@@ -441,7 +443,7 @@ class PrepareLiterals(Transformer_InPlace):
assert start.type == end.type == 'STRING'
start = start.value[1:-1]
end = end.value[1:-1]
assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1, (start, end, len(eval_escaping(start)), len(eval_escaping(end)))
assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1
regexp = '[%s-%s]' % (start, end)
return ST('pattern', [PatternRE(regexp)])

@@ -460,6 +462,7 @@ def _make_joined_pattern(regexp, flags_set):

return PatternRE(regexp, flags)


class TerminalTreeToPattern(Transformer):
def pattern(self, ps):
p ,= ps
@@ -503,6 +506,7 @@ class TerminalTreeToPattern(Transformer):
def value(self, v):
return v[0]


class PrepareSymbols(Transformer_InPlace):
def value(self, v):
v ,= v
@@ -514,13 +518,16 @@ class PrepareSymbols(Transformer_InPlace):
return Terminal(Str(v.value), filter_out=v.startswith('_'))
assert False


def _choice_of_rules(rules):
return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules])


def nr_deepcopy_tree(t):
"Deepcopy tree `t` without recursion"
"""Deepcopy tree `t` without recursion"""
return Transformer_NonRecursive(False).transform(t)


class Grammar:
def __init__(self, rule_defs, term_defs, ignore):
self.term_defs = term_defs
@@ -547,7 +554,7 @@ class Grammar:
raise GrammarError("Terminals cannot be empty (%s)" % name)

transformer = PrepareLiterals() * TerminalTreeToPattern()
terminals = [TerminalDef(name, transformer.transform( term_tree ), priority)
terminals = [TerminalDef(name, transformer.transform(term_tree), priority)
for name, (term_tree, priority) in term_defs if term_tree]

# =================
@@ -566,10 +573,10 @@ class Grammar:
ebnf_to_bnf = EBNF_to_BNF()
rules = []
i = 0
while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates
while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates
name, params, rule_tree, options = rule_defs[i]
i += 1
if len(params) != 0: # Dont transform templates
if len(params) != 0: # Dont transform templates
continue
rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
ebnf_to_bnf.rule_options = rule_options
@@ -594,7 +601,7 @@ class Grammar:

for i, (expansion, alias) in enumerate(expansions):
if alias and name.startswith('_'):
raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))
raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)"% (name, alias))

empty_indices = [x==_EMPTY for x in expansion]
if any(empty_indices):
@@ -623,14 +630,13 @@ class Grammar:
# Remove duplicates
compiled_rules = list(set(compiled_rules))


# Filter out unused rules
while True:
c = len(compiled_rules)
used_rules = {s for r in compiled_rules
for s in r.expansion
if isinstance(s, NonTerminal)
and s != r.origin}
for s in r.expansion
if isinstance(s, NonTerminal)
and s != r.origin}
used_rules |= {NonTerminal(s) for s in start}
compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules)
for r in unused:
@@ -663,6 +669,7 @@ class PackageResource(object):
def __repr__(self):
return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.path)


class FromPackageLoader(object):
"""
Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`.
@@ -699,11 +706,12 @@ class FromPackageLoader(object):
return PackageResource(self.pkg_name, full_path), text.decode()
raise IOError()

stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)

stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)

_imported_grammars = {}


def import_from_grammar_into_namespace(grammar, namespace, aliases):
"""Returns all rules and terminals of grammar, prepended
with a 'namespace' prefix, except for those which are aliased.
@@ -724,8 +732,6 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases):
raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace))
return _find_used_symbols(tree) - set(params)



def get_namespace_name(name, params):
if params is not None:
try:
@@ -746,19 +752,17 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases):
else:
assert symbol.type == 'RULE'
_, params, tree, options = imported_rules[symbol]
params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s' ) % (namespace, p) for p in params}
params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s') % (namespace, p) for p in params}
for t in tree.iter_subtrees():
for i, c in enumerate(t.children):
if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'):
t.children[i] = Token(c.type, get_namespace_name(c, params_map))
params = [params_map[p] for p in params] # We can not rely on ordered dictionaries
params = [params_map[p] for p in params] # We can not rely on ordered dictionaries
rule_defs.append((get_namespace_name(symbol, params_map), params, tree, options))


return term_defs, rule_defs



def resolve_term_references(term_defs):
# TODO Solve with transitive closure (maybe)

@@ -798,7 +802,7 @@ def options_from_rule(name, params, *x):
else:
expansions ,= x
priority = None
params = [t.value for t in params.children] if params is not None else [] # For the grammar parser
params = [t.value for t in params.children] if params is not None else [] # For the grammar parser

keep_all_tokens = name.startswith('!')
name = name.lstrip('!')
@@ -812,10 +816,12 @@ def options_from_rule(name, params, *x):
def symbols_from_strcase(expansion):
return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion]


@inline_args
class PrepareGrammar(Transformer_InPlace):
def terminal(self, name):
return name

def nonterminal(self, name):
return name

@@ -825,10 +831,11 @@ def _find_used_symbols(tree):
return {t for x in tree.find_data('expansion')
for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}


class GrammarLoader:
ERRORS = [
('Unclosed parenthesis', ['a: (\n']),
('Umatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
('Illegal name for rules or terminals', ['Aa:\n']),
('Alias expects lowercase name', ['a: -> "a"\n']),
@@ -843,8 +850,9 @@ class GrammarLoader:
def __init__(self, global_keep_all_tokens):
terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]

rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, _p, xs, o in rules for i, x in enumerate(xs)]
rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o)
for r, _p, xs, o in rules for i, x in enumerate(xs)]
callback = ParseTreeBuilder(rules, ST).create_callback()
import re
lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT'])
@@ -881,10 +889,10 @@ class GrammarLoader:
return _imported_grammars[grammar_path]

def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]):
"Parse grammar_text, verify, and create Grammar object. Display nice messages on error."
"""Parse grammar_text, verify, and create Grammar object. Display nice messages on error."""

try:
tree = self.canonize_tree.transform( self.parser.parse(grammar_text+'\n') )
tree = self.canonize_tree.transform(self.parser.parse(grammar_text+'\n'))
except UnexpectedCharacters as e:
context = e.get_context(grammar_text)
raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
@@ -1037,7 +1045,7 @@ class GrammarLoader:
raise GrammarError("Template '%s' used but not defined (in rule %s)" % (sym, name))
if len(args) != rule_names[sym]:
raise GrammarError("Wrong number of template arguments used for %s "
"(expected %s, got %s) (in rule %s)"%(sym, rule_names[sym], len(args), name))
"(expected %s, got %s) (in rule %s)" % (sym, rule_names[sym], len(args), name))
for sym in _find_used_symbols(expansions):
if sym.type == 'TERMINAL':
if sym not in terminal_names:
@@ -1046,10 +1054,8 @@ class GrammarLoader:
if sym not in rule_names and sym not in params:
raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name))


return Grammar(rules, term_defs, ignore_names)



def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
return GrammarLoader(global_keep_all_tokens).load_grammar(grammar, source, import_paths)

lark/parse_tree_builder.py (+18 / -7)

@@ -1,7 +1,7 @@
from .exceptions import GrammarError
from .lexer import Token
from .tree import Tree
from .visitors import InlineTransformer # XXX Deprecated
from .visitors import InlineTransformer # XXX Deprecated
from .visitors import Transformer_InPlace
from .visitors import _vargs_meta, _vargs_meta_inline

@@ -20,6 +20,7 @@ class ExpandSingleChild:
else:
return self.node_builder(children)


class PropagatePositions:
def __init__(self, node_builder):
self.node_builder = node_builder
@@ -87,8 +88,9 @@ class ChildFilter:

return self.node_builder(filtered)


class ChildFilterLALR(ChildFilter):
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
"""Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"""

def __call__(self, children):
filtered = []
@@ -108,6 +110,7 @@ class ChildFilterLALR(ChildFilter):

return self.node_builder(filtered)


class ChildFilterLALR_NoPlaceholders(ChildFilter):
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
def __init__(self, to_include, node_builder):
@@ -126,9 +129,11 @@ class ChildFilterLALR_NoPlaceholders(ChildFilter):
filtered.append(children[i])
return self.node_builder(filtered)


def _should_expand(sym):
return not sym.is_term and sym.name.startswith('_')


def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
# Prepare empty_indices as: How many Nones to insert at each index?
if _empty_indices:
@@ -156,6 +161,7 @@ def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indi
# LALR without placeholders
return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])


class AmbiguousExpander:
"""Deal with the case where we're expanding children ('_rule') into a parent but the children
are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself
@@ -167,10 +173,10 @@ class AmbiguousExpander:
self.to_expand = to_expand

def __call__(self, children):
def _is_ambig_tree(child):
return hasattr(child, 'data') and child.data == '_ambig'
def _is_ambig_tree(t):
return hasattr(t, 'data') and t.data == '_ambig'

#### When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
# -- When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
# All children of an _ambig node should be a derivation of that ambig node, hence
# it is safe to assume that if we see an _ambig node nested within an ambig node
# it is safe to simply expand it into the parent _ambig node as an alternative derivation.
@@ -186,15 +192,17 @@ class AmbiguousExpander:
if not ambiguous:
return self.node_builder(children)

expand = [ iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children) ]
expand = [iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children)]
return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))])


def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
to_expand = [i for i, sym in enumerate(expansion)
if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
if to_expand:
return partial(AmbiguousExpander, to_expand, tree_class)


class AmbiguousIntermediateExpander:
"""
Propagate ambiguous intermediate nodes and their derivations up to the
@@ -275,12 +283,14 @@ class AmbiguousIntermediateExpander:

return self.node_builder(children)


def ptb_inline_args(func):
@wraps(func)
def f(children):
return func(*children)
return f


def inplace_transformer(func):
@wraps(func)
def f(children):
@@ -289,9 +299,11 @@ def inplace_transformer(func):
return func(tree)
return f


def apply_visit_wrapper(func, name, wrapper):
if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
raise NotImplementedError("Meta args not supported for internal transformer")

@wraps(func)
def f(children):
return wrapper(func, name, children, None)
@@ -323,7 +335,6 @@ class ParseTreeBuilder:

yield rule, wrapper_chain


def create_callback(self, transformer=None):
callbacks = {}



lark/parsers/earley.py (+2 / -2)

@@ -298,8 +298,8 @@ class Parser:
# this column. Find the item for the start_symbol, which is the root of the SPPF tree.
solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
if not solutions:
expected_tokens = [t.expect for t in to_scan]
raise UnexpectedEOF(expected_tokens)
expected_terminals = [t.expect for t in to_scan]
raise UnexpectedEOF(expected_terminals)

if self.debug:
from .earley_forest import ForestToPyDotVisitor


lark/tree.py (+10 / -6)

@@ -46,14 +46,14 @@ class Tree(object):

def _pretty(self, level, indent_str):
if len(self.children) == 1 and not isinstance(self.children[0], Tree):
return [ indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']
return [indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']

l = [ indent_str*level, self._pretty_label(), '\n' ]
l = [indent_str*level, self._pretty_label(), '\n']
for n in self.children:
if isinstance(n, Tree):
l += n._pretty(level+1, indent_str)
else:
l += [ indent_str*(level+1), '%s' % (n,), '\n' ]
l += [indent_str*(level+1), '%s' % (n,), '\n']

return l

@@ -102,8 +102,8 @@ class Tree(object):
###}

def expand_kids_by_index(self, *indices):
"Expand (inline) children at the given indices"
for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices
"""Expand (inline) children at the given indices"""
for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices
kid = self.children[i]
self.children[i:i+1] = kid.children

@@ -144,12 +144,15 @@ class Tree(object):
@property
def line(self):
return self.meta.line

@property
def column(self):
return self.meta.column

@property
def end_line(self):
return self.meta.end_line

@property
def end_column(self):
return self.meta.end_column
@@ -168,6 +171,7 @@ def pydot__tree_to_dot(tree, filename, rankdir="LR", **kwargs):
graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
graph.write(filename)


def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):
"""Creates a colorful image that represents the tree (data+children, without meta)

@@ -196,7 +200,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):

subnodes = [_to_pydot(child) if isinstance(child, Tree) else new_leaf(child)
for child in subtree.children]
node = pydot.Node(i[0], style="filled", fillcolor="#%x"%color, label=subtree.data)
node = pydot.Node(i[0], style="filled", fillcolor="#%x" % color, label=subtree.data)
i[0] += 1
graph.add_node(node)



lark/utils.py (+9 / -13)

@@ -1,10 +1,10 @@
import sys
import os
from functools import reduce
from ast import literal_eval
from collections import deque

###{standalone
import sys, re
import logging
logger = logging.getLogger("lark")
logger.addHandler(logging.StreamHandler())
@@ -12,6 +12,8 @@ logger.addHandler(logging.StreamHandler())
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)

Py36 = (sys.version_info[:2] >= (3, 6))


def classify(seq, key=None, value=None):
d = {}
@@ -27,7 +29,7 @@ def classify(seq, key=None, value=None):

def _deserialize(data, namespace, memo):
if isinstance(data, dict):
if '__type__' in data: # Object
if '__type__' in data: # Object
class_ = namespace[data['__type__']]
return class_.deserialize(data, memo)
elif '@' in data:
@@ -105,7 +107,6 @@ class SerializeMemoizer(Serialize):
return _deserialize(data, namespace, memo)



try:
STRING_TYPE = basestring
except NameError: # Python 3
@@ -118,10 +119,11 @@ from contextlib import contextmanager

Str = type(u'')
try:
classtype = types.ClassType # Python2
classtype = types.ClassType # Python2
except AttributeError:
classtype = type # Python3


def smart_decorator(f, create_decorator):
if isinstance(f, types.FunctionType):
return wraps(f)(create_decorator(f, True))
@@ -139,17 +141,16 @@ def smart_decorator(f, create_decorator):
else:
return create_decorator(f.__func__.__call__, True)


try:
import regex
except ImportError:
regex = None

import sys, re
Py36 = (sys.version_info[:2] >= (3, 6))

import sre_parse
import sre_constants
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')

def get_regexp_width(expr):
if regex:
# Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
@@ -173,9 +174,7 @@ def dedup_list(l):
preserving the original order of the list. Assumes that
the list entries are hashable."""
dedup = set()
return [ x for x in l if not (x in dedup or dedup.add(x))]


return [x for x in l if not (x in dedup or dedup.add(x))]


try:
@@ -197,8 +196,6 @@ except ImportError:
pass




try:
compare = cmp
except NameError:
@@ -210,7 +207,6 @@ except NameError:
return -1



class Enumerator(Serialize):
def __init__(self):
self.enums = {}


lark/visitors.py (+10 / -13)

@@ -8,6 +8,7 @@ from .lexer import Token
###{standalone
from inspect import getmembers, getmro


class Discard(Exception):
"""When raising the Discard exception in a transformer callback,
that node is discarded and won't appear in the parent.
@@ -16,6 +17,7 @@ class Discard(Exception):

# Transformers


class _Decoratable:
"Provides support for decorating methods with @v_args"

@@ -107,7 +109,6 @@ class Transformer(_Decoratable):
except Exception as e:
raise VisitError(token.type, token, e)


def _transform_children(self, children):
for c in children:
try:
@@ -148,7 +149,6 @@ class Transformer(_Decoratable):
return token



class InlineTransformer(Transformer): # XXX Deprecated
def _call_userfunc(self, tree, new_children=None):
# Assumes tree is already transformed
@@ -203,7 +203,7 @@ class Transformer_NonRecursive(Transformer):
q = [tree]
while q:
t = q.pop()
rev_postfix.append( t )
rev_postfix.append(t)
if isinstance(t, Tree):
q += t.children

@@ -225,7 +225,6 @@ class Transformer_NonRecursive(Transformer):
return t



class Transformer_InPlaceRecursive(Transformer):
"Same as Transformer, recursive, but changes the tree in-place instead of returning new instances"
def _transform_tree(self, tree):
@@ -297,7 +296,6 @@ class Visitor_Recursive(VisitorBase):
return tree



def visit_children_decor(func):
"See Interpreter"
@wraps(func)
@@ -338,8 +336,6 @@ class Interpreter(_Decoratable):
return self.visit_children(tree)




# Decorators

def _apply_decorator(obj, decorator, **kwargs):
@@ -351,7 +347,6 @@ def _apply_decorator(obj, decorator, **kwargs):
return _apply(decorator, **kwargs)



def _inline_args__func(func):
@wraps(func)
def create_decorator(_f, with_self):
@@ -370,7 +365,6 @@ def inline_args(obj): # XXX Deprecated
return _apply_decorator(obj, _inline_args__func)



def _visitor_args_func_dec(func, visit_wrapper=None, static=False):
def create_decorator(_f, with_self):
if with_self:
@@ -390,11 +384,11 @@ def _visitor_args_func_dec(func, visit_wrapper=None, static=False):
return f


def _vargs_inline(f, data, children, meta):
def _vargs_inline(f, _data, children, _meta):
return f(*children)
def _vargs_meta_inline(f, data, children, meta):
def _vargs_meta_inline(f, _data, children, meta):
return f(meta, *children)
def _vargs_meta(f, data, children, meta):
def _vargs_meta(f, _data, children, meta):
return f(children, meta) # TODO swap these for consistency? Backwards incompatible!
def _vargs_tree(f, data, children, meta):
return f(Tree(data, children, meta))
@@ -415,6 +409,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None):
inline (bool, optional): Children are provided as ``*args`` instead of a list argument (not recommended for very long lists).
meta (bool, optional): Provides two arguments: ``children`` and ``meta`` (instead of just the first)
tree (bool, optional): Provides the entire tree as the argument, instead of the children.
wrapper (function, optional): Provide a function to decorate all methods.

Example:
::
@@ -457,7 +452,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None):
###}


#--- Visitor Utilities ---
# --- Visitor Utilities ---

class CollapseAmbiguities(Transformer):
"""
@@ -471,7 +466,9 @@ class CollapseAmbiguities(Transformer):
"""
def _ambig(self, options):
return sum(options, [])

def __default__(self, data, children_lists, meta):
return [Tree(data, children, meta) for children in combine_alternatives(children_lists)]

def __default_token__(self, t):
return [t]
