Cleanup, and a few PEP8 changes

Erez Sh, 4 years ago (parent commit 5b30ba4841)
12 changed files with 141 additions and 107 deletions
  1. lark-stubs/lexer.pyi          +18  -8
  2. lark/common.py                 +2  -2
  3. lark/exceptions.py             +4  -0
  4. lark/grammar.py                +0  -3
  5. lark/lark.py                   +5  -4
  6. lark/lexer.py                 +19 -11
  7. lark/load_grammar.py          +44 -38
  8. lark/parse_tree_builder.py    +18  -7
  9. lark/parsers/earley.py         +2  -2
 10. lark/tree.py                  +10  -6
 11. lark/utils.py                  +9 -13
 12. lark/visitors.py              +10 -13

lark-stubs/lexer.pyi  (+18, -8)

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from types import ModuleType
 from typing import (
-    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional,
+    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
     Pattern as REPattern,
 )
 from abc import abstractmethod, ABC

@@ -100,10 +100,22 @@ class Lexer(ABC):
     lex: Callable[..., Iterator[Token]]


+class LexerConf:
+    tokens: Collection[TerminalDef]
+    re_module: ModuleType
+    ignore: Collection[str] = ()
+    postlex: Any =None
+    callbacks: Optional[Dict[str, _Callback]] = None
+    g_regex_flags: int = 0
+    skip_validation: bool = False
+    use_bytes: bool = False
+
+
 class TraditionalLexer(Lexer):
     terminals: Collection[TerminalDef]
-    ignore_types: List[str]
-    newline_types: List[str]
+    ignore_types: FrozenSet[str]
+    newline_types: FrozenSet[str]
     user_callbacks: Dict[str, _Callback]
     callback: Dict[str, _Callback]
     mres: List[Tuple[REPattern, Dict[int, str]]]

@@ -111,11 +123,7 @@ class TraditionalLexer(Lexer):
     def __init__(
         self,
-        terminals: Collection[TerminalDef],
-        re_: ModuleType,
-        ignore: Collection[str] = ...,
-        user_callbacks: Dict[str, _Callback] = ...,
-        g_regex_flags: int = ...
+        conf: LexerConf
     ):
         ...

@@ -128,6 +136,8 @@ class TraditionalLexer(Lexer):
     def lex(self, stream: str) -> Iterator[Token]:
         ...

+    def next_token(self, lex_state: Any) -> Token:
+        ...
+

 class ContextualLexer(Lexer):
     lexers: Dict[str, TraditionalLexer]

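The stub changes above track the new constructor: TraditionalLexer is now built from a single LexerConf object rather than a list of keyword arguments. A minimal sketch of what a call site looks like after this change (field names are taken from the stub in this diff; the runtime LexerConf lives in lark/common.py, and the exact defaults shown here are an assumption):

    import re

    from lark.common import LexerConf
    from lark.lexer import PatternRE, PatternStr, TerminalDef, TraditionalLexer

    # Everything the lexer needs now travels in a single configuration object.
    terminals = [TerminalDef('NUMBER', PatternRE(r'[0-9]+')),
                 TerminalDef('PLUS', PatternStr('+')),
                 TerminalDef('WS', PatternRE(r'\s+'))]
    conf = LexerConf(terminals, re, ignore=['WS'])

    lexer = TraditionalLexer(conf)  # previously: TraditionalLexer(terminals, re, ignore=['WS'], ...)

Most users never construct the lexer directly; Lark builds it from its own options, so this mostly matters for custom-lexer and plugin code.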

lark/common.py  (+2, -2)

@@ -3,6 +3,7 @@ from .lexer import TerminalDef

 ###{standalone

+
 class LexerConf(Serialize):
     __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes'
     __serialize_namespace__ = TerminalDef,

@@ -19,11 +20,10 @@ class LexerConf(Serialize):

 ###}

-
 class ParserConf:
     def __init__(self, rules, callbacks, start):
         assert isinstance(start, list)
         self.rules = rules
         self.callbacks = callbacks
         self.start = start



lark/exceptions.py  (+4, -0)

@@ -6,15 +6,19 @@ from .utils import STRING_TYPE, logger
 class LarkError(Exception):
     pass

+
 class GrammarError(LarkError):
     pass

+
 class ParseError(LarkError):
     pass

+
 class LexError(LarkError):
     pass

+
 class UnexpectedEOF(ParseError):
     def __init__(self, expected):
         self.expected = expected


lark/grammar.py  (+0, -3)

@@ -40,14 +40,12 @@ class Terminal(Symbol):
         return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)


-
 class NonTerminal(Symbol):
     __serialize_fields__ = 'name',

     is_term = False


-
 class RuleOptions(Serialize):
     __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'

@@ -104,5 +102,4 @@ class Rule(Serialize):
         return self.origin == other.origin and self.expansion == other.expansion


-
 ###}

lark/lark.py  (+5, -4)

@@ -24,6 +24,7 @@ except ImportError:

 ###{standalone

+
 class LarkOptions(Serialize):
     """Specifies the options for Lark

@@ -36,7 +37,7 @@ class LarkOptions(Serialize):
     debug
         Display debug information, such as warnings (default: False)
     transformer
-        Applies the transformer to every parse tree (equivlent to applying it after the parse, but faster)
+        Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
     propagate_positions
         Propagates (line, column, end_line, end_column) attributes into all tree branches.
     maybe_placeholders

@@ -320,7 +321,7 @@ class Lark(Serialize):
         # Else, if the user asked to disable priorities, strip them from the
         # rules. This allows the Earley parsers to skip an extra forest walk
         # for improved performance, if you don't need them (or didn't specify any).
-        elif self.options.priority == None:
+        elif self.options.priority is None:
            for rule in self.rules:
                if rule.options.priority is not None:
                    rule.options.priority = None

@@ -360,7 +361,7 @@ class Lark(Serialize):
             self.rules,
             self.options.tree_class or Tree,
             self.options.propagate_positions,
-            self.options.parser!='lalr' and self.options.ambiguity=='explicit',
+            self.options.parser != 'lalr' and self.options.ambiguity == 'explicit',
             self.options.maybe_placeholders
         )
         self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)

@@ -410,7 +411,7 @@ class Lark(Serialize):
             data['parser'],
             memo,
             self._callbacks,
-            self.options, # Not all, but multiple attributes are used
+            self.options,  # Not all, but multiple attributes are used
         )
         self.terminals = self.parser.lexer_conf.tokens
         self._terminals_dict = {t.name: t for t in self.terminals}

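For context on the corrected docstring line: passing a transformer to the constructor applies it while the tree is being built instead of as a separate pass. A small illustrative sketch (the grammar and transformer are invented for this example, not part of the commit):

    from lark import Lark, Transformer

    class SumTransformer(Transformer):
        def add(self, children):
            # children are the two NUMBER tokens; convert and add them
            return int(children[0]) + int(children[1])

    grammar = """
    start: add
    add: NUMBER "+" NUMBER
    %import common.NUMBER
    %import common.WS
    %ignore WS
    """

    # With parser='lalr' the transformer runs during parsing, which is what
    # "equivalent to applying it after the parse, but faster" refers to.
    parser = Lark(grammar, parser='lalr', transformer=SumTransformer())
    tree = parser.parse("1 + 2")    # Tree('start', [3])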

lark/lexer.py  (+19, -11)

@@ -1,4 +1,4 @@
-## Lexer Implementation
+# Lexer Implementation

 import re

@@ -8,6 +8,7 @@ from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
 ###{standalone
 from copy import copy

+
 class Pattern(Serialize):

     def __init__(self, value, flags=()):

@@ -20,6 +21,7 @@ class Pattern(Serialize):
     # Pattern Hashing assumes all subclasses have a different priority!
     def __hash__(self):
         return hash((type(self), self.value, self.flags))
+
     def __eq__(self, other):
         return type(self) == type(other) and self.value == other.value and self.flags == other.flags

@@ -53,6 +55,7 @@ class PatternStr(Pattern):
         return len(self.value)
     max_width = min_width

+
 class PatternRE(Pattern):
     __serialize_fields__ = 'value', 'flags', '_width'

@@ -70,6 +73,7 @@ class PatternRE(Pattern):
     @property
     def min_width(self):
         return self._get_width()[0]
+
     @property
     def max_width(self):
         return self._get_width()[1]

@@ -139,7 +143,7 @@ class Token(Str):
         return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)

     def __reduce__(self):
-        return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, ))
+        return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column))

     def __repr__(self):
         return 'Token(%r, %r)' % (self.type, self.value)

@@ -193,6 +197,7 @@ class UnlessCallback:
                 break
         return t

+
 class CallChain:
     def __init__(self, callback1, callback2, cond):
         self.callback1 = callback1

@@ -204,16 +209,13 @@ class CallChain:
         return self.callback2(t) if self.cond(t2) else t2


-
-
-
 def _create_unless(terminals, g_regex_flags, re_, use_bytes):
     tokens_by_type = classify(terminals, lambda t: type(t.pattern))
     assert len(tokens_by_type) <= 2, tokens_by_type.keys()
     embedded_strs = set()
     callback = {}
     for retok in tokens_by_type.get(PatternRE, []):
-        unless = [] # {}
+        unless = []
         for strtok in tokens_by_type.get(PatternStr, []):
             if strtok.priority > retok.priority:
                 continue

@@ -245,13 +247,15 @@ def _build_mres(terminals, max_size, g_regex_flags, match_whole, re_, use_bytes)
         except AssertionError:  # Yes, this is what Python provides us.. :/
             return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_, use_bytes)

-        mres.append((mre, {i:n for n,i in mre.groupindex.items()} ))
+        mres.append((mre, {i: n for n, i in mre.groupindex.items()}))
         terminals = terminals[max_size:]
     return mres

+
 def build_mres(terminals, g_regex_flags, re_, use_bytes, match_whole=False):
     return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_, use_bytes)

+
 def _regexp_has_newline(r):
     r"""Expressions that may indicate newlines in a regexp:
       - newlines (\n)

@@ -262,6 +266,7 @@ def _regexp_has_newline(r):
     """
     return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)

+
 class Lexer(object):
     """Lexer interface

@@ -300,7 +305,7 @@ class TraditionalLexer(Lexer):
         self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp()))
         self.ignore_types = frozenset(conf.ignore)

-        terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
+        terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
         self.terminals = terminals
         self.user_callbacks = conf.callbacks
         self.g_regex_flags = conf.g_regex_flags

@@ -309,7 +314,7 @@ class TraditionalLexer(Lexer):
         self._mres = None

     def _build(self):
-        terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, re_=self.re, use_bytes=self.use_bytes)
+        terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes)
         assert all(self.callback.values())

         for type_, f in self.user_callbacks.items():

@@ -333,7 +338,7 @@ class TraditionalLexer(Lexer):
         if m:
             return m.group(0), type_from_index[m.lastindex]

-    def lex(self, state, parser_state):
+    def lex(self, state, _parser_state):
         with suppress(EOFError):
             while True:
                 yield self.next_token(state)

@@ -372,6 +377,7 @@ class TraditionalLexer(Lexer):
         # EOF
         raise EOFError(self)

+
 class LexerState:
     __slots__ = 'text', 'line_ctr', 'last_token'

@@ -383,6 +389,7 @@ class LexerState:
     def __copy__(self):
         return type(self)(self.text, copy(self.line_ctr), self.last_token)

+
 class ContextualLexer(Lexer):

     def __init__(self, conf, states, always_accept=()):

@@ -430,8 +437,9 @@ class ContextualLexer(Lexer):
                 token = self.root_lexer.next_token(lexer_state)
                 raise UnexpectedToken(token, e.allowed, state=parser_state.position)

+
 class LexerThread:
-    "A thread that ties a lexer instance and a lexer state, to be used by the parser"
+    """A thread that ties a lexer instance and a lexer state, to be used by the parser"""

     def __init__(self, lexer, text):
         self.lexer = lexer

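The docstring kept above describes _regexp_has_newline, the heuristic used to decide which terminals can advance the line counter. A quick, hedged illustration (it is a private helper, so treat this as documentation rather than stable API):

    from lark.lexer import _regexp_has_newline

    # True when the regexp contains '\n', '\\n', '\\s', a '[^...]' class,
    # or a '(?s' flag combined with '.', any of which may match a newline.
    assert _regexp_has_newline(r'\s+')          # \s can match a newline
    assert _regexp_has_newline('a\nb')          # literal newline
    assert not _regexp_has_newline(r'[a-z]+')   # cannot cross lines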

lark/load_grammar.py  (+44, -38)

@@ -1,4 +1,4 @@
-"Parses and creates Grammar objects"
+"""Parses and creates Grammar objects"""

 import os.path
 import sys

@@ -166,6 +166,7 @@ RULES = {
     'literal': ['REGEXP', 'STRING'],
 }

+
 @inline_args
 class EBNF_to_BNF(Transformer_InPlace):
     def __init__(self):

@@ -259,9 +260,9 @@ class SimplifyRule_Visitor(Visitor):
         for i, child in enumerate(tree.children):
             if isinstance(child, Tree) and child.data == 'expansions':
                 tree.data = 'expansions'
-                tree.children = [self.visit(ST('expansion', [option if i==j else other
-                                                            for j, other in enumerate(tree.children)]))
-                                    for option in dedup_list(child.children)]
+                tree.children = [self.visit(ST('expansion', [option if i == j else other
+                                                             for j, other in enumerate(tree.children)]))
+                                 for option in dedup_list(child.children)]
                 self._flatten(tree)
                 break

@@ -284,8 +285,10 @@ class SimplifyRule_Visitor(Visitor):
 class RuleTreeToText(Transformer):
     def expansions(self, x):
         return x
+
     def expansion(self, symbols):
         return symbols, None
+
     def alias(self, x):
         (expansion, _alias), alias = x
         assert _alias is None, (alias, expansion, '-', _alias)  # Double alias not allowed

@@ -300,8 +303,9 @@ class CanonizeTree(Transformer_InPlace):
         tokenmods, value = args
         return tokenmods + [value]

+
 class PrepareAnonTerminals(Transformer_InPlace):
-    "Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"
+    """Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"""

     def __init__(self, terminals):
         self.terminals = terminals

@@ -310,7 +314,6 @@ class PrepareAnonTerminals(Transformer_InPlace):
         self.i = 0
         self.rule_options = None

-
     @inline_args
     def pattern(self, p):
         value = p.value

@@ -330,14 +333,14 @@ class PrepareAnonTerminals(Transformer_InPlace):
             except KeyError:
                 if value.isalnum() and value[0].isalpha() and value.upper() not in self.term_set:
                     with suppress(UnicodeEncodeError):
-                        value.upper().encode('ascii') # Make sure we don't have unicode in our terminal names
+                        value.upper().encode('ascii')  # Make sure we don't have unicode in our terminal names
                         term_name = value.upper()

                 if term_name in self.term_set:
                     term_name = None

         elif isinstance(p, PatternRE):
-            if p in self.term_reverse: # Kind of a weird placement.name
+            if p in self.term_reverse:  # Kind of a weird placement.name
                 term_name = self.term_reverse[p].name
             else:
                 assert False, p

@@ -359,7 +362,7 @@ class PrepareAnonTerminals(Transformer_InPlace):


 class _ReplaceSymbols(Transformer_InPlace):
-    " Helper for ApplyTemplates "
+    """Helper for ApplyTemplates"""

     def __init__(self):
         self.names = {}

@@ -374,8 +377,9 @@ class _ReplaceSymbols(Transformer_InPlace):
             return self.__default__('template_usage', [self.names[c[0]].name] + c[1:], None)
         return self.__default__('template_usage', c, None)

+
 class ApplyTemplates(Transformer_InPlace):
-    " Apply the templates, creating new rules that represent the used templates "
+    """Apply the templates, creating new rules that represent the used templates"""

     def __init__(self, rule_defs):
         self.rule_defs = rule_defs

@@ -401,8 +405,6 @@ def _rfind(s, choices):
     return max(s.rfind(c) for c in choices)


-
-
 def _literal_to_pattern(literal):
     v = literal.value
     flag_start = _rfind(v, '/"')+1

@@ -441,7 +443,7 @@ class PrepareLiterals(Transformer_InPlace):
         assert start.type == end.type == 'STRING'
         start = start.value[1:-1]
         end = end.value[1:-1]
-        assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1, (start, end, len(eval_escaping(start)), len(eval_escaping(end)))
+        assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1
         regexp = '[%s-%s]' % (start, end)
         return ST('pattern', [PatternRE(regexp)])

@@ -460,6 +462,7 @@ def _make_joined_pattern(regexp, flags_set):

     return PatternRE(regexp, flags)

+
 class TerminalTreeToPattern(Transformer):
     def pattern(self, ps):
         p ,= ps

@@ -503,6 +506,7 @@ class TerminalTreeToPattern(Transformer):
     def value(self, v):
         return v[0]

+
 class PrepareSymbols(Transformer_InPlace):
     def value(self, v):
         v ,= v

@@ -514,13 +518,16 @@ class PrepareSymbols(Transformer_InPlace):
             return Terminal(Str(v.value), filter_out=v.startswith('_'))
         assert False

+
 def _choice_of_rules(rules):
     return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules])

+
 def nr_deepcopy_tree(t):
-    "Deepcopy tree `t` without recursion"
+    """Deepcopy tree `t` without recursion"""
     return Transformer_NonRecursive(False).transform(t)

+
 class Grammar:
     def __init__(self, rule_defs, term_defs, ignore):
         self.term_defs = term_defs

@@ -547,7 +554,7 @@ class Grammar:
                 raise GrammarError("Terminals cannot be empty (%s)" % name)

         transformer = PrepareLiterals() * TerminalTreeToPattern()
-        terminals = [TerminalDef(name, transformer.transform( term_tree ), priority)
+        terminals = [TerminalDef(name, transformer.transform(term_tree), priority)
                      for name, (term_tree, priority) in term_defs if term_tree]

         # =================

@@ -566,10 +573,10 @@ class Grammar:
         ebnf_to_bnf = EBNF_to_BNF()
         rules = []
         i = 0
-        while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates
+        while i < len(rule_defs):  # We have to do it like this because rule_defs might grow due to templates
             name, params, rule_tree, options = rule_defs[i]
             i += 1
-            if len(params) != 0: # Dont transform templates
+            if len(params) != 0:  # Dont transform templates
                 continue
             rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
             ebnf_to_bnf.rule_options = rule_options

@@ -594,7 +601,7 @@ class Grammar:

         for i, (expansion, alias) in enumerate(expansions):
             if alias and name.startswith('_'):
-                raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))
+                raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)"% (name, alias))

             empty_indices = [x==_EMPTY for x in expansion]
             if any(empty_indices):

@@ -623,14 +630,13 @@ class Grammar:
             # Remove duplicates
             compiled_rules = list(set(compiled_rules))

-
         # Filter out unused rules
         while True:
             c = len(compiled_rules)
             used_rules = {s for r in compiled_rules
-                            for s in r.expansion
-                            if isinstance(s, NonTerminal)
-                            and s != r.origin}
+                          for s in r.expansion
+                          if isinstance(s, NonTerminal)
+                          and s != r.origin}
             used_rules |= {NonTerminal(s) for s in start}
             compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules)
             for r in unused:

@@ -663,6 +669,7 @@ class PackageResource(object):
     def __repr__(self):
         return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.path)

+
 class FromPackageLoader(object):
     """
     Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`.

@@ -699,11 +706,12 @@ class FromPackageLoader(object):
             return PackageResource(self.pkg_name, full_path), text.decode()
         raise IOError()

-stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)

+
+stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)

 _imported_grammars = {}


 def import_from_grammar_into_namespace(grammar, namespace, aliases):
     """Returns all rules and terminals of grammar, prepended
     with a 'namespace' prefix, except for those which are aliased.

@@ -724,8 +732,6 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases):
                 raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace))
         return _find_used_symbols(tree) - set(params)

-
-
     def get_namespace_name(name, params):
         if params is not None:
             try:

@@ -746,19 +752,17 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases):
         else:
             assert symbol.type == 'RULE'
             _, params, tree, options = imported_rules[symbol]
-            params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s' ) % (namespace, p) for p in params}
+            params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s') % (namespace, p) for p in params}
             for t in tree.iter_subtrees():
                 for i, c in enumerate(t.children):
                     if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'):
                         t.children[i] = Token(c.type, get_namespace_name(c, params_map))
-            params = [params_map[p] for p in params] # We can not rely on ordered dictionaries
+            params = [params_map[p] for p in params]  # We can not rely on ordered dictionaries
             rule_defs.append((get_namespace_name(symbol, params_map), params, tree, options))

-
     return term_defs, rule_defs


-
 def resolve_term_references(term_defs):
     # TODO Solve with transitive closure (maybe)

@@ -798,7 +802,7 @@ def options_from_rule(name, params, *x):
     else:
         expansions ,= x
         priority = None
-    params = [t.value for t in params.children] if params is not None else [] # For the grammar parser
+    params = [t.value for t in params.children] if params is not None else []  # For the grammar parser

     keep_all_tokens = name.startswith('!')
     name = name.lstrip('!')

@@ -812,10 +816,12 @@ def options_from_rule(name, params, *x):
 def symbols_from_strcase(expansion):
     return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion]

+
 @inline_args
 class PrepareGrammar(Transformer_InPlace):
     def terminal(self, name):
         return name
+
     def nonterminal(self, name):
         return name

@@ -825,10 +831,11 @@ def _find_used_symbols(tree):
     return {t for x in tree.find_data('expansion')
             for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}

+
 class GrammarLoader:
     ERRORS = [
         ('Unclosed parenthesis', ['a: (\n']),
-        ('Umatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
+        ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
         ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
         ('Illegal name for rules or terminals', ['Aa:\n']),
         ('Alias expects lowercase name', ['a: -> "a"\n']),

@@ -843,8 +850,9 @@ class GrammarLoader:
     def __init__(self, global_keep_all_tokens):
         terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]

-        rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
-        rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, _p, xs, o in rules for i, x in enumerate(xs)]
+        rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
+        rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o)
+                 for r, _p, xs, o in rules for i, x in enumerate(xs)]
         callback = ParseTreeBuilder(rules, ST).create_callback()
         import re
         lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT'])

@@ -881,10 +889,10 @@ class GrammarLoader:
         return _imported_grammars[grammar_path]

     def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]):
-        "Parse grammar_text, verify, and create Grammar object. Display nice messages on error."
+        """Parse grammar_text, verify, and create Grammar object. Display nice messages on error."""

         try:
-            tree = self.canonize_tree.transform( self.parser.parse(grammar_text+'\n') )
+            tree = self.canonize_tree.transform(self.parser.parse(grammar_text+'\n'))
         except UnexpectedCharacters as e:
             context = e.get_context(grammar_text)
             raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %

@@ -1037,7 +1045,7 @@ class GrammarLoader:
                         raise GrammarError("Template '%s' used but not defined (in rule %s)" % (sym, name))
                     if len(args) != rule_names[sym]:
                         raise GrammarError("Wrong number of template arguments used for %s "
-                            "(expected %s, got %s) (in rule %s)"%(sym, rule_names[sym], len(args), name))
+                                           "(expected %s, got %s) (in rule %s)" % (sym, rule_names[sym], len(args), name))
             for sym in _find_used_symbols(expansions):
                 if sym.type == 'TERMINAL':
                     if sym not in terminal_names:

@@ -1046,10 +1054,8 @@ class GrammarLoader:
                     if sym not in rule_names and sym not in params:
                         raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name))

-
         return Grammar(rules, term_defs, ignore_names)


-
 def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
     return GrammarLoader(global_keep_all_tokens).load_grammar(grammar, source, import_paths)

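FromPackageLoader, only re-spaced in this commit, is the hook for resolving %import statements from package data instead of the filesystem. A hedged sketch of how it is typically wired up (the package name and layout are hypothetical, and the import_paths option name reflects this era of the API, so check it against the version you are running):

    from lark import Lark
    from lark.load_grammar import FromPackageLoader

    # Hypothetical layout: my_pkg/grammars/*.lark shipped as package data.
    pkg_loader = FromPackageLoader('my_pkg', ('grammars',))

    grammar = """
    %import common.WS
    %ignore WS
    start: "hello" "world"
    """

    # Loaders in import_paths are consulted when resolving %import statements;
    # lark's bundled grammars (e.g. common) remain available via stdlib_loader.
    parser = Lark(grammar, parser='lalr', import_paths=[pkg_loader])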
lark/parse_tree_builder.py  (+18, -7)

@@ -1,7 +1,7 @@
 from .exceptions import GrammarError
 from .lexer import Token
 from .tree import Tree
-from .visitors import InlineTransformer # XXX Deprecated
+from .visitors import InlineTransformer  # XXX Deprecated
 from .visitors import Transformer_InPlace
 from .visitors import _vargs_meta, _vargs_meta_inline

@@ -20,6 +20,7 @@ class ExpandSingleChild:
         else:
             return self.node_builder(children)

+
 class PropagatePositions:
     def __init__(self, node_builder):
         self.node_builder = node_builder

@@ -87,8 +88,9 @@ class ChildFilter:

         return self.node_builder(filtered)

+
 class ChildFilterLALR(ChildFilter):
-    "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
+    """Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"""

     def __call__(self, children):
         filtered = []

@@ -108,6 +110,7 @@ class ChildFilterLALR(ChildFilter):

         return self.node_builder(filtered)

+
 class ChildFilterLALR_NoPlaceholders(ChildFilter):
     "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
     def __init__(self, to_include, node_builder):

@@ -126,9 +129,11 @@ class ChildFilterLALR_NoPlaceholders(ChildFilter):
                 filtered.append(children[i])
         return self.node_builder(filtered)

+
 def _should_expand(sym):
     return not sym.is_term and sym.name.startswith('_')

+
 def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
     # Prepare empty_indices as: How many Nones to insert at each index?
     if _empty_indices:

@@ -156,6 +161,7 @@ def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
         # LALR without placeholders
         return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])

+
 class AmbiguousExpander:
     """Deal with the case where we're expanding children ('_rule') into a parent but the children
        are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself

@@ -167,10 +173,10 @@ class AmbiguousExpander:
         self.to_expand = to_expand

     def __call__(self, children):
-        def _is_ambig_tree(child):
-            return hasattr(child, 'data') and child.data == '_ambig'
+        def _is_ambig_tree(t):
+            return hasattr(t, 'data') and t.data == '_ambig'

-        #### When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
+        # -- When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
         # All children of an _ambig node should be a derivation of that ambig node, hence
         # it is safe to assume that if we see an _ambig node nested within an ambig node
         # it is safe to simply expand it into the parent _ambig node as an alternative derivation.

@@ -186,15 +192,17 @@ class AmbiguousExpander:
         if not ambiguous:
             return self.node_builder(children)

-        expand = [ iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children) ]
+        expand = [iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children)]
         return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))])

+
 def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
     to_expand = [i for i, sym in enumerate(expansion)
                  if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
     if to_expand:
         return partial(AmbiguousExpander, to_expand, tree_class)

+
 class AmbiguousIntermediateExpander:
     """
     Propagate ambiguous intermediate nodes and their derivations up to the

@@ -275,12 +283,14 @@ class AmbiguousIntermediateExpander:

         return self.node_builder(children)

+
 def ptb_inline_args(func):
     @wraps(func)
     def f(children):
         return func(*children)
     return f

+
 def inplace_transformer(func):
     @wraps(func)
     def f(children):

@@ -289,9 +299,11 @@ def inplace_transformer(func):
         return func(tree)
     return f

+
 def apply_visit_wrapper(func, name, wrapper):
     if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
         raise NotImplementedError("Meta args not supported for internal transformer")
+
     @wraps(func)
     def f(children):
         return wrapper(func, name, children, None)

@@ -323,7 +335,6 @@ class ParseTreeBuilder:

             yield rule, wrapper_chain

-
     def create_callback(self, transformer=None):
         callbacks = {}

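The renamed _is_ambig_tree helper above deals with _ambig nodes, which only appear when the Earley parser is asked to keep every derivation. A small invented example of where such trees come from:

    from lark import Lark

    # Two rules match the same input, so the parse is ambiguous.
    parser = Lark('''
        start: a | b
        a: "x"
        b: "x"
    ''', ambiguity='explicit')      # Earley is the default parser

    tree = parser.parse("x")
    # The root is an '_ambig' node whose children are the alternative
    # derivations (start -> a and start -> b).
    print(tree.pretty())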


lark/parsers/earley.py  (+2, -2)

@@ -298,8 +298,8 @@ class Parser:
         # this column. Find the item for the start_symbol, which is the root of the SPPF tree.
         solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
         if not solutions:
-            expected_tokens = [t.expect for t in to_scan]
-            raise UnexpectedEOF(expected_tokens)
+            expected_terminals = [t.expect for t in to_scan]
+            raise UnexpectedEOF(expected_terminals)

         if self.debug:
             from .earley_forest import ForestToPyDotVisitor


lark/tree.py  (+10, -6)

@@ -46,14 +46,14 @@ class Tree(object):

     def _pretty(self, level, indent_str):
         if len(self.children) == 1 and not isinstance(self.children[0], Tree):
-            return [ indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']
+            return [indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']

-        l = [ indent_str*level, self._pretty_label(), '\n' ]
+        l = [indent_str*level, self._pretty_label(), '\n']
         for n in self.children:
             if isinstance(n, Tree):
                 l += n._pretty(level+1, indent_str)
             else:
-                l += [ indent_str*(level+1), '%s' % (n,), '\n' ]
+                l += [indent_str*(level+1), '%s' % (n,), '\n']

         return l

@@ -102,8 +102,8 @@ class Tree(object):
 ###}

     def expand_kids_by_index(self, *indices):
-        "Expand (inline) children at the given indices"
-        for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices
+        """Expand (inline) children at the given indices"""
+        for i in sorted(indices, reverse=True):  # reverse so that changing tail won't affect indices
             kid = self.children[i]
             self.children[i:i+1] = kid.children

@@ -144,12 +144,15 @@ class Tree(object):
     @property
     def line(self):
         return self.meta.line
+
     @property
     def column(self):
         return self.meta.column
+
     @property
     def end_line(self):
         return self.meta.end_line
+
     @property
     def end_column(self):
         return self.meta.end_column

@@ -168,6 +171,7 @@ def pydot__tree_to_dot(tree, filename, rankdir="LR", **kwargs):
     graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
     graph.write(filename)

+
 def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):
     """Creates a colorful image that represents the tree (data+children, without meta)

@@ -196,7 +200,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):

         subnodes = [_to_pydot(child) if isinstance(child, Tree) else new_leaf(child)
                     for child in subtree.children]
-        node = pydot.Node(i[0], style="filled", fillcolor="#%x"%color, label=subtree.data)
+        node = pydot.Node(i[0], style="filled", fillcolor="#%x" % color, label=subtree.data)
         i[0] += 1
         graph.add_node(node)

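expand_kids_by_index, whose docstring was normalized above, splices a child's children into its parent in place; a quick example:

    from lark import Tree, Token

    t = Tree('start', [Tree('_inner', [Token('A', '1'), Token('A', '2')]),
                       Token('B', '3')])
    t.expand_kids_by_index(0)   # inline the children of t.children[0]
    # t is now Tree('start', [Token('A', '1'), Token('A', '2'), Token('B', '3')])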



lark/utils.py  (+9, -13)

@@ -1,10 +1,10 @@
-import sys
 import os
 from functools import reduce
 from ast import literal_eval
 from collections import deque

 ###{standalone
+import sys, re
 import logging
 logger = logging.getLogger("lark")
 logger.addHandler(logging.StreamHandler())

@@ -12,6 +12,8 @@ logger.addHandler(logging.StreamHandler())
 # By default, we should not output any log messages
 logger.setLevel(logging.CRITICAL)

+Py36 = (sys.version_info[:2] >= (3, 6))
+

 def classify(seq, key=None, value=None):
     d = {}

@@ -27,7 +29,7 @@ def classify(seq, key=None, value=None):

 def _deserialize(data, namespace, memo):
     if isinstance(data, dict):
-        if '__type__' in data: # Object
+        if '__type__' in data:  # Object
             class_ = namespace[data['__type__']]
             return class_.deserialize(data, memo)
         elif '@' in data:

@@ -105,7 +107,6 @@ class SerializeMemoizer(Serialize):
         return _deserialize(data, namespace, memo)


-
 try:
     STRING_TYPE = basestring
 except NameError:   # Python 3

@@ -118,10 +119,11 @@ from contextlib import contextmanager

 Str = type(u'')
 try:
-    classtype = types.ClassType # Python2
+    classtype = types.ClassType  # Python2
 except AttributeError:
     classtype = type    # Python3

+
 def smart_decorator(f, create_decorator):
     if isinstance(f, types.FunctionType):
         return wraps(f)(create_decorator(f, True))

@@ -139,17 +141,16 @@ def smart_decorator(f, create_decorator):
     else:
         return create_decorator(f.__func__.__call__, True)


 try:
     import regex
 except ImportError:
     regex = None

-import sys, re
-Py36 = (sys.version_info[:2] >= (3, 6))
-
 import sre_parse
 import sre_constants
 categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')

 def get_regexp_width(expr):
     if regex:
         # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with

@@ -173,9 +174,7 @@ def dedup_list(l):
        preserving the original order of the list. Assumes that
        the list entries are hashable."""
     dedup = set()
-    return [ x for x in l if not (x in dedup or dedup.add(x))]
-
-
+    return [x for x in l if not (x in dedup or dedup.add(x))]


 try:

@@ -197,8 +196,6 @@ except ImportError:
     pass


-
-
 try:
     compare = cmp
 except NameError:

@@ -210,7 +207,6 @@ except NameError:
             return -1


-
 class Enumerator(Serialize):
     def __init__(self):
         self.enums = {}

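dedup_list, reformatted above, removes duplicates while keeping the first occurrence of each element; for reference:

    from lark.utils import dedup_list

    # Order-preserving deduplication; list entries must be hashable.
    assert dedup_list([3, 1, 3, 2, 1]) == [3, 1, 2]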

lark/visitors.py  (+10, -13)

@@ -8,6 +8,7 @@ from .lexer import Token
 ###{standalone
 from inspect import getmembers, getmro

+
 class Discard(Exception):
     """When raising the Discard exception in a transformer callback,
     that node is discarded and won't appear in the parent.

@@ -16,6 +17,7 @@ class Discard(Exception):

 # Transformers

+
 class _Decoratable:
     "Provides support for decorating methods with @v_args"

@@ -107,7 +109,6 @@ class Transformer(_Decoratable):
         except Exception as e:
             raise VisitError(token.type, token, e)

-
     def _transform_children(self, children):
         for c in children:
             try:

@@ -148,7 +149,6 @@ class Transformer(_Decoratable):
         return token


-
 class InlineTransformer(Transformer):   # XXX Deprecated
     def _call_userfunc(self, tree, new_children=None):
         # Assumes tree is already transformed

@@ -203,7 +203,7 @@ class Transformer_NonRecursive(Transformer):
         q = [tree]
         while q:
             t = q.pop()
-            rev_postfix.append( t )
+            rev_postfix.append(t)
             if isinstance(t, Tree):
                 q += t.children

@@ -225,7 +225,6 @@ class Transformer_NonRecursive(Transformer):
         return t


-
 class Transformer_InPlaceRecursive(Transformer):
     "Same as Transformer, recursive, but changes the tree in-place instead of returning new instances"
     def _transform_tree(self, tree):

@@ -297,7 +296,6 @@ class Visitor_Recursive(VisitorBase):
         return tree


-
 def visit_children_decor(func):
     "See Interpreter"
     @wraps(func)

@@ -338,8 +336,6 @@ class Interpreter(_Decoratable):
         return self.visit_children(tree)


-
-
 # Decorators

 def _apply_decorator(obj, decorator, **kwargs):

@@ -351,7 +347,6 @@ def _apply_decorator(obj, decorator, **kwargs):
     return _apply(decorator, **kwargs)


-
 def _inline_args__func(func):
     @wraps(func)
     def create_decorator(_f, with_self):

@@ -370,7 +365,6 @@ def inline_args(obj):   # XXX Deprecated
     return _apply_decorator(obj, _inline_args__func)


-
 def _visitor_args_func_dec(func, visit_wrapper=None, static=False):
     def create_decorator(_f, with_self):
         if with_self:

@@ -390,11 +384,11 @@ def _visitor_args_func_dec(func, visit_wrapper=None, static=False):
     return f


-def _vargs_inline(f, data, children, meta):
+def _vargs_inline(f, _data, children, _meta):
     return f(*children)
-def _vargs_meta_inline(f, data, children, meta):
+def _vargs_meta_inline(f, _data, children, meta):
     return f(meta, *children)
-def _vargs_meta(f, data, children, meta):
+def _vargs_meta(f, _data, children, meta):
     return f(children, meta)   # TODO swap these for consistency? Backwards incompatible!
 def _vargs_tree(f, data, children, meta):
     return f(Tree(data, children, meta))

@@ -415,6 +409,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None):
         inline (bool, optional): Children are provided as ``*args`` instead of a list argument (not recommended for very long lists).
         meta (bool, optional): Provides two arguments: ``children`` and ``meta`` (instead of just the first)
         tree (bool, optional): Provides the entire tree as the argument, instead of the children.
+        wrapper (function, optional): Provide a function to decorate all methods.

     Example:
         ::

@@ -457,7 +452,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None):
 ###}


-#--- Visitor Utilities ---
+# --- Visitor Utilities ---

 class CollapseAmbiguities(Transformer):
     """

@@ -471,7 +466,9 @@ class CollapseAmbiguities(Transformer):
     """
     def _ambig(self, options):
         return sum(options, [])
+
     def __default__(self, data, children_lists, meta):
         return [Tree(data, children, meta) for children in combine_alternatives(children_lists)]
+
     def __default_token__(self, t):
         return [t]

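The docstring addition above documents the wrapper parameter of v_args. For completeness, a short sketch of the more common inline form (the calculator-style methods are invented for the example):

    from lark import Transformer, v_args

    @v_args(inline=True)        # children arrive as positional arguments
    class CalcTransformer(Transformer):
        def number(self, tok):
            return float(tok)

        def add(self, left, right):
            return left + right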