
Cleanup, and a few PEP8 changes

tag: 0.11.0
Erez Sh 4 years ago
parent commit 5b30ba4841
12 changed files with 141 additions and 107 deletions
  1. lark-stubs/lexer.pyi (+18 / -8)
  2. lark/common.py (+2 / -2)
  3. lark/exceptions.py (+4 / -0)
  4. lark/grammar.py (+0 / -3)
  5. lark/lark.py (+5 / -4)
  6. lark/lexer.py (+19 / -11)
  7. lark/load_grammar.py (+44 / -38)
  8. lark/parse_tree_builder.py (+18 / -7)
  9. lark/parsers/earley.py (+2 / -2)
  10. lark/tree.py (+10 / -6)
  11. lark/utils.py (+9 / -13)
  12. lark/visitors.py (+10 / -13)

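The hunks below mostly apply a few recurring PEP8 conventions: `is None` instead of `== None`, spaces around operators and after commas/colons, two blank lines between top-level definitions, two spaces before inline comments, and docstrings in triple quotes. A minimal standalone sketch of those idioms (hypothetical names, not taken from the diff):

# Illustrative only -- these names are not part of the lark codebase.


class Options:
    def __init__(self, priority=None):
        self.priority = priority


def strip_priority(options):
    """Return the priority, or None -- compared with 'is', not '=='."""
    if options.priority is None:  # two spaces before an inline comment
        return None
    return options.priority


def invert(d):
    """Dict comprehension with PEP8 spacing: {i: n for n, i in ...}."""
    return {i: n for n, i in d.items()}


print(strip_priority(Options()))          # -> None
print(invert({'NAME': 0, 'NUMBER': 1}))   # -> {0: 'NAME', 1: 'NUMBER'}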
lark-stubs/lexer.pyi (+18 / -8)

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from types import ModuleType
from typing import (
TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional,
TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
Pattern as REPattern,
)
from abc import abstractmethod, ABC
@@ -100,10 +100,22 @@ class Lexer(ABC):
lex: Callable[..., Iterator[Token]]


class LexerConf:
tokens: Collection[TerminalDef]
re_module: ModuleType
ignore: Collection[str] = ()
postlex: Any =None
callbacks: Optional[Dict[str, _Callback]] = None
g_regex_flags: int = 0
skip_validation: bool = False
use_bytes: bool = False



class TraditionalLexer(Lexer):
terminals: Collection[TerminalDef]
ignore_types: List[str]
newline_types: List[str]
ignore_types: FrozenSet[str]
newline_types: FrozenSet[str]
user_callbacks: Dict[str, _Callback]
callback: Dict[str, _Callback]
mres: List[Tuple[REPattern, Dict[int, str]]]
@@ -111,11 +123,7 @@ class TraditionalLexer(Lexer):

def __init__(
self,
terminals: Collection[TerminalDef],
re_: ModuleType,
ignore: Collection[str] = ...,
user_callbacks: Dict[str, _Callback] = ...,
g_regex_flags: int = ...
conf: LexerConf
):
...

@@ -128,6 +136,8 @@ class TraditionalLexer(Lexer):
def lex(self, stream: str) -> Iterator[Token]:
...

def next_token(self, lex_state: Any) -> Token:
...

class ContextualLexer(Lexer):
lexers: Dict[str, TraditionalLexer]


lark/common.py (+2 / -2)

@@ -3,6 +3,7 @@ from .lexer import TerminalDef

###{standalone


class LexerConf(Serialize):
__serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes'
__serialize_namespace__ = TerminalDef,
@@ -19,11 +20,10 @@ class LexerConf(Serialize):

###}


class ParserConf:
def __init__(self, rules, callbacks, start):
assert isinstance(start, list)
self.rules = rules
self.callbacks = callbacks
self.start = start



lark/exceptions.py (+4 / -0)

@@ -6,15 +6,19 @@ from .utils import STRING_TYPE, logger
class LarkError(Exception):
pass


class GrammarError(LarkError):
pass


class ParseError(LarkError):
pass


class LexError(LarkError):
pass


class UnexpectedEOF(ParseError):
def __init__(self, expected):
self.expected = expected


lark/grammar.py (+0 / -3)

@@ -40,14 +40,12 @@ class Terminal(Symbol):
return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)



class NonTerminal(Symbol):
__serialize_fields__ = 'name',

is_term = False



class RuleOptions(Serialize):
__serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'

@@ -104,5 +102,4 @@ class Rule(Serialize):
return self.origin == other.origin and self.expansion == other.expansion



###}

lark/lark.py (+5 / -4)

@@ -24,6 +24,7 @@ except ImportError:

###{standalone


class LarkOptions(Serialize):
"""Specifies the options for Lark

@@ -36,7 +37,7 @@ class LarkOptions(Serialize):
debug
Display debug information, such as warnings (default: False)
transformer
Applies the transformer to every parse tree (equivlent to applying it after the parse, but faster)
Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
propagate_positions
Propagates (line, column, end_line, end_column) attributes into all tree branches.
maybe_placeholders
@@ -320,7 +321,7 @@ class Lark(Serialize):
# Else, if the user asked to disable priorities, strip them from the
# rules. This allows the Earley parsers to skip an extra forest walk
# for improved performance, if you don't need them (or didn't specify any).
elif self.options.priority == None:
elif self.options.priority is None:
for rule in self.rules:
if rule.options.priority is not None:
rule.options.priority = None
@@ -360,7 +361,7 @@ class Lark(Serialize):
self.rules,
self.options.tree_class or Tree,
self.options.propagate_positions,
self.options.parser!='lalr' and self.options.ambiguity=='explicit',
self.options.parser != 'lalr' and self.options.ambiguity == 'explicit',
self.options.maybe_placeholders
)
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
@@ -410,7 +411,7 @@ class Lark(Serialize):
data['parser'],
memo,
self._callbacks,
self.options, # Not all, but multiple attributes are used
self.options, # Not all, but multiple attributes are used
)
self.terminals = self.parser.lexer_conf.tokens
self._terminals_dict = {t.name: t for t in self.terminals}


lark/lexer.py (+19 / -11)

@@ -1,4 +1,4 @@
## Lexer Implementation
# Lexer Implementation

import re

@@ -8,6 +8,7 @@ from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
###{standalone
from copy import copy


class Pattern(Serialize):

def __init__(self, value, flags=()):
@@ -20,6 +21,7 @@ class Pattern(Serialize):
# Pattern Hashing assumes all subclasses have a different priority!
def __hash__(self):
return hash((type(self), self.value, self.flags))

def __eq__(self, other):
return type(self) == type(other) and self.value == other.value and self.flags == other.flags

@@ -53,6 +55,7 @@ class PatternStr(Pattern):
return len(self.value)
max_width = min_width


class PatternRE(Pattern):
__serialize_fields__ = 'value', 'flags', '_width'

@@ -70,6 +73,7 @@ class PatternRE(Pattern):
@property
def min_width(self):
return self._get_width()[0]

@property
def max_width(self):
return self._get_width()[1]
@@ -139,7 +143,7 @@ class Token(Str):
return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)

def __reduce__(self):
return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, ))
return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column))

def __repr__(self):
return 'Token(%r, %r)' % (self.type, self.value)
@@ -193,6 +197,7 @@ class UnlessCallback:
break
return t


class CallChain:
def __init__(self, callback1, callback2, cond):
self.callback1 = callback1
@@ -204,16 +209,13 @@ class CallChain:
return self.callback2(t) if self.cond(t2) else t2





def _create_unless(terminals, g_regex_flags, re_, use_bytes):
tokens_by_type = classify(terminals, lambda t: type(t.pattern))
assert len(tokens_by_type) <= 2, tokens_by_type.keys()
embedded_strs = set()
callback = {}
for retok in tokens_by_type.get(PatternRE, []):
unless = [] # {}
unless = []
for strtok in tokens_by_type.get(PatternStr, []):
if strtok.priority > retok.priority:
continue
@@ -245,13 +247,15 @@ def _build_mres(terminals, max_size, g_regex_flags, match_whole, re_, use_bytes)
except AssertionError: # Yes, this is what Python provides us.. :/
return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_, use_bytes)

mres.append((mre, {i:n for n,i in mre.groupindex.items()} ))
mres.append((mre, {i: n for n, i in mre.groupindex.items()}))
terminals = terminals[max_size:]
return mres


def build_mres(terminals, g_regex_flags, re_, use_bytes, match_whole=False):
return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_, use_bytes)


def _regexp_has_newline(r):
r"""Expressions that may indicate newlines in a regexp:
- newlines (\n)
@@ -262,6 +266,7 @@ def _regexp_has_newline(r):
"""
return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)


class Lexer(object):
"""Lexer interface

@@ -300,7 +305,7 @@ class TraditionalLexer(Lexer):
self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp()))
self.ignore_types = frozenset(conf.ignore)

terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
self.terminals = terminals
self.user_callbacks = conf.callbacks
self.g_regex_flags = conf.g_regex_flags
@@ -309,7 +314,7 @@ class TraditionalLexer(Lexer):
self._mres = None

def _build(self):
terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, re_=self.re, use_bytes=self.use_bytes)
terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes)
assert all(self.callback.values())

for type_, f in self.user_callbacks.items():
@@ -333,7 +338,7 @@ class TraditionalLexer(Lexer):
if m:
return m.group(0), type_from_index[m.lastindex]

def lex(self, state, parser_state):
def lex(self, state, _parser_state):
with suppress(EOFError):
while True:
yield self.next_token(state)
@@ -372,6 +377,7 @@ class TraditionalLexer(Lexer):
# EOF
raise EOFError(self)


class LexerState:
__slots__ = 'text', 'line_ctr', 'last_token'

@@ -383,6 +389,7 @@ class LexerState:
def __copy__(self):
return type(self)(self.text, copy(self.line_ctr), self.last_token)


class ContextualLexer(Lexer):

def __init__(self, conf, states, always_accept=()):
@@ -430,8 +437,9 @@ class ContextualLexer(Lexer):
token = self.root_lexer.next_token(lexer_state)
raise UnexpectedToken(token, e.allowed, state=parser_state.position)


class LexerThread:
"A thread that ties a lexer instance and a lexer state, to be used by the parser"
"""A thread that ties a lexer instance and a lexer state, to be used by the parser"""

def __init__(self, lexer, text):
self.lexer = lexer


lark/load_grammar.py (+44 / -38)

@@ -1,4 +1,4 @@
"Parses and creates Grammar objects"
"""Parses and creates Grammar objects"""

import os.path
import sys
@@ -166,6 +166,7 @@ RULES = {
'literal': ['REGEXP', 'STRING'],
}


@inline_args
class EBNF_to_BNF(Transformer_InPlace):
def __init__(self):
@@ -259,9 +260,9 @@ class SimplifyRule_Visitor(Visitor):
for i, child in enumerate(tree.children):
if isinstance(child, Tree) and child.data == 'expansions':
tree.data = 'expansions'
tree.children = [self.visit(ST('expansion', [option if i==j else other
for j, other in enumerate(tree.children)]))
for option in dedup_list(child.children)]
tree.children = [self.visit(ST('expansion', [option if i == j else other
for j, other in enumerate(tree.children)]))
for option in dedup_list(child.children)]
self._flatten(tree)
break

@@ -284,8 +285,10 @@ class SimplifyRule_Visitor(Visitor):
class RuleTreeToText(Transformer):
def expansions(self, x):
return x

def expansion(self, symbols):
return symbols, None

def alias(self, x):
(expansion, _alias), alias = x
assert _alias is None, (alias, expansion, '-', _alias) # Double alias not allowed
@@ -300,8 +303,9 @@ class CanonizeTree(Transformer_InPlace):
tokenmods, value = args
return tokenmods + [value]


class PrepareAnonTerminals(Transformer_InPlace):
"Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"
"""Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"""

def __init__(self, terminals):
self.terminals = terminals
@@ -310,7 +314,6 @@ class PrepareAnonTerminals(Transformer_InPlace):
self.i = 0
self.rule_options = None


@inline_args
def pattern(self, p):
value = p.value
@@ -330,14 +333,14 @@ class PrepareAnonTerminals(Transformer_InPlace):
except KeyError:
if value.isalnum() and value[0].isalpha() and value.upper() not in self.term_set:
with suppress(UnicodeEncodeError):
value.upper().encode('ascii') # Make sure we don't have unicode in our terminal names
value.upper().encode('ascii') # Make sure we don't have unicode in our terminal names
term_name = value.upper()

if term_name in self.term_set:
term_name = None

elif isinstance(p, PatternRE):
if p in self.term_reverse: # Kind of a weird placement.name
if p in self.term_reverse: # Kind of a weird placement.name
term_name = self.term_reverse[p].name
else:
assert False, p
@@ -359,7 +362,7 @@ class PrepareAnonTerminals(Transformer_InPlace):


class _ReplaceSymbols(Transformer_InPlace):
" Helper for ApplyTemplates "
"""Helper for ApplyTemplates"""

def __init__(self):
self.names = {}
@@ -374,8 +377,9 @@ class _ReplaceSymbols(Transformer_InPlace):
return self.__default__('template_usage', [self.names[c[0]].name] + c[1:], None)
return self.__default__('template_usage', c, None)


class ApplyTemplates(Transformer_InPlace):
" Apply the templates, creating new rules that represent the used templates "
"""Apply the templates, creating new rules that represent the used templates"""

def __init__(self, rule_defs):
self.rule_defs = rule_defs
@@ -401,8 +405,6 @@ def _rfind(s, choices):
return max(s.rfind(c) for c in choices)




def _literal_to_pattern(literal):
v = literal.value
flag_start = _rfind(v, '/"')+1
@@ -441,7 +443,7 @@ class PrepareLiterals(Transformer_InPlace):
assert start.type == end.type == 'STRING'
start = start.value[1:-1]
end = end.value[1:-1]
assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1, (start, end, len(eval_escaping(start)), len(eval_escaping(end)))
assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1
regexp = '[%s-%s]' % (start, end)
return ST('pattern', [PatternRE(regexp)])

@@ -460,6 +462,7 @@ def _make_joined_pattern(regexp, flags_set):

return PatternRE(regexp, flags)


class TerminalTreeToPattern(Transformer):
def pattern(self, ps):
p ,= ps
@@ -503,6 +506,7 @@ class TerminalTreeToPattern(Transformer):
def value(self, v):
return v[0]


class PrepareSymbols(Transformer_InPlace):
def value(self, v):
v ,= v
@@ -514,13 +518,16 @@ class PrepareSymbols(Transformer_InPlace):
return Terminal(Str(v.value), filter_out=v.startswith('_'))
assert False


def _choice_of_rules(rules):
return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules])


def nr_deepcopy_tree(t):
"Deepcopy tree `t` without recursion"
"""Deepcopy tree `t` without recursion"""
return Transformer_NonRecursive(False).transform(t)


class Grammar:
def __init__(self, rule_defs, term_defs, ignore):
self.term_defs = term_defs
@@ -547,7 +554,7 @@ class Grammar:
raise GrammarError("Terminals cannot be empty (%s)" % name)

transformer = PrepareLiterals() * TerminalTreeToPattern()
terminals = [TerminalDef(name, transformer.transform( term_tree ), priority)
terminals = [TerminalDef(name, transformer.transform(term_tree), priority)
for name, (term_tree, priority) in term_defs if term_tree]

# =================
@@ -566,10 +573,10 @@ class Grammar:
ebnf_to_bnf = EBNF_to_BNF()
rules = []
i = 0
while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates
while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates
name, params, rule_tree, options = rule_defs[i]
i += 1
if len(params) != 0: # Dont transform templates
if len(params) != 0: # Dont transform templates
continue
rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
ebnf_to_bnf.rule_options = rule_options
@@ -594,7 +601,7 @@ class Grammar:

for i, (expansion, alias) in enumerate(expansions):
if alias and name.startswith('_'):
raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))
raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)"% (name, alias))

empty_indices = [x==_EMPTY for x in expansion]
if any(empty_indices):
@@ -623,14 +630,13 @@ class Grammar:
# Remove duplicates
compiled_rules = list(set(compiled_rules))


# Filter out unused rules
while True:
c = len(compiled_rules)
used_rules = {s for r in compiled_rules
for s in r.expansion
if isinstance(s, NonTerminal)
and s != r.origin}
for s in r.expansion
if isinstance(s, NonTerminal)
and s != r.origin}
used_rules |= {NonTerminal(s) for s in start}
compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules)
for r in unused:
@@ -663,6 +669,7 @@ class PackageResource(object):
def __repr__(self):
return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.path)


class FromPackageLoader(object):
"""
Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`.
@@ -699,11 +706,12 @@ class FromPackageLoader(object):
return PackageResource(self.pkg_name, full_path), text.decode()
raise IOError()

stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)

stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)

_imported_grammars = {}


def import_from_grammar_into_namespace(grammar, namespace, aliases):
"""Returns all rules and terminals of grammar, prepended
with a 'namespace' prefix, except for those which are aliased.
@@ -724,8 +732,6 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases):
raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace))
return _find_used_symbols(tree) - set(params)



def get_namespace_name(name, params):
if params is not None:
try:
@@ -746,19 +752,17 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases):
else:
assert symbol.type == 'RULE'
_, params, tree, options = imported_rules[symbol]
params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s' ) % (namespace, p) for p in params}
params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s') % (namespace, p) for p in params}
for t in tree.iter_subtrees():
for i, c in enumerate(t.children):
if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'):
t.children[i] = Token(c.type, get_namespace_name(c, params_map))
params = [params_map[p] for p in params] # We can not rely on ordered dictionaries
params = [params_map[p] for p in params] # We can not rely on ordered dictionaries
rule_defs.append((get_namespace_name(symbol, params_map), params, tree, options))


return term_defs, rule_defs



def resolve_term_references(term_defs):
# TODO Solve with transitive closure (maybe)

@@ -798,7 +802,7 @@ def options_from_rule(name, params, *x):
else:
expansions ,= x
priority = None
params = [t.value for t in params.children] if params is not None else [] # For the grammar parser
params = [t.value for t in params.children] if params is not None else [] # For the grammar parser

keep_all_tokens = name.startswith('!')
name = name.lstrip('!')
@@ -812,10 +816,12 @@ def options_from_rule(name, params, *x):
def symbols_from_strcase(expansion):
return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion]


@inline_args
class PrepareGrammar(Transformer_InPlace):
def terminal(self, name):
return name

def nonterminal(self, name):
return name

@@ -825,10 +831,11 @@ def _find_used_symbols(tree):
return {t for x in tree.find_data('expansion')
for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}


class GrammarLoader:
ERRORS = [
('Unclosed parenthesis', ['a: (\n']),
('Umatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
('Illegal name for rules or terminals', ['Aa:\n']),
('Alias expects lowercase name', ['a: -> "a"\n']),
@@ -843,8 +850,9 @@ class GrammarLoader:
def __init__(self, global_keep_all_tokens):
terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]

rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, _p, xs, o in rules for i, x in enumerate(xs)]
rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o)
for r, _p, xs, o in rules for i, x in enumerate(xs)]
callback = ParseTreeBuilder(rules, ST).create_callback()
import re
lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT'])
@@ -881,10 +889,10 @@ class GrammarLoader:
return _imported_grammars[grammar_path]

def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]):
"Parse grammar_text, verify, and create Grammar object. Display nice messages on error."
"""Parse grammar_text, verify, and create Grammar object. Display nice messages on error."""

try:
tree = self.canonize_tree.transform( self.parser.parse(grammar_text+'\n') )
tree = self.canonize_tree.transform(self.parser.parse(grammar_text+'\n'))
except UnexpectedCharacters as e:
context = e.get_context(grammar_text)
raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
@@ -1037,7 +1045,7 @@ class GrammarLoader:
raise GrammarError("Template '%s' used but not defined (in rule %s)" % (sym, name))
if len(args) != rule_names[sym]:
raise GrammarError("Wrong number of template arguments used for %s "
"(expected %s, got %s) (in rule %s)"%(sym, rule_names[sym], len(args), name))
"(expected %s, got %s) (in rule %s)" % (sym, rule_names[sym], len(args), name))
for sym in _find_used_symbols(expansions):
if sym.type == 'TERMINAL':
if sym not in terminal_names:
@@ -1046,10 +1054,8 @@ class GrammarLoader:
if sym not in rule_names and sym not in params:
raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name))


return Grammar(rules, term_defs, ignore_names)



def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
return GrammarLoader(global_keep_all_tokens).load_grammar(grammar, source, import_paths)

lark/parse_tree_builder.py (+18 / -7)

@@ -1,7 +1,7 @@
from .exceptions import GrammarError
from .lexer import Token
from .tree import Tree
from .visitors import InlineTransformer # XXX Deprecated
from .visitors import InlineTransformer # XXX Deprecated
from .visitors import Transformer_InPlace
from .visitors import _vargs_meta, _vargs_meta_inline

@@ -20,6 +20,7 @@ class ExpandSingleChild:
else:
return self.node_builder(children)


class PropagatePositions:
def __init__(self, node_builder):
self.node_builder = node_builder
@@ -87,8 +88,9 @@ class ChildFilter:

return self.node_builder(filtered)


class ChildFilterLALR(ChildFilter):
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
"""Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"""

def __call__(self, children):
filtered = []
@@ -108,6 +110,7 @@ class ChildFilterLALR(ChildFilter):

return self.node_builder(filtered)


class ChildFilterLALR_NoPlaceholders(ChildFilter):
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
def __init__(self, to_include, node_builder):
@@ -126,9 +129,11 @@ class ChildFilterLALR_NoPlaceholders(ChildFilter):
filtered.append(children[i])
return self.node_builder(filtered)


def _should_expand(sym):
return not sym.is_term and sym.name.startswith('_')


def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
# Prepare empty_indices as: How many Nones to insert at each index?
if _empty_indices:
@@ -156,6 +161,7 @@ def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indi
# LALR without placeholders
return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])


class AmbiguousExpander:
"""Deal with the case where we're expanding children ('_rule') into a parent but the children
are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself
@@ -167,10 +173,10 @@ class AmbiguousExpander:
self.to_expand = to_expand

def __call__(self, children):
def _is_ambig_tree(child):
return hasattr(child, 'data') and child.data == '_ambig'
def _is_ambig_tree(t):
return hasattr(t, 'data') and t.data == '_ambig'

#### When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
# -- When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
# All children of an _ambig node should be a derivation of that ambig node, hence
# it is safe to assume that if we see an _ambig node nested within an ambig node
# it is safe to simply expand it into the parent _ambig node as an alternative derivation.
@@ -186,15 +192,17 @@ class AmbiguousExpander:
if not ambiguous:
return self.node_builder(children)

expand = [ iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children) ]
expand = [iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children)]
return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))])


def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
to_expand = [i for i, sym in enumerate(expansion)
if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
if to_expand:
return partial(AmbiguousExpander, to_expand, tree_class)


class AmbiguousIntermediateExpander:
"""
Propagate ambiguous intermediate nodes and their derivations up to the
@@ -275,12 +283,14 @@ class AmbiguousIntermediateExpander:

return self.node_builder(children)


def ptb_inline_args(func):
@wraps(func)
def f(children):
return func(*children)
return f


def inplace_transformer(func):
@wraps(func)
def f(children):
@@ -289,9 +299,11 @@ def inplace_transformer(func):
return func(tree)
return f


def apply_visit_wrapper(func, name, wrapper):
if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
raise NotImplementedError("Meta args not supported for internal transformer")

@wraps(func)
def f(children):
return wrapper(func, name, children, None)
@@ -323,7 +335,6 @@ class ParseTreeBuilder:

yield rule, wrapper_chain


def create_callback(self, transformer=None):
callbacks = {}



lark/parsers/earley.py (+2 / -2)

@@ -298,8 +298,8 @@ class Parser:
# this column. Find the item for the start_symbol, which is the root of the SPPF tree.
solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
if not solutions:
expected_tokens = [t.expect for t in to_scan]
raise UnexpectedEOF(expected_tokens)
expected_terminals = [t.expect for t in to_scan]
raise UnexpectedEOF(expected_terminals)

if self.debug:
from .earley_forest import ForestToPyDotVisitor


lark/tree.py (+10 / -6)

@@ -46,14 +46,14 @@ class Tree(object):

def _pretty(self, level, indent_str):
if len(self.children) == 1 and not isinstance(self.children[0], Tree):
return [ indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']
return [indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']

l = [ indent_str*level, self._pretty_label(), '\n' ]
l = [indent_str*level, self._pretty_label(), '\n']
for n in self.children:
if isinstance(n, Tree):
l += n._pretty(level+1, indent_str)
else:
l += [ indent_str*(level+1), '%s' % (n,), '\n' ]
l += [indent_str*(level+1), '%s' % (n,), '\n']

return l

@@ -102,8 +102,8 @@ class Tree(object):
###}

def expand_kids_by_index(self, *indices):
"Expand (inline) children at the given indices"
for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices
"""Expand (inline) children at the given indices"""
for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices
kid = self.children[i]
self.children[i:i+1] = kid.children

@@ -144,12 +144,15 @@ class Tree(object):
@property
def line(self):
return self.meta.line

@property
def column(self):
return self.meta.column

@property
def end_line(self):
return self.meta.end_line

@property
def end_column(self):
return self.meta.end_column
@@ -168,6 +171,7 @@ def pydot__tree_to_dot(tree, filename, rankdir="LR", **kwargs):
graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
graph.write(filename)


def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):
"""Creates a colorful image that represents the tree (data+children, without meta)

@@ -196,7 +200,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):

subnodes = [_to_pydot(child) if isinstance(child, Tree) else new_leaf(child)
for child in subtree.children]
node = pydot.Node(i[0], style="filled", fillcolor="#%x"%color, label=subtree.data)
node = pydot.Node(i[0], style="filled", fillcolor="#%x" % color, label=subtree.data)
i[0] += 1
graph.add_node(node)



lark/utils.py (+9 / -13)

@@ -1,10 +1,10 @@
import sys
import os
from functools import reduce
from ast import literal_eval
from collections import deque

###{standalone
import sys, re
import logging
logger = logging.getLogger("lark")
logger.addHandler(logging.StreamHandler())
@@ -12,6 +12,8 @@ logger.addHandler(logging.StreamHandler())
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)

Py36 = (sys.version_info[:2] >= (3, 6))


def classify(seq, key=None, value=None):
d = {}
@@ -27,7 +29,7 @@ def classify(seq, key=None, value=None):

def _deserialize(data, namespace, memo):
if isinstance(data, dict):
if '__type__' in data: # Object
if '__type__' in data: # Object
class_ = namespace[data['__type__']]
return class_.deserialize(data, memo)
elif '@' in data:
@@ -105,7 +107,6 @@ class SerializeMemoizer(Serialize):
return _deserialize(data, namespace, memo)



try:
STRING_TYPE = basestring
except NameError: # Python 3
@@ -118,10 +119,11 @@ from contextlib import contextmanager

Str = type(u'')
try:
classtype = types.ClassType # Python2
classtype = types.ClassType # Python2
except AttributeError:
classtype = type # Python3


def smart_decorator(f, create_decorator):
if isinstance(f, types.FunctionType):
return wraps(f)(create_decorator(f, True))
@@ -139,17 +141,16 @@ def smart_decorator(f, create_decorator):
else:
return create_decorator(f.__func__.__call__, True)


try:
import regex
except ImportError:
regex = None

import sys, re
Py36 = (sys.version_info[:2] >= (3, 6))

import sre_parse
import sre_constants
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')

def get_regexp_width(expr):
if regex:
# Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
@@ -173,9 +174,7 @@ def dedup_list(l):
preserving the original order of the list. Assumes that
the list entries are hashable."""
dedup = set()
return [ x for x in l if not (x in dedup or dedup.add(x))]


return [x for x in l if not (x in dedup or dedup.add(x))]


try:
@@ -197,8 +196,6 @@ except ImportError:
pass




try:
compare = cmp
except NameError:
@@ -210,7 +207,6 @@ except NameError:
return -1



class Enumerator(Serialize):
def __init__(self):
self.enums = {}


lark/visitors.py (+10 / -13)

@@ -8,6 +8,7 @@ from .lexer import Token
###{standalone
from inspect import getmembers, getmro


class Discard(Exception):
"""When raising the Discard exception in a transformer callback,
that node is discarded and won't appear in the parent.
@@ -16,6 +17,7 @@ class Discard(Exception):

# Transformers


class _Decoratable:
"Provides support for decorating methods with @v_args"

@@ -107,7 +109,6 @@ class Transformer(_Decoratable):
except Exception as e:
raise VisitError(token.type, token, e)


def _transform_children(self, children):
for c in children:
try:
@@ -148,7 +149,6 @@ class Transformer(_Decoratable):
return token



class InlineTransformer(Transformer): # XXX Deprecated
def _call_userfunc(self, tree, new_children=None):
# Assumes tree is already transformed
@@ -203,7 +203,7 @@ class Transformer_NonRecursive(Transformer):
q = [tree]
while q:
t = q.pop()
rev_postfix.append( t )
rev_postfix.append(t)
if isinstance(t, Tree):
q += t.children

@@ -225,7 +225,6 @@ class Transformer_NonRecursive(Transformer):
return t



class Transformer_InPlaceRecursive(Transformer):
"Same as Transformer, recursive, but changes the tree in-place instead of returning new instances"
def _transform_tree(self, tree):
@@ -297,7 +296,6 @@ class Visitor_Recursive(VisitorBase):
return tree



def visit_children_decor(func):
"See Interpreter"
@wraps(func)
@@ -338,8 +336,6 @@ class Interpreter(_Decoratable):
return self.visit_children(tree)




# Decorators

def _apply_decorator(obj, decorator, **kwargs):
@@ -351,7 +347,6 @@ def _apply_decorator(obj, decorator, **kwargs):
return _apply(decorator, **kwargs)



def _inline_args__func(func):
@wraps(func)
def create_decorator(_f, with_self):
@@ -370,7 +365,6 @@ def inline_args(obj): # XXX Deprecated
return _apply_decorator(obj, _inline_args__func)



def _visitor_args_func_dec(func, visit_wrapper=None, static=False):
def create_decorator(_f, with_self):
if with_self:
@@ -390,11 +384,11 @@ def _visitor_args_func_dec(func, visit_wrapper=None, static=False):
return f


def _vargs_inline(f, data, children, meta):
def _vargs_inline(f, _data, children, _meta):
return f(*children)
def _vargs_meta_inline(f, data, children, meta):
def _vargs_meta_inline(f, _data, children, meta):
return f(meta, *children)
def _vargs_meta(f, data, children, meta):
def _vargs_meta(f, _data, children, meta):
return f(children, meta) # TODO swap these for consistency? Backwards incompatible!
def _vargs_tree(f, data, children, meta):
return f(Tree(data, children, meta))
@@ -415,6 +409,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None):
inline (bool, optional): Children are provided as ``*args`` instead of a list argument (not recommended for very long lists).
meta (bool, optional): Provides two arguments: ``children`` and ``meta`` (instead of just the first)
tree (bool, optional): Provides the entire tree as the argument, instead of the children.
wrapper (function, optional): Provide a function to decorate all methods.

Example:
::
@@ -457,7 +452,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None):
###}


#--- Visitor Utilities ---
# --- Visitor Utilities ---

class CollapseAmbiguities(Transformer):
"""
@@ -471,7 +466,9 @@ class CollapseAmbiguities(Transformer):
"""
def _ambig(self, options):
return sum(options, [])

def __default__(self, data, children_lists, meta):
return [Tree(data, children, meta) for children in combine_alternatives(children_lists)]

def __default_token__(self, t):
return [t]
