
Merge branch 'master' into master

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.3
ehudt 6 years ago
committed by GitHub
parent commit c1166695b7
24 changed files with 1538 additions and 445 deletions
  1. +15 -13 README.md
  2. +1 -0 examples/standalone/create_standalone.sh
  3. +21 -0 examples/standalone/json.g
  4. +794 -0 examples/standalone/json_parser.py
  5. +25 -0 examples/standalone/json_parser_main.py
  6. +1 -1 lark/__init__.py
  7. +10 -31 lark/common.py
  8. +37 -0 lark/grammar.py
  9. +1 -0 lark/grammars/common.g
  10. +3 -0 lark/indenter.py
  11. +5 -3 lark/lark.py
  12. +74 -90 lark/lexer.py
  13. +51 -51 lark/load_grammar.py
  14. +33 -28 lark/parse_tree_builder.py
  15. +55 -63 lark/parser_frontends.py
  16. +17 -48 lark/parsers/earley.py
  17. +19 -29 lark/parsers/grammar_analysis.py
  18. +47 -18 lark/parsers/lalr_analysis.py
  19. +25 -22 lark/parsers/lalr_parser.py
  20. +21 -25 lark/parsers/xearley.py
  21. +203 -0 lark/tools/standalone.py
  22. +5 -1 lark/tree.py
  23. +26 -17 lark/utils.py
  24. +49 -5 tests/test_parser.py

+15 -13 README.md

@@ -12,6 +12,7 @@ Lark can:
- Build a parse-tree automagically, no construction code required
- Outperform all other Python libraries when using LALR(1) (Yes, including PLY)
- Run on every Python interpreter (it's pure-python)
- Generate a stand-alone parser (for LALR(1) grammars)


And many more features. Read ahead and find out.


@@ -66,10 +67,11 @@ See more [examples in the wiki](https://github.com/erezsh/lark/wiki/Examples)


- Builds a parse-tree (AST) automagically, based on the structure of the grammar
- **Earley** parser
- Can parse *ALL* context-free grammars
- Full support for ambiguity in grammar
- Can parse all context-free grammars
- Full support for ambiguous grammars
- **LALR(1)** parser
- Competitive with PLY
- Fast and light, competitive with PLY
- Can generate a stand-alone parser
- **EBNF** grammar
- **Unicode** fully supported
- **Python 2 & 3** compatible
@@ -86,7 +88,7 @@ See the full list of [features in the wiki](https://github.com/erezsh/lark/wiki/


#### Performance comparison


Lower is better!
Lark is the fastest and lightest (lower is better)


![Run-time Comparison](docs/comparison_runtime.png)


@@ -99,17 +101,17 @@ Check out the [JSON tutorial](/docs/json_tutorial.md#conclusion) for more detail


#### Feature comparison


| Library | Algorithm | Grammar | Builds tree? | Supports ambiguity? | Can handle every CFG?
|:--------|:----------|:----|:--------|:------------|:------------
| **Lark** | Earley/LALR(1) | EBNF | Yes! | Yes! | Yes! |
| [PLY](http://www.dabeaz.com/ply/) | LALR(1) | BNF | No | No | No |
| [PyParsing](http://pyparsing.wikispaces.com/) | PEG | Combinators | No | No | No\* |
| [Parsley](https://pypi.python.org/pypi/Parsley) | PEG | EBNF | No | No | No\* |
| [funcparserlib](https://github.com/vlasovskikh/funcparserlib) | Recursive-Descent | Combinators | No | No | No |
| [Parsimonious](https://github.com/erikrose/parsimonious) | PEG | EBNF | Yes | No | No\* |
| Library | Algorithm | Grammar | Builds tree? | Supports ambiguity? | Can handle every CFG? | Line/Column tracking | Generates Stand-alone
|:--------|:----------|:----|:--------|:------------|:------------|:----------|:----------
| **Lark** | Earley/LALR(1) | EBNF | Yes! | Yes! | Yes! | Yes! | Yes! (LALR only) |
| [PLY](http://www.dabeaz.com/ply/) | LALR(1) | BNF | No | No | No | No | No |
| [PyParsing](http://pyparsing.wikispaces.com/) | PEG | Combinators | No | No | No\* | No | No |
| [Parsley](https://pypi.python.org/pypi/Parsley) | PEG | EBNF | No | No | No\* | No | No |
| [funcparserlib](https://github.com/vlasovskikh/funcparserlib) | Recursive-Descent | Combinators | No | No | No | No | No |
| [Parsimonious](https://github.com/erikrose/parsimonious) | PEG | EBNF | Yes | No | No\* | No | No |




(\* *According to Wikipedia, it remains unanswered whether PEGs can really parse all deterministic CFGs*)
(\* *PEGs cannot handle non-deterministic grammars. Also, according to Wikipedia, it remains unanswered whether PEGs can really parse all deterministic CFGs*)




### Projects using Lark


+1 -0 examples/standalone/create_standalone.sh

@@ -0,0 +1 @@
python -m lark.tools.standalone json.g > json_parser.py

+21 -0 examples/standalone/json.g

@@ -0,0 +1,21 @@
?start: value

?value: object
| array
| string
| SIGNED_NUMBER -> number
| "true" -> true
| "false" -> false
| "null" -> null

array : "[" [value ("," value)*] "]"
object : "{" [pair ("," pair)*] "}"
pair : string ":" value

string : ESCAPED_STRING

%import common.ESCAPED_STRING
%import common.SIGNED_NUMBER
%import common.WS

%ignore WS
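For reference, the same grammar also works with the regular (non-standalone) Lark API; a minimal sketch, assuming lark is installed and json.g is in the working directory (not part of the commit):

from lark import Lark

json_parser = Lark(open('json.g').read(), parser='lalr')   # load the grammar at runtime
tree = json_parser.parse('{"key": ["item0", 3.14, true]}')
print(tree.pretty())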

+794 -0 examples/standalone/json_parser.py

@@ -0,0 +1,794 @@
# The file was automatically generated by Lark v0.5.2
#
#
# Lark Stand-alone Generator Tool
# ----------------------------------
# Generates a stand-alone LALR(1) parser with a standard lexer
#
# Git: https://github.com/erezsh/lark
# Author: Erez Shinan (erezshin@gmail.com)
#
#
# >>> LICENSE
#
# This tool and its generated code use a separate license from Lark.
#
# It is licensed under GPLv2 or above.
#
# If you wish to purchase a commercial license for this tool and its
# generated code, contact me via email.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# See <http://www.gnu.org/licenses/>.
#
#


import types
import functools
from contextlib import contextmanager

Str = type(u'')

def inline_args(f):
# print '@@', f.__name__, type(f), isinstance(f, types.FunctionType), isinstance(f, types.TypeType), isinstance(f, types.BuiltinFunctionType)
if isinstance(f, types.FunctionType):
@functools.wraps(f)
def _f_func(self, args):
return f(self, *args)
return _f_func
elif isinstance(f, (type, types.BuiltinFunctionType)):
@functools.wraps(f)
def _f_builtin(_self, args):
return f(*args)
return _f_builtin
elif isinstance(f, types.MethodType):
@functools.wraps(f.__func__)
def _f(self, args):
return f.__func__(self, *args)
return _f
else:
@functools.wraps(f.__call__.__func__)
def _f(self, args):
return f.__call__.__func__(self, *args)
return _f


try:
from contextlib import suppress # Python 3
except ImportError:
@contextmanager
def suppress(*excs):
'''Catch and dismiss the provided exception

>>> x = 'hello'
>>> with suppress(IndexError):
... x = x[10]
>>> x
'hello'
'''
try:
yield
except excs:
pass


def is_terminal(sym):
return sym.isupper()

class GrammarError(Exception):
pass

class ParseError(Exception):
pass

class UnexpectedToken(ParseError):
def __init__(self, token, expected, seq, index):
self.token = token
self.expected = expected
self.line = getattr(token, 'line', '?')
self.column = getattr(token, 'column', '?')

try:
context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
except AttributeError:
context = seq[index:index+5]
except TypeError:
context = "<no context>"
message = ("Unexpected token %r at line %s, column %s.\n"
"Expected: %s\n"
"Context: %s" % (token, self.line, self.column, expected, context))

super(UnexpectedToken, self).__init__(message)



class Tree(object):
def __init__(self, data, children):
self.data = data
self.children = list(children)

def __repr__(self):
return 'Tree(%s, %s)' % (self.data, self.children)

def _pretty_label(self):
return self.data

def _pretty(self, level, indent_str):
if len(self.children) == 1 and not isinstance(self.children[0], Tree):
return [ indent_str*level, self._pretty_label(), '\t', '%s' % self.children[0], '\n']

l = [ indent_str*level, self._pretty_label(), '\n' ]
for n in self.children:
if isinstance(n, Tree):
l += n._pretty(level+1, indent_str)
else:
l += [ indent_str*(level+1), '%s' % n, '\n' ]

return l

def pretty(self, indent_str=' '):
return ''.join(self._pretty(0, indent_str))
class Transformer(object):
def _get_func(self, name):
return getattr(self, name)

def transform(self, tree):
items = []
for c in tree.children:
try:
items.append(self.transform(c) if isinstance(c, Tree) else c)
except Discard:
pass
try:
f = self._get_func(tree.data)
except AttributeError:
return self.__default__(tree.data, items)
else:
return f(items)

def __default__(self, data, children):
return Tree(data, children)

def __mul__(self, other):
return TransformerChain(self, other)


class Discard(Exception):
pass

class TransformerChain(object):
def __init__(self, *transformers):
self.transformers = transformers

def transform(self, tree):
for t in self.transformers:
tree = t.transform(tree)
return tree

def __mul__(self, other):
return TransformerChain(*self.transformers + (other,))



class InlineTransformer(Transformer):
def _get_func(self, name): # use super()._get_func
return inline_args(getattr(self, name)).__get__(self)


class Visitor(object):
def visit(self, tree):
for child in tree.children:
if isinstance(child, Tree):
self.visit(child)

f = getattr(self, tree.data, self.__default__)
f(tree)
return tree

def __default__(self, tree):
pass


class Visitor_NoRecurse(Visitor):
def visit(self, tree):
subtrees = list(tree.iter_subtrees())

for subtree in (subtrees):
getattr(self, subtree.data, self.__default__)(subtree)
return tree


class Transformer_NoRecurse(Transformer):
def transform(self, tree):
subtrees = list(tree.iter_subtrees())

def _t(t):
# Assumes t is already transformed
try:
f = self._get_func(t.data)
except AttributeError:
return self.__default__(t)
else:
return f(t)

for subtree in subtrees:
children = []
for c in subtree.children:
try:
children.append(_t(c) if isinstance(c, Tree) else c)
except Discard:
pass
subtree.children = children

return _t(tree)

def __default__(self, t):
return t

class Indenter:
def __init__(self):
self.paren_level = 0
self.indent_level = [0]

def handle_NL(self, token):
if self.paren_level > 0:
return

yield token

indent_str = token.rsplit('\n', 1)[1] # Tabs and spaces
indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len

if indent > self.indent_level[-1]:
self.indent_level.append(indent)
yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
else:
while indent < self.indent_level[-1]:
self.indent_level.pop()
yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)

assert indent == self.indent_level[-1], '%s != %s' % (indent, self.indent_level[-1])

def process(self, stream):
for token in stream:
if token.type == self.NL_type:
for t in self.handle_NL(token):
yield t
else:
yield token

if token.type in self.OPEN_PAREN_types:
self.paren_level += 1
elif token.type in self.CLOSE_PAREN_types:
self.paren_level -= 1
assert self.paren_level >= 0

while len(self.indent_level) > 1:
self.indent_level.pop()
yield Token(self.DEDENT_type, '')

assert self.indent_level == [0], self.indent_level

# XXX Hack for ContextualLexer. Maybe there's a more elegant solution?
@property
def always_accept(self):
return (self.NL_type,)


class LexError(Exception):
pass

class UnexpectedInput(LexError):
def __init__(self, seq, lex_pos, line, column, allowed=None):
context = seq[lex_pos:lex_pos+5]
message = "No token defined for: '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column)

super(UnexpectedInput, self).__init__(message)

self.line = line
self.column = column
self.context = context
self.allowed = allowed

class Token(Str):
def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None):
inst = Str.__new__(cls, value)
inst.type = type_
inst.pos_in_stream = pos_in_stream
inst.value = value
inst.line = line
inst.column = column
return inst

@classmethod
def new_borrow_pos(cls, type_, value, borrow_t):
return cls(type_, value, borrow_t.pos_in_stream, line=borrow_t.line, column=borrow_t.column)

def __repr__(self):
return 'Token(%s, %r)' % (self.type, self.value)

def __deepcopy__(self, memo):
return Token(self.type, self.value, self.pos_in_stream, self.line, self.column)

def __eq__(self, other):
if isinstance(other, Token) and self.type != other.type:
return False

return Str.__eq__(self, other)

__hash__ = Str.__hash__


class LineCounter:
def __init__(self):
self.newline_char = '\n'
self.char_pos = 0
self.line = 1
self.column = 0
self.line_start_pos = 0

def feed(self, token, test_newline=True):
"""Consume a token and calculate the new line & column.

As an optional optimization, set test_newline=False if the token doesn't contain a newline.
"""
if test_newline:
newlines = token.count(self.newline_char)
if newlines:
self.line += newlines
self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1

self.char_pos += len(token)
self.column = self.char_pos - self.line_start_pos

class _Lex:
"Built to serve both Lexer and ContextualLexer"
def __init__(self, lexer):
self.lexer = lexer

def lex(self, stream, newline_types, ignore_types):
newline_types = list(newline_types)
ignore_types = list(ignore_types)
line_ctr = LineCounter()

while True:
lexer = self.lexer
for mre, type_from_index in lexer.mres:
m = mre.match(stream, line_ctr.char_pos)
if m:
value = m.group(0)
type_ = type_from_index[m.lastindex]
if type_ not in ignore_types:
t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
if t.type in lexer.callback:
t = lexer.callback[t.type](t)
lexer = yield t

line_ctr.feed(value, type_ in newline_types)
break
else:
if line_ctr.char_pos < len(stream):
raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
break

class UnlessCallback:
def __init__(self, mres):
self.mres = mres

def __call__(self, t):
for mre, type_from_index in self.mres:
m = mre.match(t.value)
if m:
value = m.group(0)
t.type = type_from_index[m.lastindex]
break
return t



class NodeBuilder:
def __init__(self, tree_class, name):
self.tree_class = tree_class
self.name = name

def __call__(self, children):
return self.tree_class(self.name, children)

class Expand1:
def __init__(self, node_builder):
self.node_builder = node_builder

def __call__(self, children):
if len(children) == 1:
return children[0]
else:
return self.node_builder(children)

class Factory:
def __init__(self, cls, *args):
self.cls = cls
self.args = args

def __call__(self, node_builder):
return self.cls(node_builder, *self.args)


class TokenWrapper:
"Used for fixing the results of scanless parsing"

def __init__(self, node_builder, token_name):
self.node_builder = node_builder
self.token_name = token_name

def __call__(self, children):
return self.node_builder( [Token(self.token_name, ''.join(children))] )

def identity(node_builder):
return node_builder


class ChildFilter:
def __init__(self, node_builder, to_include):
self.node_builder = node_builder
self.to_include = to_include

def __call__(self, children):
filtered = []
for i, to_expand in self.to_include:
if to_expand:
filtered += children[i].children
else:
filtered.append(children[i])

return self.node_builder(filtered)

def create_rule_handler(expansion, keep_all_tokens, filter_out):
# if not keep_all_tokens:
to_include = [(i, not is_terminal(sym) and sym.startswith('_'))
for i, sym in enumerate(expansion)
if keep_all_tokens
or not ((is_terminal(sym) and sym.startswith('_')) or sym in filter_out)
]

if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
return Factory(ChildFilter, to_include)

# else, if no filtering required..
return identity

class PropagatePositions:
def __init__(self, node_builder):
self.node_builder = node_builder

def __call__(self, children):
res = self.node_builder(children)

if children:
for a in children:
with suppress(AttributeError):
res.line = a.line
res.column = a.column
break

for a in reversed(children):
with suppress(AttributeError):
res.end_line = a.end_line
res.end_col = a.end_col
break

return res


class Callback(object):
pass

class ParseTreeBuilder:
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False):
self.tree_class = tree_class
self.propagate_positions = propagate_positions
self.always_keep_all_tokens = keep_all_tokens

self.rule_builders = list(self._init_builders(rules))

self.user_aliases = {}

def _init_builders(self, rules):
filter_out = set()
for rule in rules:
if rule.options and rule.options.filter_out:
assert rule.origin.startswith('_') # Just to make sure
filter_out.add(rule.origin)

for rule in rules:
options = rule.options
keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False)
expand1 = options.expand1 if options else False
create_token = options.create_token if options else False

wrapper_chain = filter(None, [
(expand1 and not rule.alias) and Expand1,
create_token and Factory(TokenWrapper, create_token),
create_rule_handler(rule.expansion, keep_all_tokens, filter_out),
self.propagate_positions and PropagatePositions,
])

yield rule, wrapper_chain


def create_callback(self, transformer=None):
callback = Callback()

for rule, wrapper_chain in self.rule_builders:
internal_callback_name = '_callback_%s_%s' % (rule.origin, '_'.join(rule.expansion))

user_callback_name = rule.alias or rule.origin
try:
f = transformer._get_func(user_callback_name)
except AttributeError:
f = NodeBuilder(self.tree_class, user_callback_name)

self.user_aliases[rule] = rule.alias
rule.alias = internal_callback_name

for w in wrapper_chain:
f = w(f)

if hasattr(callback, internal_callback_name):
raise GrammarError("Rule '%s' already exists" % (rule,))
setattr(callback, internal_callback_name, f)

return callback



class _Parser:
def __init__(self, parse_table, callbacks):
self.states = parse_table.states
self.start_state = parse_table.start_state
self.end_state = parse_table.end_state
self.callbacks = callbacks

def parse(self, seq, set_state=None):
i = 0
token = None
stream = iter(seq)
states = self.states

state_stack = [self.start_state]
value_stack = []

if set_state: set_state(self.start_state)

def get_action(key):
state = state_stack[-1]
try:
return states[state][key]
except KeyError:
expected = states[state].keys()

raise UnexpectedToken(token, expected, seq, i)

def reduce(rule):
size = len(rule.expansion)
if size:
s = value_stack[-size:]
del state_stack[-size:]
del value_stack[-size:]
else:
s = []

value = self.callbacks[rule](s)

_action, new_state = get_action(rule.origin)
assert _action is Shift
state_stack.append(new_state)
value_stack.append(value)

# Main LALR-parser loop
try:
token = next(stream)
i += 1
while True:
action, arg = get_action(token.type)
assert arg != self.end_state

if action is Shift:
state_stack.append(arg)
value_stack.append(token)
if set_state: set_state(arg)
token = next(stream)
i += 1
else:
reduce(arg)
except StopIteration:
pass

while True:
_action, arg = get_action('$END')
if _action is Shift:
assert arg == self.end_state
val ,= value_stack
return val
else:
reduce(arg)



class Rule(object):
"""
origin : a symbol
expansion : a list of symbols
"""
def __init__(self, origin, expansion, alias=None, options=None):
self.origin = origin
self.expansion = expansion
self.alias = alias
self.options = options

def __str__(self):
return '<%s : %s>' % (self.origin, ' '.join(map(str,self.expansion)))

def __repr__(self):
return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)


class RuleOptions:
def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False, priority=None):
self.keep_all_tokens = keep_all_tokens
self.expand1 = expand1
self.create_token = create_token # used for scanless postprocessing
self.priority = priority

self.filter_out = filter_out # remove this rule from the tree
# used for "token"-rules in scanless

def __repr__(self):
return 'RuleOptions(%r, %r, %r, %r, %r)' % (
self.keep_all_tokens,
self.expand1,
self.create_token,
self.priority,
self.filter_out
)

Shift = 0
Reduce = 1
import re
MRES = (
[('(?P<SIGNED_NUMBER>(?:(?:\\+|\\-))?(?:(?:(?:[0-9])+(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+|(?:(?:[0-9])+\\.(?:(?:[0-9])+)?|\\.(?:[0-9])+)(?:(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+)?)|(?:[0-9])+))|(?P<ESCAPED_STRING>\\"(?:(?:\\\\\\"|[^"]))*\\")|(?P<WS>(?:[ \t\x0c'
'\r\n'
'])+)|(?P<__FALSE1>false)|(?P<__NULL2>null)|(?P<__TRUE0>true)|(?P<__COLON>\\:)|(?P<__COMMA>\\,)|(?P<__LBRACE>\\{)|(?P<__LSQB>\\[)|(?P<__RBRACE>\\})|(?P<__RSQB>\\])',
{1: 'SIGNED_NUMBER',
2: 'ESCAPED_STRING',
3: 'WS',
4: '__FALSE1',
5: '__NULL2',
6: '__TRUE0',
7: '__COLON',
8: '__COMMA',
9: '__LBRACE',
10: '__LSQB',
11: '__RBRACE',
12: '__RSQB'})]
)
LEXER_CALLBACK = (
{}
)
NEWLINE_TYPES = ['WS']
IGNORE_TYPES = ['WS']
class LexerRegexps: pass
lexer_regexps = LexerRegexps()
lexer_regexps.mres = [(re.compile(p), d) for p, d in MRES]
lexer_regexps.callback = {n: UnlessCallback([(re.compile(p), d) for p, d in mres])
for n, mres in LEXER_CALLBACK.items()}
lexer = _Lex(lexer_regexps)
def lex(stream):
return lexer.lex(stream, NEWLINE_TYPES, IGNORE_TYPES)
RULES = {
0: Rule('start', ['value'], None, RuleOptions(False, True, None, None, False)),
1: Rule('value', ['object'], None, RuleOptions(False, True, None, None, False)),
2: Rule('value', ['array'], None, RuleOptions(False, True, None, None, False)),
3: Rule('value', ['string'], None, RuleOptions(False, True, None, None, False)),
4: Rule('value', ['SIGNED_NUMBER'], 'number', RuleOptions(False, True, None, None, False)),
5: Rule('value', ['__TRUE0'], 'true', RuleOptions(False, True, None, None, False)),
6: Rule('value', ['__FALSE1'], 'false', RuleOptions(False, True, None, None, False)),
7: Rule('value', ['__NULL2'], 'null', RuleOptions(False, True, None, None, False)),
8: Rule('array', ['__LSQB', 'value', '__anon_star_0', '__RSQB'], None, RuleOptions(False, False, None, None, False)),
9: Rule('array', ['__LSQB', 'value', '__RSQB'], None, RuleOptions(False, False, None, None, False)),
10: Rule('array', ['__LSQB', '__RSQB'], None, RuleOptions(False, False, None, None, False)),
11: Rule('object', ['__LBRACE', 'pair', '__anon_star_1', '__RBRACE'], None, RuleOptions(False, False, None, None, False)),
12: Rule('object', ['__LBRACE', 'pair', '__RBRACE'], None, RuleOptions(False, False, None, None, False)),
13: Rule('object', ['__LBRACE', '__RBRACE'], None, RuleOptions(False, False, None, None, False)),
14: Rule('pair', ['string', '__COLON', 'value'], None, RuleOptions(False, False, None, None, False)),
15: Rule('string', ['ESCAPED_STRING'], None, RuleOptions(False, False, None, None, False)),
16: Rule('__anon_star_0', ['__COMMA', 'value'], None, None),
17: Rule('__anon_star_0', ['__anon_star_0', '__COMMA', 'value'], None, None),
18: Rule('__anon_star_1', ['__COMMA', 'pair'], None, None),
19: Rule('__anon_star_1', ['__anon_star_1', '__COMMA', 'pair'], None, None),
}
parse_tree_builder = ParseTreeBuilder(RULES.values(), Tree)
class ParseTable: pass
parse_table = ParseTable()
STATES = {
0: {0: (0, 1), 1: (0, 2), 2: (0, 3), 3: (0, 4), 4: (0, 5), 5: (0, 6), 6: (0, 7), 7: (0, 8), 8: (0, 9), 9: (0, 10), 10: (0, 11), 11: (0, 12)},
1: {12: (1, 5), 13: (1, 5), 14: (1, 5), 15: (1, 5)},
2: {9: (0, 10), 14: (0, 13), 16: (0, 14), 11: (0, 15)},
3: {12: (1, 2), 13: (1, 2), 14: (1, 2), 15: (1, 2)},
4: {12: (1, 1), 13: (1, 1), 14: (1, 1), 15: (1, 1)},
5: {12: (0, 16)},
6: {7: (0, 17), 0: (0, 1), 1: (0, 2), 2: (0, 3), 3: (0, 4), 5: (0, 6), 6: (0, 7), 8: (0, 9), 9: (0, 10), 15: (0, 18), 10: (0, 11), 11: (0, 12)},
7: {12: (1, 4), 13: (1, 4), 14: (1, 4), 15: (1, 4)},
8: {12: (1, 0)},
9: {12: (1, 7), 13: (1, 7), 14: (1, 7), 15: (1, 7)},
10: {12: (1, 15), 17: (1, 15), 13: (1, 15), 14: (1, 15), 15: (1, 15)},
11: {12: (1, 6), 13: (1, 6), 14: (1, 6), 15: (1, 6)},
12: {12: (1, 3), 13: (1, 3), 14: (1, 3), 15: (1, 3)},
13: {13: (1, 13), 12: (1, 13), 14: (1, 13), 15: (1, 13)},
14: {14: (0, 19), 13: (0, 20), 18: (0, 21)},
15: {17: (0, 22)},
16: {},
17: {19: (0, 23), 15: (0, 24), 13: (0, 25)},
18: {13: (1, 10), 12: (1, 10), 14: (1, 10), 15: (1, 10)},
19: {13: (1, 12), 12: (1, 12), 14: (1, 12), 15: (1, 12)},
20: {9: (0, 10), 11: (0, 15), 16: (0, 26)},
21: {14: (0, 27), 13: (0, 28)},
22: {5: (0, 6), 1: (0, 2), 0: (0, 1), 8: (0, 9), 2: (0, 3), 3: (0, 4), 9: (0, 10), 6: (0, 7), 10: (0, 11), 11: (0, 12), 7: (0, 29)},
23: {15: (0, 30), 13: (0, 31)},
24: {13: (1, 9), 12: (1, 9), 14: (1, 9), 15: (1, 9)},
25: {5: (0, 6), 1: (0, 2), 0: (0, 1), 8: (0, 9), 2: (0, 3), 3: (0, 4), 7: (0, 32), 9: (0, 10), 6: (0, 7), 10: (0, 11), 11: (0, 12)},
26: {13: (1, 18), 14: (1, 18)},
27: {13: (1, 11), 12: (1, 11), 14: (1, 11), 15: (1, 11)},
28: {16: (0, 33), 9: (0, 10), 11: (0, 15)},
29: {13: (1, 14), 14: (1, 14)},
30: {13: (1, 8), 12: (1, 8), 14: (1, 8), 15: (1, 8)},
31: {5: (0, 6), 1: (0, 2), 0: (0, 1), 7: (0, 34), 8: (0, 9), 2: (0, 3), 3: (0, 4), 9: (0, 10), 6: (0, 7), 10: (0, 11), 11: (0, 12)},
32: {15: (1, 16), 13: (1, 16)},
33: {13: (1, 19), 14: (1, 19)},
34: {15: (1, 17), 13: (1, 17)},
}
TOKEN_TYPES = (
{0: '__TRUE0',
1: '__LBRACE',
2: 'array',
3: 'object',
4: 'start',
5: '__LSQB',
6: 'SIGNED_NUMBER',
7: 'value',
8: '__NULL2',
9: 'ESCAPED_STRING',
10: '__FALSE1',
11: 'string',
12: '$END',
13: '__COMMA',
14: '__RBRACE',
15: '__RSQB',
16: 'pair',
17: '__COLON',
18: '__anon_star_1',
19: '__anon_star_0'}
)
parse_table.states = {s: {TOKEN_TYPES[t]: (a, RULES[x] if a is Reduce else x) for t, (a, x) in acts.items()}
for s, acts in STATES.items()}
parse_table.start_state = 0
parse_table.end_state = 16
class Lark_StandAlone:
def __init__(self, transformer=None, postlex=None):
callback = parse_tree_builder.create_callback(transformer=transformer)
callbacks = {rule: getattr(callback, rule.alias or rule.origin, None) for rule in RULES.values()}
self.parser = _Parser(parse_table, callbacks)
self.postlex = postlex
def parse(self, stream):
tokens = lex(stream)
if self.postlex: tokens = self.postlex.process(tokens)
return self.parser.parse(tokens)

+25 -0 examples/standalone/json_parser_main.py

@@ -0,0 +1,25 @@
import sys

from json_parser import Lark_StandAlone, Transformer, inline_args

class TreeToJson(Transformer):
@inline_args
def string(self, s):
return s[1:-1].replace('\\"', '"')

array = list
pair = tuple
object = dict
number = inline_args(float)

null = lambda self, _: None
true = lambda self, _: True
false = lambda self, _: False


parser = Lark_StandAlone(transformer=TreeToJson())

if __name__ == '__main__':
with open(sys.argv[1]) as f:
print(parser.parse(f.read()))
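A quick check of the generated module without a file argument; illustrative only, assuming json_parser.py from above is importable:

from json_parser import Lark_StandAlone

parser = Lark_StandAlone()               # no transformer: parse() returns a Tree
tree = parser.parse('{"answer": 42}')
print(tree.pretty())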


+1 -1 lark/__init__.py

@@ -4,4 +4,4 @@ from .lexer import UnexpectedInput, LexError
from .lark import Lark
from .utils import inline_args


__version__ = "0.5.1"
__version__ = "0.5.2"

+10 -31 lark/common.py

@@ -1,16 +1,21 @@
import re
import sre_parse
import sys


from .utils import get_regexp_width

Py36 = (sys.version_info[:2] >= (3, 6))



###{standalone
def is_terminal(sym):
return sym.isupper()

class GrammarError(Exception):
pass


class ParseError(Exception):
pass



class UnexpectedToken(ParseError):
def __init__(self, token, expected, seq, index):
self.token = token
@@ -31,9 +36,8 @@ class UnexpectedToken(ParseError):
super(UnexpectedToken, self).__init__(message)




###}


def is_terminal(sym):
return isinstance(sym, Terminal) or sym.isupper() or sym == '$end'




class LexerConf:
@@ -44,7 +48,6 @@ class LexerConf:


class ParserConf:
def __init__(self, rules, callback, start):
assert all(len(r) == 4 for r in rules)
self.rules = rules
self.callback = callback
self.start = start
@@ -93,10 +96,10 @@ class PatternRE(Pattern):


@property
def min_width(self):
return sre_parse.parse(self.to_regexp()).getwidth()[0]
return get_regexp_width(self.to_regexp())[0]
@property
def max_width(self):
return sre_parse.parse(self.to_regexp()).getwidth()[1]
return get_regexp_width(self.to_regexp())[1]


class TokenDef(object):
def __init__(self, name, pattern, priority=1):
@@ -108,27 +111,3 @@ class TokenDef(object):
def __repr__(self):
return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)



class Terminal:
def __init__(self, data):
self.data = data

def __repr__(self):
return '%r' % self.data

def __eq__(self, other):
return isinstance(other, type(self)) and self.data == other.data
def __hash__(self):
return hash(self.data)


class Terminal_Regexp(Terminal):
def __init__(self, name, regexp):
Terminal.__init__(self, regexp)
self.name = name
self.match = re.compile(regexp).match

class Terminal_Token(Terminal):
def match(self, other):
return self.data == other.type
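get_regexp_width comes from lark/utils.py (also changed in this commit, but not shown in this section); presumably it is a thin wrapper around the sre_parse call that min_width/max_width used to make directly, roughly:

import sre_parse

def get_regexp_width(regexp):
    # returns (min_width, max_width) as reported by sre_parse
    return sre_parse.parse(regexp).getwidth()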


+37 -0 lark/grammar.py

@@ -0,0 +1,37 @@

class Rule(object):
"""
origin : a symbol
expansion : a list of symbols
"""
def __init__(self, origin, expansion, alias=None, options=None):
self.origin = origin
self.expansion = expansion
self.alias = alias
self.options = options

def __str__(self):
return '<%s : %s>' % (self.origin, ' '.join(map(str,self.expansion)))

def __repr__(self):
return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)


class RuleOptions:
def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False, priority=None):
self.keep_all_tokens = keep_all_tokens
self.expand1 = expand1
self.create_token = create_token # used for scanless postprocessing
self.priority = priority

self.filter_out = filter_out # remove this rule from the tree
# used for "token"-rules in scanless

def __repr__(self):
return 'RuleOptions(%r, %r, %r, %r, %r)' % (
self.keep_all_tokens,
self.expand1,
self.create_token,
self.priority,
self.filter_out
)

+1 -0 lark/grammars/common.g

@@ -12,6 +12,7 @@ DECIMAL: INT "." INT? | "." INT
// float = /-?\d+(\.\d+)?([eE][+-]?\d+)?/
_EXP: ("e"|"E") SIGNED_INT
FLOAT: INT _EXP | DECIMAL _EXP?
SIGNED_FLOAT: ["+"|"-"] INT


NUMBER: FLOAT | INT
SIGNED_NUMBER: ["+"|"-"] NUMBER


+3 -0 lark/indenter.py

@@ -2,6 +2,7 @@


from .lexer import Token


###{standalone
class Indenter:
def __init__(self):
self.paren_level = 0
@@ -50,3 +51,5 @@ class Indenter:
@property
def always_accept(self):
return (self.NL_type,)

###}
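The Indenter above (now wrapped in ###{standalone markers) expects a handful of attributes on a subclass: NL_type, OPEN_PAREN_types, CLOSE_PAREN_types, INDENT_type, DEDENT_type, tab_len. A hypothetical subclass for a Python-like grammar, wired in as a postlex processor, would look roughly like this (token names are illustrative):

class PythonIndenter(Indenter):
    NL_type = '_NEWLINE'
    OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
    CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']
    INDENT_type = '_INDENT'
    DEDENT_type = '_DEDENT'
    tab_len = 8

# then e.g.: Lark(grammar, parser='lalr', postlex=PythonIndenter())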

+5 -3 lark/lark.py

@@ -169,13 +169,15 @@ class Lark:


def _build_parser(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer)
self.parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens)
rules, callback = self.parse_tree_builder.apply(self.options.transformer)

self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens)
callback = self._parse_tree_builder.create_callback(self.options.transformer)
if self.profiler:
for f in dir(callback):
if not (f.startswith('__') and f.endswith('__')):
setattr(callback, f, self.profiler.make_wrapper('transformer', getattr(callback, f)))
parser_conf = ParserConf(rules, callback, self.options.start)

parser_conf = ParserConf(self.rules, callback, self.options.start)


return self.parser_class(self.lexer_conf, parser_conf, options=self.options)




+74 -90 lark/lexer.py

@@ -5,6 +5,7 @@ import re
from .utils import Str, classify
from .common import is_terminal, PatternStr, PatternRE, TokenDef


###{standalone
class LexError(Exception):
pass


@@ -48,27 +49,75 @@ class Token(Str):


__hash__ = Str.__hash__


class Regex:
def __init__(self, pattern, flags=()):
self.pattern = pattern
self.flags = flags


def _regexp_has_newline(r):
return '\n' in r or '\\n' in r or ('(?s)' in r and '.' in r)
class LineCounter:
def __init__(self):
self.newline_char = '\n'
self.char_pos = 0
self.line = 1
self.column = 0
self.line_start_pos = 0

def feed(self, token, test_newline=True):
"""Consume a token and calculate the new line & column.

As an optional optimization, set test_newline=False if the token doesn't contain a newline.
"""
if test_newline:
newlines = token.count(self.newline_char)
if newlines:
self.line += newlines
self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1

self.char_pos += len(token)
self.column = self.char_pos - self.line_start_pos

class _Lex:
"Built to serve both Lexer and ContextualLexer"
def __init__(self, lexer):
self.lexer = lexer

def lex(self, stream, newline_types, ignore_types):
newline_types = list(newline_types)
ignore_types = list(ignore_types)
line_ctr = LineCounter()


def _create_unless_callback(strs):
mres = build_mres(strs, match_whole=True)
def unless_callback(t):
# if t in strs:
# t.type = strs[t]
for mre, type_from_index in mres:
while True:
lexer = self.lexer
for mre, type_from_index in lexer.mres:
m = mre.match(stream, line_ctr.char_pos)
if m:
value = m.group(0)
type_ = type_from_index[m.lastindex]
if type_ not in ignore_types:
t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
if t.type in lexer.callback:
t = lexer.callback[t.type](t)
yield t

line_ctr.feed(value, type_ in newline_types)
break
else:
if line_ctr.char_pos < len(stream):
raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
break

class UnlessCallback:
def __init__(self, mres):
self.mres = mres

def __call__(self, t):
for mre, type_from_index in self.mres:
m = mre.match(t.value)
if m:
value = m.group(0)
t.type = type_from_index[m.lastindex]
break
return t
return unless_callback

###}




def _create_unless(tokens):
tokens_by_type = classify(tokens, lambda t: type(t.pattern))
@@ -85,7 +134,7 @@ def _create_unless(tokens):
if strtok.pattern.flags <= retok.pattern.flags:
embedded_strs.add(strtok)
if unless:
callback[retok.name] = _create_unless_callback(unless)
callback[retok.name] = UnlessCallback(build_mres(unless, match_whole=True))


tokens = [t for t in tokens if t not in embedded_strs]
return tokens, callback
@@ -110,13 +159,13 @@ def _build_mres(tokens, max_size, match_whole):
def build_mres(tokens, match_whole=False):
return _build_mres(tokens, len(tokens), match_whole)


def _regexp_has_newline(r):
return '\n' in r or '\\n' in r or ('(?s)' in r and '.' in r)


class Lexer(object):
class Lexer:
def __init__(self, tokens, ignore=()):
assert all(isinstance(t, TokenDef) for t in tokens), tokens


self.ignore = ignore
self.newline_char = '\n'
tokens = list(tokens)


# Sanitization
@@ -129,14 +178,11 @@ class Lexer(object):
if t.pattern.min_width == 0:
raise LexError("Lexer does not allow zero-width tokens. (%s: %s)" % (t.name, t.pattern))


token_names = {t.name for t in tokens}
for t in ignore:
if t not in token_names:
raise LexError("Token '%s' was marked to ignore but it is not defined!" % t)
assert set(ignore) <= {t.name for t in tokens}


# Init
self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())]
self.ignore_types = [t for t in ignore]
self.ignore_types = list(ignore)


tokens.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))


@@ -147,46 +193,8 @@ class Lexer(object):


self.mres = build_mres(tokens)



def lex(self, stream):
lex_pos = 0
line = 1
col_start_pos = 0
newline_types = list(self.newline_types)
ignore_types = list(self.ignore_types)
while True:
for mre, type_from_index in self.mres:
m = mre.match(stream, lex_pos)
if m:
value = m.group(0)
type_ = type_from_index[m.lastindex]
to_yield = type_ not in ignore_types

if to_yield:
t = Token(type_, value, lex_pos, line, lex_pos - col_start_pos)
end_col = t.column + len(value)
if t.type in self.callback:
t = self.callback[t.type](t)

if type_ in newline_types:
newlines = value.count(self.newline_char)
if newlines:
line += newlines
last_newline_index = value.rindex(self.newline_char) + 1
col_start_pos = lex_pos + last_newline_index
end_col = len(value) - last_newline_index

if to_yield:
t.end_line = line
t.end_col = end_col
yield t

lex_pos += len(value)
break
else:
if lex_pos < len(stream):
raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos)
break
return _Lex(self).lex(stream, self.newline_types, self.ignore_types)




class ContextualLexer:
@@ -204,7 +212,7 @@ class ContextualLexer:
lexer = lexer_by_tokens[key]
except KeyError:
accepts = set(accepts) | set(ignore) | set(always_accept)
state_tokens = [tokens_by_name[n] for n in accepts if is_terminal(n) and n!='$end']
state_tokens = [tokens_by_name[n] for n in accepts if is_terminal(n) and n!='$END']
lexer = Lexer(state_tokens, ignore=ignore)
lexer_by_tokens[key] = lexer


@@ -218,33 +226,9 @@ class ContextualLexer:
self.parser_state = state


def lex(self, stream):
lex_pos = 0
line = 1
col_start_pos = 0
newline_types = list(self.root_lexer.newline_types)
ignore_types = list(self.root_lexer.ignore_types)
while True:
lexer = self.lexers[self.parser_state]
for mre, type_from_index in lexer.mres:
m = mre.match(stream, lex_pos)
if m:
value = m.group(0)
type_ = type_from_index[m.lastindex]
if type_ not in ignore_types:
t = Token(type_, value, lex_pos, line, lex_pos - col_start_pos)
if t.type in lexer.callback:
t = lexer.callback[t.type](t)
yield t
l = _Lex(self.lexers[self.parser_state])
for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types):
yield x
l.lexer = self.lexers[self.parser_state]


if type_ in newline_types:
newlines = value.count(lexer.newline_char)
if newlines:
line += newlines
col_start_pos = lex_pos + value.rindex(lexer.newline_char)
lex_pos += len(value)
break
else:
if lex_pos < len(stream):
raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos, lexer.tokens)
break
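The line/column bookkeeping that was previously duplicated in Lexer.lex and ContextualLexer.lex now lives in LineCounter; a small illustrative check of its expected behaviour (not part of the commit):

lc = LineCounter()
lc.feed("foo\nbar")                   # token containing a newline
assert (lc.line, lc.column) == (2, 3)
lc.feed("baz", test_newline=False)    # optimization: skip the newline scan
assert (lc.line, lc.column) == (2, 6)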



+51 -51 lark/load_grammar.py

@@ -12,6 +12,7 @@ from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import LALR
from .parsers.lalr_parser import UnexpectedToken
from .common import is_terminal, GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
from .grammar import RuleOptions, Rule


from .tree import Tree as T, Transformer, InlineTransformer, Visitor


@@ -127,7 +128,7 @@ RULES = {


class EBNF_to_BNF(InlineTransformer):
def __init__(self):
self.new_rules = {}
self.new_rules = []
self.rules_by_expr = {}
self.prefix = 'anon'
self.i = 0
@@ -140,7 +141,8 @@ class EBNF_to_BNF(InlineTransformer):
new_name = '__%s_%s_%d' % (self.prefix, type_, self.i)
self.i += 1
t = Token('RULE', new_name, -1)
self.new_rules[new_name] = T('expansions', [T('expansion', [expr]), T('expansion', [t, expr])]), self.rule_options
tree = T('expansions', [T('expansion', [expr]), T('expansion', [t, expr])])
self.new_rules.append((new_name, tree, self.rule_options))
self.rules_by_expr[expr] = t
return t


@@ -174,7 +176,6 @@ class SimplifyRule_Visitor(Visitor):
break
tree.expand_kids_by_index(*to_expand)



def expansion(self, tree):
# rules_list unpacking
# a : b (c|d) e
@@ -194,7 +195,7 @@ class SimplifyRule_Visitor(Visitor):
tree.data = 'expansions'
tree.children = [self.visit(T('expansion', [option if i==j else other
for j, other in enumerate(tree.children)]))
for option in child.children]
for option in set(child.children)]
break
else:
break
@@ -208,7 +209,10 @@ class SimplifyRule_Visitor(Visitor):
tree.data = 'expansions'
tree.children = aliases


expansions = _flatten
def expansions(self, tree):
self._flatten(tree)
tree.children = list(set(tree.children))



class RuleTreeToText(Transformer):
def expansions(self, x):
@@ -389,12 +393,6 @@ def _interleave(l, item):
def _choice_of_rules(rules):
return T('expansions', [T('expansion', [Token('RULE', name)]) for name in rules])


def dict_update_safe(d1, d2):
for k, v in d2.items():
assert k not in d1
d1[k] = v


class Grammar:
def __init__(self, rule_defs, token_defs, ignore):
self.token_defs = token_defs
@@ -411,6 +409,7 @@ class Grammar:
terms_to_ignore = {name:'__'+name for name in self.ignore}
if terms_to_ignore:
assert set(terms_to_ignore) <= {name for name, _t in term_defs}

term_defs = [(terms_to_ignore.get(name,name),t) for name,t in term_defs]
expr = Token('RULE', '__ignore')
for r, tree, _o in rule_defs:
@@ -466,57 +465,41 @@ class Grammar:
# =================
# Compile Rules
# =================
ebnf_to_bnf = EBNF_to_BNF()
simplify_rule = SimplifyRule_Visitor()


# 1. Pre-process terminals
transformer = PrepareLiterals()
if not lexer:
transformer *= SplitLiterals()
transformer *= ExtractAnonTokens(tokens) # Adds to tokens


rules = {}
# 2. Convert EBNF to BNF (and apply step 1)
ebnf_to_bnf = EBNF_to_BNF()
rules = []
for name, rule_tree, options in rule_defs:
assert name not in rules, name
ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
tree = transformer.transform(rule_tree)
rules[name] = ebnf_to_bnf.transform(tree), options
rules.append((name, ebnf_to_bnf.transform(tree), options))
rules += ebnf_to_bnf.new_rules


dict_update_safe(rules, ebnf_to_bnf.new_rules)

for tree, _o in rules.values():
simplify_rule.visit(tree)
assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision"


# 3. Compile tree to Rule objects
rule_tree_to_text = RuleTreeToText()
rules = {origin: (rule_tree_to_text.transform(tree), options) for origin, (tree, options) in rules.items()}

return tokens, rules, self.ignore


simplify_rule = SimplifyRule_Visitor()
compiled_rules = []
for name, tree, options in rules:
simplify_rule.visit(tree)
expansions = rule_tree_to_text.transform(tree)


for expansion, alias in expansions:
if alias and name.startswith('_'):
raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))


class RuleOptions:
def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False, priority=None):
self.keep_all_tokens = keep_all_tokens
self.expand1 = expand1
self.create_token = create_token # used for scanless postprocessing
self.priority = priority

self.filter_out = filter_out # remove this rule from the tree
# used for "token"-rules in scanless
@classmethod
def from_rule(cls, name, *x):
if len(x) > 1:
priority, expansions = x
priority = int(priority)
else:
expansions ,= x
priority = None

keep_all_tokens = name.startswith('!')
name = name.lstrip('!')
expand1 = name.startswith('?')
name = name.lstrip('?')
rule = Rule(name, expansion, alias, options)
compiled_rules.append(rule)


return name, expansions, cls(keep_all_tokens, expand1, priority=priority)
return tokens, compiled_rules, self.ignore






@@ -553,15 +536,30 @@ def resolve_token_references(token_defs):
if not changed:
break


def options_from_rule(name, *x):
if len(x) > 1:
priority, expansions = x
priority = int(priority)
else:
expansions ,= x
priority = None

keep_all_tokens = name.startswith('!')
name = name.lstrip('!')
expand1 = name.startswith('?')
name = name.lstrip('?')

return name, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority)


class GrammarLoader:
def __init__(self):
tokens = [TokenDef(name, PatternRE(value)) for name, value in TOKENS.items()]


rules = [RuleOptions.from_rule(name, x) for name, x in RULES.items()]
d = {r: ([(x.split(), None) for x in xs], o) for r, xs, o in rules}
rules, callback = ParseTreeBuilder(d, T).apply()
rules = [options_from_rule(name, x) for name, x in RULES.items()]
rules = [Rule(r, x.split(), None, o) for r, xs, o in rules for x in xs]
callback = ParseTreeBuilder(rules, T).create_callback()
lexer_conf = LexerConf(tokens, ['WS', 'COMMENT'])

parser_conf = ParserConf(rules, callback, 'start')
self.parser = LALR(lexer_conf, parser_conf)


@@ -636,7 +634,6 @@ class GrammarLoader:
ignore_names.append(name)
token_defs.append((name, (t, 0)))



# Verify correctness 2
token_names = set()
for name, _ in token_defs:
@@ -644,10 +641,13 @@ class GrammarLoader:
raise GrammarError("Token '%s' defined more than once" % name) raise GrammarError("Token '%s' defined more than once" % name)
token_names.add(name) token_names.add(name)


if set(ignore_names) > token_names:
raise GrammarError("Tokens %s were marked to ignore but were not defined!" % (set(ignore_names) - token_names))

# Resolve token references
resolve_token_references(token_defs)


rules = [RuleOptions.from_rule(*x) for x in rule_defs]
rules = [options_from_rule(*x) for x in rule_defs]


rule_names = set()
for name, _x, _o in rules:
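options_from_rule (extracted above from the old RuleOptions.from_rule classmethod) still interprets the '!' and '?' rule-name prefixes and an optional priority; roughly, with a placeholder string standing in for the parsed expansions tree:

name, expansions, opts = options_from_rule('?value', 'EXPANSIONS_TREE')
assert name == 'value' and opts.expand1 and not opts.keep_all_tokens and opts.priority is None

name, expansions, opts = options_from_rule('!stmt', '2', 'EXPANSIONS_TREE')
assert name == 'stmt' and opts.keep_all_tokens and opts.priority == 2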


+33 -28 lark/parse_tree_builder.py

@@ -1,6 +1,9 @@
from .common import is_terminal, GrammarError
from .utils import suppress
from .lexer import Token
from .grammar import Rule

###{standalone


class NodeBuilder:
def __init__(self, tree_class, name):
@@ -27,7 +30,7 @@ class Factory:


def __call__(self, node_builder):
return self.cls(node_builder, *self.args)


class TokenWrapper:
"Used for fixing the results of scanless parsing"
@@ -106,51 +109,53 @@ class ParseTreeBuilder:


self.rule_builders = list(self._init_builders(rules))


self.user_aliases = {}

def _init_builders(self, rules):
filter_out = set()
for origin, (expansions, options) in rules.items():
if options and options.filter_out:
assert origin.startswith('_') # Just to make sure
filter_out.add(origin)
for rule in rules:
if rule.options and rule.options.filter_out:
assert rule.origin.startswith('_') # Just to make sure
filter_out.add(rule.origin)


for origin, (expansions, options) in rules.items():
for rule in rules:
options = rule.options
keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False)
expand1 = options.expand1 if options else False
create_token = options.create_token if options else False


for expansion, alias in expansions:
if alias and origin.startswith('_'):
raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (origin, alias))
wrapper_chain = filter(None, [
(expand1 and not rule.alias) and Expand1,
create_token and Factory(TokenWrapper, create_token),
create_rule_handler(rule.expansion, keep_all_tokens, filter_out),
self.propagate_positions and PropagatePositions,
])


wrapper_chain = filter(None, [
(expand1 and not alias) and Expand1,
create_token and Factory(TokenWrapper, create_token),
create_rule_handler(expansion, keep_all_tokens, filter_out),
self.propagate_positions and PropagatePositions,
])
yield rule, wrapper_chain


yield origin, expansion, options, alias or origin, wrapper_chain



def apply(self, transformer=None):
def create_callback(self, transformer=None):
callback = Callback()


new_rules = []
for origin, expansion, options, alias, wrapper_chain in self.rule_builders:
callback_name = '_callback_%s_%s' % (origin, '_'.join(expansion))
for rule, wrapper_chain in self.rule_builders:
internal_callback_name = '_callback_%s_%s' % (rule.origin, '_'.join(rule.expansion))


user_callback_name = rule.alias or rule.origin
try:
f = transformer._get_func(alias)
f = transformer._get_func(user_callback_name)
except AttributeError:
f = NodeBuilder(self.tree_class, alias)
f = NodeBuilder(self.tree_class, user_callback_name)

self.user_aliases[rule] = rule.alias
rule.alias = internal_callback_name


for w in wrapper_chain:
f = w(f)


if hasattr(callback, callback_name):
raise GrammarError("Rule expansion '%s' already exists in rule %s" % (' '.join(expansion), origin))
setattr(callback, callback_name, f)
if hasattr(callback, internal_callback_name):
raise GrammarError("Rule '%s' already exists" % (rule,))
setattr(callback, internal_callback_name, f)


new_rules.append(( origin, expansion, callback_name, options ))
return callback


return new_rules, callback
###}
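With apply() replaced by create_callback(), callers now build the rule-to-callable mapping themselves; the Lark_StandAlone class generated earlier in this commit shows the intended pattern, roughly:

parse_tree_builder = ParseTreeBuilder(rules, Tree)
callback = parse_tree_builder.create_callback(transformer=None)
# after create_callback(), each rule.alias holds the internal callback name
callbacks = {rule: getattr(callback, rule.alias or rule.origin, None) for rule in rules}
parser = _Parser(parse_table, callbacks)   # as done in Lark_StandAlone above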

+55 -63 lark/parser_frontends.py

@@ -1,5 +1,5 @@
import re
import sre_parse
from .utils import get_regexp_width


from parsers.grammar_analysis import GrammarAnalyzer
from .lexer import Lexer, ContextualLexer, Token
@@ -9,10 +9,16 @@ from .parsers import lalr_parser, earley, xearley, resolve_ambig, cyk
from .tree import Tree


class WithLexer:
def __init__(self, lexer_conf):
def init_traditional_lexer(self, lexer_conf):
self.lexer_conf = lexer_conf
self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)


def init_contextual_lexer(self, lexer_conf, parser_conf):
self.lexer_conf = lexer_conf
d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()}
always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept)

def lex(self, text):
stream = self.lexer.lex(text)
if self.lexer_conf.postlex:
@@ -23,32 +29,22 @@ class WithLexer:


class LALR(WithLexer):
def __init__(self, lexer_conf, parser_conf, options=None):
WithLexer.__init__(self, lexer_conf)

self.parser_conf = parser_conf
self.parser = lalr_parser.Parser(parser_conf)
self.init_traditional_lexer(lexer_conf)


def parse(self, text):
tokens = self.lex(text)
return self.parser.parse(tokens)
token_stream = self.lex(text)
return self.parser.parse(token_stream)




class LALR_ContextualLexer:
class LALR_ContextualLexer(WithLexer):
def __init__(self, lexer_conf, parser_conf, options=None):
self.lexer_conf = lexer_conf
self.parser_conf = parser_conf

self.parser = lalr_parser.Parser(parser_conf)

d = {idx:t.keys() for idx, t in self.parser.analysis.states_idx.items()}
always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept)
self.init_contextual_lexer(lexer_conf, parser_conf)


def parse(self, text):
tokens = self.lexer.lex(text)
if self.lexer_conf.postlex:
tokens = self.lexer_conf.postlex.process(tokens)
return self.parser.parse(tokens, self.lexer.set_parser_state)
token_stream = self.lex(text)
return self.parser.parse(token_stream, self.lexer.set_parser_state)


def get_ambiguity_resolver(options):
if not options or options.ambiguity == 'resolve':
@@ -60,55 +56,47 @@ def get_ambiguity_resolver(options):
raise ValueError(options) raise ValueError(options)


def tokenize_text(text): def tokenize_text(text):
new_text = []
line = 1 line = 1
col_start_pos = 0 col_start_pos = 0
for i, ch in enumerate(text): for i, ch in enumerate(text):
if '\n' in ch: if '\n' in ch:
line += ch.count('\n') line += ch.count('\n')
col_start_pos = i + ch.rindex('\n') col_start_pos = i + ch.rindex('\n')
new_text.append(Token('CHAR', ch, line=line, column=i - col_start_pos))
return new_text
yield Token('CHAR', ch, line=line, column=i - col_start_pos)


class Earley_NoLex: class Earley_NoLex:
def __init__(self, lexer_conf, parser_conf, options=None): def __init__(self, lexer_conf, parser_conf, options=None):
self.token_by_name = {t.name:t for t in lexer_conf.tokens}
self._prepare_match(lexer_conf)


rules = [(n, list(self._prepare_expansion(x)), a, o) for n,x,a,o in parser_conf.rules]

self.parser = earley.Parser(rules,
parser_conf.start,
parser_conf.callback,
self.parser = earley.Parser(parser_conf, self.match,
resolve_ambiguity=get_ambiguity_resolver(options)) resolve_ambiguity=get_ambiguity_resolver(options))


def _prepare_expansion(self, expansion):
for sym in expansion:
if is_terminal(sym):
regexp = self.token_by_name[sym].pattern.to_regexp()
width = sre_parse.parse(regexp).getwidth()
if width != (1,1):
raise GrammarError('Scanless parsing (lexer=None) requires all tokens to have a width of 1 (terminal %s: %s is %s)' % (sym, regexp, width))
yield Terminal_Regexp(sym, regexp)
else:
yield sym

def match(self, term, text, index=0):
return self.regexps[term].match(text, index)

def _prepare_match(self, lexer_conf):
self.regexps = {}
for t in lexer_conf.tokens:
regexp = t.pattern.to_regexp()
width = get_regexp_width(regexp)
if width != (1,1):
raise GrammarError('Scanless parsing (lexer=None) requires all tokens to have a width of 1 (terminal %s: %s is %s)' % (t.name, regexp, width))
self.regexps[t.name] = re.compile(regexp)


def parse(self, text): def parse(self, text):
new_text = tokenize_text(text)
return self.parser.parse(new_text)
token_stream = tokenize_text(text)
return self.parser.parse(token_stream)


class Earley(WithLexer): class Earley(WithLexer):
def __init__(self, lexer_conf, parser_conf, options=None): def __init__(self, lexer_conf, parser_conf, options=None):
WithLexer.__init__(self, lexer_conf)

rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules]
self.init_traditional_lexer(lexer_conf)


self.parser = earley.Parser(rules,
parser_conf.start,
parser_conf.callback,
self.parser = earley.Parser(parser_conf, self.match,
resolve_ambiguity=get_ambiguity_resolver(options)) resolve_ambiguity=get_ambiguity_resolver(options))


def _prepare_expansion(self, expansion):
return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion]
def match(self, term, token):
return term == token.type


def parse(self, text): def parse(self, text):
tokens = self.lex(text) tokens = self.lex(text)
@@ -119,27 +107,31 @@ class XEarley:
def __init__(self, lexer_conf, parser_conf, options=None): def __init__(self, lexer_conf, parser_conf, options=None):
self.token_by_name = {t.name:t for t in lexer_conf.tokens} self.token_by_name = {t.name:t for t in lexer_conf.tokens}


rules = [(n, list(self._prepare_expansion(x)), a, o) for n,x,a,o in parser_conf.rules]
self._prepare_match(lexer_conf)


ignore = [Terminal_Regexp(x, self.token_by_name[x].pattern.to_regexp()) for x in lexer_conf.ignore]

self.parser = xearley.Parser(rules,
parser_conf.start,
parser_conf.callback,
self.parser = xearley.Parser(parser_conf,
self.match,
resolve_ambiguity=get_ambiguity_resolver(options), resolve_ambiguity=get_ambiguity_resolver(options),
ignore=ignore,
ignore=lexer_conf.ignore,
predict_all=options.earley__predict_all predict_all=options.earley__predict_all
) )


def _prepare_expansion(self, expansion):
for sym in expansion:
if is_terminal(sym):
regexp = self.token_by_name[sym].pattern.to_regexp()
width = sre_parse.parse(regexp).getwidth()
assert width
yield Terminal_Regexp(sym, regexp)
def match(self, term, text, index=0):
return self.regexps[term].match(text, index)

def _prepare_match(self, lexer_conf):
self.regexps = {}
for t in lexer_conf.tokens:
regexp = t.pattern.to_regexp()
try:
width = get_regexp_width(regexp)[0]
except ValueError:
raise ValueError("Bad regexp in token %s: %s" % (t.name, regexp))
else: else:
yield sym
if width == 0:
raise ValueError("Dynamic Earley doesn't allow zero-width regexps")

self.regexps[t.name] = re.compile(regexp)


def parse(self, text): def parse(self, text):
return self.parser.parse(text) return self.parser.parse(text)
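The refactor above removes the per-frontend Terminal_Token/Terminal_Regexp wrappers and instead hands the Earley parsers a single term_matcher callable: lexer-based frontends match a terminal name against a token's type, while the scanless and dynamic-lexer frontends match compiled regexps against the raw text at a position. A rough sketch of the two matcher shapes (the helper names here are illustrative):

import re

def make_token_matcher():
    # Frontends with a standard lexer: a terminal matches when the token type agrees.
    def match(term, token):
        return term == token.type
    return match

def make_text_matcher(token_defs):
    # Scanless / dynamic-lexer frontends: match compiled regexps against raw text.
    regexps = {name: re.compile(pattern) for name, pattern in token_defs}
    def match(term, text, index=0):
        return regexps[term].match(text, index)
    return match

match = make_text_matcher([('NAME', r'[a-z]+'), ('NUMBER', r'[0-9]+')])
m = match('NUMBER', 'abc123', 3)
print(m.group(0))   # -> '123'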


+ 17 - 48  lark/parsers/earley.py

@@ -13,14 +13,11 @@
# Author: Erez Shinan (2017) # Author: Erez Shinan (2017)
# Email : erezshin@gmail.com # Email : erezshin@gmail.com


from ..common import ParseError, UnexpectedToken, Terminal
from ..common import ParseError, UnexpectedToken, is_terminal
from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse
from .grammar_analysis import GrammarAnalyzer from .grammar_analysis import GrammarAnalyzer




class EndToken:
type = '$end'

class Derivation(Tree): class Derivation(Tree):
_hash = None _hash = None


@@ -35,8 +32,6 @@ class Derivation(Tree):
self._hash = Tree.__hash__(self) self._hash = Tree.__hash__(self)
return self._hash return self._hash


END_TOKEN = EndToken()

class Item(object): class Item(object):
"An Earley Item, the atom of the algorithm." "An Earley Item, the atom of the algorithm."


@@ -59,11 +54,8 @@ class Item(object):
new_tree = Derivation(self.rule, self.tree.children + [tree]) new_tree = Derivation(self.rule, self.tree.children + [tree])
return self.__class__(self.rule, self.ptr+1, self.start, new_tree) return self.__class__(self.rule, self.ptr+1, self.start, new_tree)


def similar(self, other):
return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule

def __eq__(self, other): def __eq__(self, other):
return self.similar(other) #and (self.tree == other.tree)
return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule


def __hash__(self): def __hash__(self):
return hash((self.rule, self.ptr, id(self.start))) # Always runs Derivation.__hash__ return hash((self.rule, self.ptr, id(self.start))) # Always runs Derivation.__hash__
@@ -134,7 +126,7 @@ class Column:
self.completed[item_key] = item self.completed[item_key] = item
self.to_reduce.append(item) self.to_reduce.append(item)
else: else:
if isinstance(item.expect, Terminal):
if is_terminal(item.expect):
self.to_scan.append(item) self.to_scan.append(item)
else: else:
k = item_key if self.predict_all else item k = item_key if self.predict_all else item
@@ -151,31 +143,30 @@ class Column:
__nonzero__ = __bool__ # Py2 backwards-compatibility __nonzero__ = __bool__ # Py2 backwards-compatibility


class Parser: class Parser:
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None):
self.analysis = GrammarAnalyzer(rules, start_symbol)
self.start_symbol = start_symbol
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None):
self.analysis = GrammarAnalyzer(parser_conf)
self.parser_conf = parser_conf
self.resolve_ambiguity = resolve_ambiguity self.resolve_ambiguity = resolve_ambiguity


self.FIRST = self.analysis.FIRST
self.postprocess = {} self.postprocess = {}
self.predictions = {} self.predictions = {}
self.FIRST = {}
for rule in self.analysis.rules:
if rule.origin != '$root': # XXX kinda ugly
a = rule.alias
self.postprocess[rule] = a if callable(a) else (a and getattr(callback, a))
self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]
for rule in parser_conf.rules:
self.postprocess[rule] = getattr(parser_conf.callback, rule.alias)
self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]


self.FIRST[rule.origin] = self.analysis.FIRST[rule.origin]
self.term_matcher = term_matcher




def parse(self, stream, start_symbol=None): def parse(self, stream, start_symbol=None):
# Define parser functions # Define parser functions
start_symbol = start_symbol or self.start_symbol
start_symbol = start_symbol or self.parser_conf.start


_Item = Item _Item = Item
match = self.term_matcher


def predict(nonterm, column): def predict(nonterm, column):
assert not isinstance(nonterm, Terminal), nonterm
assert not is_terminal(nonterm), nonterm
return [_Item(rule, 0, column, None) for rule in self.predictions[nonterm]] return [_Item(rule, 0, column, None) for rule in self.predictions[nonterm]]


def complete(item): def complete(item):
@@ -195,14 +186,13 @@ class Parser:


for item in to_reduce: for item in to_reduce:
new_items = list(complete(item)) new_items = list(complete(item))
for new_item in new_items:
if new_item.similar(item):
raise ParseError('Infinite recursion detected! (rule %s)' % new_item.rule)
if item in new_items:
raise ParseError('Infinite recursion detected! (rule %s)' % item.rule)
column.add(new_items) column.add(new_items)


def scan(i, token, column): def scan(i, token, column):
next_set = Column(i, self.FIRST) next_set = Column(i, self.FIRST)
next_set.add(item.advance(token) for item in column.to_scan if item.expect.match(token))
next_set.add(item.advance(token) for item in column.to_scan if match(item.expect, token))


if not next_set: if not next_set:
expect = {i.expect for i in column.to_scan} expect = {i.expect for i in column.to_scan}
@@ -249,24 +239,3 @@ class ApplyCallbacks(Transformer_NoRecurse):
return callback(children) return callback(children)
else: else:
return Tree(rule.origin, children) return Tree(rule.origin, children)

# RULES = [
# ('a', ['d']),
# ('d', ['b']),
# ('b', ['C']),
# ('b', ['b', 'C']),
# ('b', ['C', 'b']),
# ]
# p = Parser(RULES, 'a')
# for x in p.parse('CC'):
# print x.pretty()

#---------------
# RULES = [
# ('s', ['a', 'a']),
# ('a', ['b', 'b']),
# ('b', ['C'], lambda (x,): x),
# ('b', ['b', 'C']),
# ]
# p = Parser(RULES, 's', {})
# print p.parse('CCCCC').pretty()

+ 19 - 29  lark/parsers/grammar_analysis.py

@@ -1,20 +1,8 @@


from ..utils import bfs, fzset from ..utils import bfs, fzset
from ..common import GrammarError, is_terminal from ..common import GrammarError, is_terminal
from ..grammar import Rule


class Rule(object):
"""
origin : a symbol
expansion : a list of symbols
"""
def __init__(self, origin, expansion, alias=None, options=None):
self.origin = origin
self.expansion = expansion
self.alias = alias
self.options = options

def __repr__(self):
return '<%s : %s>' % (self.origin, ' '.join(map(str,self.expansion)))


class RulePtr(object): class RulePtr(object):
def __init__(self, rule, index): def __init__(self, rule, index):
@@ -106,28 +94,30 @@ def calculate_sets(rules):




class GrammarAnalyzer(object): class GrammarAnalyzer(object):
def __init__(self, rule_tuples, start_symbol, debug=False):
self.start_symbol = start_symbol
def __init__(self, parser_conf, debug=False):
rules = parser_conf.rules
assert len(rules) == len(set(rules))

self.start_symbol = parser_conf.start
self.debug = debug self.debug = debug
rule_tuples = list(rule_tuples)
rule_tuples.append(('$root', [start_symbol, '$end']))
rule_tuples = [(t[0], t[1], None, None) if len(t)==2 else t for t in rule_tuples]

self.rules = set()
self.rules_by_origin = {o: [] for o, _x, _a, _opt in rule_tuples}
for origin, exp, alias, options in rule_tuples:
r = Rule( origin, exp, alias, options )
self.rules.add(r)
self.rules_by_origin[origin].append(r)

for r in self.rules:

root_rule = Rule('$root', [self.start_symbol, '$END'])

self.rules_by_origin = {r.origin: [] for r in rules}
for r in rules:
self.rules_by_origin[r.origin].append(r)

self.rules_by_origin[root_rule.origin] = [root_rule]

for r in rules:
for sym in r.expansion: for sym in r.expansion:
if not (is_terminal(sym) or sym in self.rules_by_origin): if not (is_terminal(sym) or sym in self.rules_by_origin):
raise GrammarError("Using an undefined rule: %s" % sym) raise GrammarError("Using an undefined rule: %s" % sym)


self.init_state = self.expand_rule('$root')
self.start_state = self.expand_rule('$root')
self.rules = rules


self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules)
self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules + [root_rule])


def expand_rule(self, rule): def expand_rule(self, rule):
"Returns all init_ptrs accessible by rule (recursive)" "Returns all init_ptrs accessible by rule (recursive)"


+ 47 - 18  lark/parsers/lalr_analysis.py

@@ -14,7 +14,43 @@ from ..common import GrammarError, is_terminal


from .grammar_analysis import GrammarAnalyzer from .grammar_analysis import GrammarAnalyzer


ACTION_SHIFT = 0
class Action:
def __init__(self, name):
self.name = name
def __str__(self):
return self.name
def __repr__(self):
return str(self)

Shift = Action('Shift')
Reduce = Action('Reduce')

class ParseTable:
def __init__(self, states, start_state, end_state):
self.states = states
self.start_state = start_state
self.end_state = end_state

class IntParseTable(ParseTable):

@classmethod
def from_ParseTable(cls, parse_table):
enum = list(parse_table.states)
state_to_idx = {s:i for i,s in enumerate(enum)}
int_states = {}

for s, la in parse_table.states.items():
la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v
for k,v in la.items()}
int_states[ state_to_idx[s] ] = la


start_state = state_to_idx[parse_table.start_state]
end_state = state_to_idx[parse_table.end_state]
return cls(int_states, start_state, end_state)





class LALR_Analyzer(GrammarAnalyzer): class LALR_Analyzer(GrammarAnalyzer):


@@ -27,7 +63,7 @@ class LALR_Analyzer(GrammarAnalyzer):
sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied) sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied)
for rp in sat: for rp in sat:
for term in self.FOLLOW.get(rp.rule.origin, ()): for term in self.FOLLOW.get(rp.rule.origin, ()):
lookahead[term].append(('reduce', rp.rule))
lookahead[term].append((Reduce, rp.rule))


d = classify(unsat, lambda rp: rp.next) d = classify(unsat, lambda rp: rp.next)
for sym, rps in d.items(): for sym, rps in d.items():
@@ -38,8 +74,8 @@ class LALR_Analyzer(GrammarAnalyzer):
rps |= self.expand_rule(rp.next) rps |= self.expand_rule(rp.next)


new_state = fzset(rps) new_state = fzset(rps)
lookahead[sym].append(('shift', new_state))
if sym == '$end':
lookahead[sym].append((Shift, new_state))
if sym == '$END':
self.end_states.append( new_state ) self.end_states.append( new_state )
yield fzset(rps) yield fzset(rps)


@@ -50,7 +86,7 @@ class LALR_Analyzer(GrammarAnalyzer):
for x in v: for x in v:
# XXX resolving shift/reduce into shift, like PLY # XXX resolving shift/reduce into shift, like PLY
# Give a proper warning # Give a proper warning
if x[0] == 'shift':
if x[0] is Shift:
lookahead[k] = [x] lookahead[k] = [x]


for k, v in lookahead.items(): for k, v in lookahead.items():
@@ -59,22 +95,15 @@ class LALR_Analyzer(GrammarAnalyzer):


self.states[state] = {k:v[0] for k, v in lookahead.items()} self.states[state] = {k:v[0] for k, v in lookahead.items()}


for _ in bfs([self.init_state], step):
for _ in bfs([self.start_state], step):
pass pass


self.end_state ,= self.end_states self.end_state ,= self.end_states


# --
self.enum = list(self.states)
self.enum_rev = {s:i for i,s in enumerate(self.enum)}
self.states_idx = {}

for s, la in self.states.items():
la = {k:(ACTION_SHIFT, self.enum_rev[v[1]]) if v[0]=='shift'
else (v[0], (v[1], len(v[1].expansion))) # Reduce
for k,v in la.items()}
self.states_idx[ self.enum_rev[s] ] = la
self._parse_table = ParseTable(self.states, self.start_state, self.end_state)


if self.debug:
self.parse_table = self._parse_table
else:
self.parse_table = IntParseTable.from_ParseTable(self._parse_table)


self.init_state_idx = self.enum_rev[self.init_state]
self.end_state_idx = self.enum_rev[self.end_state]
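For orientation, here is a hand-built example of the data shape the new ParseTable holds and the renumbering that IntParseTable.from_ParseTable performs. It is only a sketch: the state keys are strings and the reduce argument is a placeholder, where the real analyzer uses frozensets of RulePtrs and Rule objects.

Shift, Reduce = 'Shift', 'Reduce'    # stand-ins for the Action singletons above

states = {
    'state0': {'NUMBER': (Shift, 'state1')},
    'state1': {'$END': (Reduce, 'start -> NUMBER')},
}
start_state, end_state = 'state0', 'state1'

# IntParseTable.from_ParseTable does essentially this renumbering:
enum = list(states)
state_to_idx = {s: i for i, s in enumerate(enum)}
int_states = {
    state_to_idx[s]: {k: ((v[0], state_to_idx[v[1]]) if v[0] is Shift else v)
                      for k, v in la.items()}
    for s, la in states.items()
}
print(int_states[state_to_idx[start_state]])   # -> {'NUMBER': ('Shift', 1)}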

+ 25 - 22  lark/parsers/lalr_parser.py

@@ -3,30 +3,30 @@
# Author: Erez Shinan (2017) # Author: Erez Shinan (2017)
# Email : erezshin@gmail.com # Email : erezshin@gmail.com


from ..common import ParseError, UnexpectedToken
from ..common import UnexpectedToken


from .lalr_analysis import LALR_Analyzer, ACTION_SHIFT

class FinalReduce:
def __init__(self, value):
self.value = value
from .lalr_analysis import LALR_Analyzer, Shift


class Parser: class Parser:
def __init__(self, parser_conf): def __init__(self, parser_conf):
assert all(o is None or o.priority is None for n,x,a,o in parser_conf.rules), "LALR doesn't yet support prioritization"
self.analysis = analysis = LALR_Analyzer(parser_conf.rules, parser_conf.start)
assert all(r.options is None or r.options.priority is None
for r in parser_conf.rules), "LALR doesn't yet support prioritization"
self.analysis = analysis = LALR_Analyzer(parser_conf)
analysis.compute_lookahead() analysis.compute_lookahead()
callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None) callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
for rule in analysis.rules} for rule in analysis.rules}


self.parser = _Parser(analysis.states_idx, analysis.init_state_idx, analysis.end_state_idx, callbacks)
self.parser_conf = parser_conf
self.parser = _Parser(analysis.parse_table, callbacks)
self.parse = self.parser.parse self.parse = self.parser.parse


###{standalone

class _Parser: class _Parser:
def __init__(self, states, init_state, end_state, callbacks):
self.states = states
self.init_state = init_state
self.end_state = end_state
def __init__(self, parse_table, callbacks):
self.states = parse_table.states
self.start_state = parse_table.start_state
self.end_state = parse_table.end_state
self.callbacks = callbacks self.callbacks = callbacks


def parse(self, seq, set_state=None): def parse(self, seq, set_state=None):
@@ -35,10 +35,10 @@ class _Parser:
stream = iter(seq) stream = iter(seq)
states = self.states states = self.states


state_stack = [self.init_state]
state_stack = [self.start_state]
value_stack = [] value_stack = []


if set_state: set_state(self.init_state)
if set_state: set_state(self.start_state)


def get_action(key): def get_action(key):
state = state_stack[-1] state = state_stack[-1]
@@ -49,7 +49,8 @@ class _Parser:


raise UnexpectedToken(token, expected, seq, i) raise UnexpectedToken(token, expected, seq, i)


def reduce(rule, size):
def reduce(rule):
size = len(rule.expansion)
if size: if size:
s = value_stack[-size:] s = value_stack[-size:]
del state_stack[-size:] del state_stack[-size:]
@@ -60,7 +61,7 @@ class _Parser:
value = self.callbacks[rule](s) value = self.callbacks[rule](s)


_action, new_state = get_action(rule.origin) _action, new_state = get_action(rule.origin)
assert _action == ACTION_SHIFT
assert _action is Shift
state_stack.append(new_state) state_stack.append(new_state)
value_stack.append(value) value_stack.append(value)


@@ -72,22 +73,24 @@ class _Parser:
action, arg = get_action(token.type) action, arg = get_action(token.type)
assert arg != self.end_state assert arg != self.end_state


if action == ACTION_SHIFT:
if action is Shift:
state_stack.append(arg) state_stack.append(arg)
value_stack.append(token) value_stack.append(token)
if set_state: set_state(arg) if set_state: set_state(arg)
token = next(stream) token = next(stream)
i += 1 i += 1
else: else:
reduce(*arg)
reduce(arg)
except StopIteration: except StopIteration:
pass pass


while True: while True:
_action, arg = get_action('$end')
if _action == ACTION_SHIFT:
_action, arg = get_action('$END')
if _action is Shift:
assert arg == self.end_state assert arg == self.end_state
val ,= value_stack val ,= value_stack
return val return val
else: else:
reduce(*arg)
reduce(arg)

###}

+ 21 - 25  lark/parsers/xearley.py

@@ -20,7 +20,7 @@


from collections import defaultdict from collections import defaultdict


from ..common import ParseError, UnexpectedToken, Terminal
from ..common import ParseError, UnexpectedToken, is_terminal
from ..lexer import Token, UnexpectedInput from ..lexer import Token, UnexpectedInput
from ..tree import Tree from ..tree import Tree
from .grammar_analysis import GrammarAnalyzer from .grammar_analysis import GrammarAnalyzer
@@ -28,37 +28,34 @@ from .grammar_analysis import GrammarAnalyzer
from .earley import ApplyCallbacks, Item, Column from .earley import ApplyCallbacks, Item, Column


class Parser: class Parser:
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None, ignore=(), predict_all=False):
self.analysis = GrammarAnalyzer(rules, start_symbol)
self.start_symbol = start_symbol
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None, ignore=(), predict_all=False):
self.analysis = GrammarAnalyzer(parser_conf)
self.parser_conf = parser_conf
self.resolve_ambiguity = resolve_ambiguity self.resolve_ambiguity = resolve_ambiguity
self.ignore = list(ignore) self.ignore = list(ignore)
self.predict_all = predict_all self.predict_all = predict_all


self.FIRST = self.analysis.FIRST
self.postprocess = {} self.postprocess = {}
self.predictions = {} self.predictions = {}
self.FIRST = {}

for rule in self.analysis.rules:
if rule.origin != '$root': # XXX kinda ugly
a = rule.alias
self.postprocess[rule] = a if callable(a) else (a and getattr(callback, a))
self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]
for rule in parser_conf.rules:
self.postprocess[rule] = getattr(parser_conf.callback, rule.alias)
self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]


self.FIRST[rule.origin] = self.analysis.FIRST[rule.origin]
self.term_matcher = term_matcher




def parse(self, stream, start_symbol=None): def parse(self, stream, start_symbol=None):
# Define parser functions # Define parser functions
start_symbol = start_symbol or self.start_symbol
start_symbol = start_symbol or self.parser_conf.start
delayed_matches = defaultdict(list) delayed_matches = defaultdict(list)
match = self.term_matcher


text_line = 1 text_line = 1
text_column = 0 text_column = 0


def predict(nonterm, column): def predict(nonterm, column):
assert not isinstance(nonterm, Terminal), nonterm
assert not is_terminal(nonterm), nonterm
return [Item(rule, 0, column, None) for rule in self.predictions[nonterm]] return [Item(rule, 0, column, None) for rule in self.predictions[nonterm]]


def complete(item): def complete(item):
@@ -77,16 +74,15 @@ class Parser:
column.add( predict(nonterm, column) ) column.add( predict(nonterm, column) )
for item in to_reduce: for item in to_reduce:
new_items = list(complete(item)) new_items = list(complete(item))
for new_item in new_items:
if new_item.similar(item):
raise ParseError('Infinite recursion detected! (rule %s)' % new_item.rule)
if item in new_items:
raise ParseError('Infinite recursion detected! (rule %s)' % item.rule)
column.add(new_items) column.add(new_items)


def scan(i, token, column): def scan(i, token, column):
to_scan = column.to_scan to_scan = column.to_scan


for x in self.ignore: for x in self.ignore:
m = x.match(stream, i)
m = match(x, stream, i)
if m: if m:
delayed_matches[m.end()] += set(to_scan) delayed_matches[m.end()] += set(to_scan)
delayed_matches[m.end()] += set(column.to_reduce) delayed_matches[m.end()] += set(column.to_reduce)
@@ -99,16 +95,16 @@ class Parser:
# delayed_matches[m.end()] += to_scan # delayed_matches[m.end()] += to_scan


for item in to_scan: for item in to_scan:
m = item.expect.match(stream, i)
m = match(item.expect, stream, i)
if m: if m:
t = Token(item.expect.name, m.group(0), i, text_line, text_column)
t = Token(item.expect, m.group(0), i, text_line, text_column)
delayed_matches[m.end()].append(item.advance(t)) delayed_matches[m.end()].append(item.advance(t))


s = m.group(0) s = m.group(0)
for j in range(1, len(s)): for j in range(1, len(s)):
m = item.expect.match(s[:-j])
m = match(item.expect, s[:-j])
if m: if m:
t = Token(item.expect.name, m.group(0), i, text_line, text_column)
t = Token(item.expect, m.group(0), i, text_line, text_column)
delayed_matches[i+m.end()].append(item.advance(t)) delayed_matches[i+m.end()].append(item.advance(t))


next_set = Column(i+1, self.FIRST, predict_all=self.predict_all) next_set = Column(i+1, self.FIRST, predict_all=self.predict_all)
@@ -131,7 +127,7 @@ class Parser:


if token == '\n': if token == '\n':
text_line += 1 text_line += 1
text_column = 1
text_column = 0
else: else:
text_column += 1 text_column += 1


@@ -143,7 +139,7 @@ class Parser:
if n.rule.origin==start_symbol and n.start is column0] if n.rule.origin==start_symbol and n.start is column0]


if not solutions: if not solutions:
expected_tokens = [t.expect.name for t in column.to_scan]
expected_tokens = [t.expect for t in column.to_scan]
raise ParseError('Unexpected end of input! Expecting a terminal of: %s' % expected_tokens) raise ParseError('Unexpected end of input! Expecting a terminal of: %s' % expected_tokens)


elif len(solutions) == 1: elif len(solutions) == 1:
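The dynamic lexer above works by trying each expected terminal's regexp directly against the input text at the current position and deferring every successful match to the column where it ends (delayed_matches). A toy sketch of that scanning idea, not library code:

import re
from collections import defaultdict

regexps = {'WORD': re.compile(r'[a-z]+'), 'NUMBER': re.compile(r'[0-9]+')}

def scan_position(text, i, expected_terms):
    # Collect matches keyed by their end position, like xearley's delayed_matches.
    delayed = defaultdict(list)
    for term in expected_terms:
        m = regexps[term].match(text, i)
        if m:
            delayed[m.end()].append((term, m.group(0)))
    return delayed

print(dict(scan_position('abc123', 0, ['WORD', 'NUMBER'])))   # -> {3: [('WORD', 'abc')]}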


+ 203 - 0  lark/tools/standalone.py

@@ -0,0 +1,203 @@
###{standalone
#
#
# Lark Stand-alone Generator Tool
# ----------------------------------
# Generates a stand-alone LALR(1) parser with a standard lexer
#
# Git: https://github.com/erezsh/lark
# Author: Erez Shinan (erezshin@gmail.com)
#
#
# >>> LICENSE
#
# This tool and its generated code use a separate license from Lark.
#
# It is licensed under GPLv2 or above.
#
# If you wish to purchase a commercial license for this tool and its
# generated code, contact me via email.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# See <http://www.gnu.org/licenses/>.
#
#
###}

import codecs
import sys
import os
from pprint import pprint
from os import path
from collections import defaultdict

import lark
from lark import Lark
from lark.parsers.lalr_analysis import Shift, Reduce

from ..grammar import Rule

__dir__ = path.dirname(__file__)
__larkdir__ = path.join(__dir__, path.pardir)


EXTRACT_STANDALONE_FILES = [
'tools/standalone.py',
'utils.py',
'common.py',
'tree.py',
'indenter.py',
'lexer.py',
'parse_tree_builder.py',
'parsers/lalr_parser.py',
]


def extract_sections(lines):
section = None
text = []
sections = defaultdict(list)
for l in lines:
if l.startswith('###'):
if l[3] == '{':
section = l[4:].strip()
elif l[3] == '}':
sections[section] += text
section = None
text = []
else:
raise ValueError(l)
elif section:
text.append(l)

return {name:''.join(text) for name, text in sections.items()}

class LexerAtoms:
def __init__(self, lexer):
self.mres = [(p.pattern,d) for p,d in lexer.mres]
self.newline_types = lexer.newline_types
self.ignore_types = lexer.ignore_types
self.callback = {name:[(p.pattern,d) for p,d in c.mres]
for name, c in lexer.callback.items()}

def print_python(self):
print('import re')
print('MRES = (')
pprint(self.mres)
print(')')
print('LEXER_CALLBACK = (')
pprint(self.callback)
print(')')
print('NEWLINE_TYPES = %s' % self.newline_types)
print('IGNORE_TYPES = %s' % self.ignore_types)
print('class LexerRegexps: pass')
print('lexer_regexps = LexerRegexps()')
print('lexer_regexps.mres = [(re.compile(p), d) for p, d in MRES]')
print('lexer_regexps.callback = {n: UnlessCallback([(re.compile(p), d) for p, d in mres])')
print(' for n, mres in LEXER_CALLBACK.items()}')
print('lexer = _Lex(lexer_regexps)')
print('def lex(stream):')
print(' return lexer.lex(stream, NEWLINE_TYPES, IGNORE_TYPES)')


class GetRule:
def __init__(self, rule_id):
self.rule_id = rule_id

def __repr__(self):
return 'RULES[%d]' % self.rule_id

rule_ids = {}
token_types = {}

def _get_token_type(token_type):
if token_type not in token_types:
token_types[token_type] = len(token_types)
return token_types[token_type]

class ParserAtoms:
def __init__(self, parser):
self.parse_table = parser.analysis.parse_table

def print_python(self):
print('class ParseTable: pass')
print('parse_table = ParseTable()')
print('STATES = {')
for state, actions in self.parse_table.states.items():
print(' %r: %r,' % (state, {_get_token_type(token): ((1, rule_ids[arg]) if action is Reduce else (0, arg))
for token, (action, arg) in actions.items()}))
print('}')
print('TOKEN_TYPES = (')
pprint({v:k for k, v in token_types.items()})
print(')')
print('parse_table.states = {s: {TOKEN_TYPES[t]: (a, RULES[x] if a is Reduce else x) for t, (a, x) in acts.items()}')
print(' for s, acts in STATES.items()}')
print('parse_table.start_state = %s' % self.parse_table.start_state)
print('parse_table.end_state = %s' % self.parse_table.end_state)
print('class Lark_StandAlone:')
print(' def __init__(self, transformer=None, postlex=None):')
print(' callback = parse_tree_builder.create_callback(transformer=transformer)')
print(' callbacks = {rule: getattr(callback, rule.alias or rule.origin, None) for rule in RULES.values()}')
print(' self.parser = _Parser(parse_table, callbacks)')
print(' self.postlex = postlex')
print(' def parse(self, stream):')
print(' tokens = lex(stream)')
print(' if self.postlex: tokens = self.postlex.process(tokens)')
print(' return self.parser.parse(tokens)')

class TreeBuilderAtoms:
def __init__(self, lark):
self.rules = lark.rules
self.ptb = lark._parse_tree_builder

def print_python(self):
print('RULES = {')
for i, r in enumerate(self.rules):
rule_ids[r] = i
print(' %d: Rule(%r, %r, %r, %r),' % (i, r.origin, r.expansion, self.ptb.user_aliases[r], r.options ))
print('}')
print('parse_tree_builder = ParseTreeBuilder(RULES.values(), Tree)')

def main(fn, start):
with codecs.open(fn, encoding='utf8') as f:
lark_inst = Lark(f, parser="lalr", start=start)

lexer_atoms = LexerAtoms(lark_inst.parser.lexer)
parser_atoms = ParserAtoms(lark_inst.parser.parser)
tree_builder_atoms = TreeBuilderAtoms(lark_inst)

print('# The file was automatically generated by Lark v%s' % lark.__version__)

for pyfile in EXTRACT_STANDALONE_FILES:
print (extract_sections(open(os.path.join(__larkdir__, pyfile)))['standalone'])

print(open(os.path.join(__larkdir__, 'grammar.py')).read())
print('Shift = 0')
print('Reduce = 1')
lexer_atoms.print_python()
tree_builder_atoms.print_python()
parser_atoms.print_python()

if __name__ == '__main__':
if len(sys.argv) < 2:
print("Lark Stand-alone Generator Tool")
print("Usage: python -m lark.tools.standalone <grammar-file> [<start>]")
sys.exit(1)

if len(sys.argv) == 3:
fn, start = sys.argv[1:]
elif len(sys.argv) == 2:
fn, start = sys.argv[1], 'start'
else:
assert False, sys.argv

main(fn, start)
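As a usage sketch: the tool prints the generated parser to stdout, so per its own usage message it would typically be run as python -m lark.tools.standalone json.g > json_parser.py (the grammar and module names follow the examples/standalone files added in this commit). The generated module then exposes the Lark_StandAlone class printed above:

# Assuming the generator's output was saved as json_parser.py (see above):
from json_parser import Lark_StandAlone

parser = Lark_StandAlone()
tree = parser.parse('{"answer": 42}')
print(tree.pretty())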

+ 5 - 1  lark/tree.py

@@ -7,6 +7,7 @@ from copy import deepcopy


from .utils import inline_args from .utils import inline_args


###{standalone
class Tree(object): class Tree(object):
def __init__(self, data, children, rule=None): def __init__(self, data, children, rule=None):
self.data = data self.data = data
@@ -34,6 +35,7 @@ class Tree(object):


def pretty(self, indent_str=' '): def pretty(self, indent_str=' '):
return ''.join(self._pretty(0, indent_str)) return ''.join(self._pretty(0, indent_str))
###}


def expand_kids_by_index(self, *indices): def expand_kids_by_index(self, *indices):
for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices
@@ -100,6 +102,7 @@ class Tree(object):






###{standalone
class Transformer(object): class Transformer(object):
def _get_func(self, name): def _get_func(self, name):
return getattr(self, name) return getattr(self, name)
@@ -139,7 +142,7 @@ class TransformerChain(object):


def __mul__(self, other): def __mul__(self, other):
return TransformerChain(*self.transformers + (other,)) return TransformerChain(*self.transformers + (other,))




class InlineTransformer(Transformer): class InlineTransformer(Transformer):
@@ -196,6 +199,7 @@ class Transformer_NoRecurse(Transformer):


def __default__(self, t): def __default__(self, t):
return t return t
###}




def pydot__tree_to_png(tree, filename): def pydot__tree_to_png(tree, filename):


+ 26 - 17  lark/utils.py

@@ -1,7 +1,4 @@
import functools
import types
from collections import deque from collections import deque
from contextlib import contextmanager


class fzset(frozenset): class fzset(frozenset):
def __repr__(self): def __repr__(self):
@@ -49,8 +46,13 @@ try:
except NameError: # Python 3 except NameError: # Python 3
STRING_TYPE = str STRING_TYPE = str


Str = type(u'')
###{standalone


import types
import functools
from contextlib import contextmanager

Str = type(u'')


def inline_args(f): def inline_args(f):
# print '@@', f.__name__, type(f), isinstance(f, types.FunctionType), isinstance(f, types.TypeType), isinstance(f, types.BuiltinFunctionType) # print '@@', f.__name__, type(f), isinstance(f, types.FunctionType), isinstance(f, types.TypeType), isinstance(f, types.BuiltinFunctionType)
@@ -76,19 +78,6 @@ def inline_args(f):
return _f return _f





try:
compare = cmp
except NameError:
def compare(a, b):
if a == b:
return 0
elif a > b:
return 1
else:
return -1


try: try:
from contextlib import suppress # Python 3 from contextlib import suppress # Python 3
except ImportError: except ImportError:
@@ -107,6 +96,26 @@ except ImportError:
except excs: except excs:
pass pass


###}






try:
compare = cmp
except NameError:
def compare(a, b):
if a == b:
return 0
elif a > b:
return 1
else:
return -1


import sre_parse
import sre_constants
def get_regexp_width(regexp):
try:
return sre_parse.parse(regexp).getwidth()
except sre_constants.error:
raise ValueError(regexp)
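get_regexp_width, added above, simply exposes sre_parse's (min, max) width calculation and turns pattern errors into ValueError; the frontends use it to reject zero-width or multi-character terminals where required. A quick illustration (assumes this version of lark is importable):

from lark.utils import get_regexp_width

print(get_regexp_width(r'[a-z]'))   # -> (1, 1)
print(get_regexp_width(r'a|bc'))    # -> (1, 2)
print(get_regexp_width(r'x*')[0])   # -> 0, i.e. the pattern can match the empty string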

+ 49 - 5  tests/test_parser.py

@@ -126,7 +126,7 @@ class TestParsers(unittest.TestCase):
r = T().transform(g.parse("x")) r = T().transform(g.parse("x"))
self.assertEqual( r.children, ["<b>"] ) self.assertEqual( r.children, ["<b>"] )


g = Lark("""start: a g = Lark("""start: a
?a : b ?a : b
b : "x" b : "x"
@@ -142,14 +142,14 @@ class TestParsers(unittest.TestCase):
r = T().transform(g.parse("xx")) r = T().transform(g.parse("xx"))
self.assertEqual( r.children, ["<c>"] ) self.assertEqual( r.children, ["<c>"] )


g = Lark("""start: a g = Lark("""start: a
?a : b b -> c ?a : b b -> c
b : "x" b : "x"
""", parser='lalr', transformer=T()) """, parser='lalr', transformer=T())
r = g.parse("xx") r = g.parse("xx")
self.assertEqual( r.children, ["<c>"] ) self.assertEqual( r.children, ["<c>"] )






@@ -159,7 +159,7 @@ def _make_full_earley_test(LEXER):
# Fails an Earley implementation without special handling for empty rules, # Fails an Earley implementation without special handling for empty rules,
# or re-processing of already completed rules. # or re-processing of already completed rules.
g = Lark(r"""start: B g = Lark(r"""start: B
B: ("ab"|/[^b]/)*
B: ("ab"|/[^b]/)+
""", lexer=LEXER) """, lexer=LEXER)


self.assertEqual( g.parse('abc').children[0], 'abc') self.assertEqual( g.parse('abc').children[0], 'abc')
@@ -796,6 +796,49 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(tree.children, ['a', 'A']) self.assertEqual(tree.children, ['a', 'A'])




def test_twice_empty(self):
g = """!start: [["A"]]
"""
l = _Lark(g)
tree = l.parse('A')
self.assertEqual(tree.children, ['A'])

tree = l.parse('')
self.assertEqual(tree.children, [])

def test_undefined_ignore(self):
g = """!start: "A"

%ignore B
"""
self.assertRaises( GrammarError, _Lark, g)

@unittest.skipIf(LEXER==None, "TODO: Fix scanless parsing or get rid of it") # TODO
def test_line_and_column(self):
g = r"""!start: "A" bc "D"
!bc: "B\nC"
"""
l = _Lark(g)
a, bc, d = l.parse("AB\nCD").children
self.assertEqual(a.line, 1)
self.assertEqual(a.column, 0)

bc ,= bc.children
self.assertEqual(bc.line, 1)
self.assertEqual(bc.column, 1)

self.assertEqual(d.line, 2)
self.assertEqual(d.column, 1)

# self.assertEqual(a.end_line, 1)
# self.assertEqual(a.end_col, 1)
# self.assertEqual(bc.end_line, 2)
# self.assertEqual(bc.end_col, 1)
# self.assertEqual(d.end_line, 2)
# self.assertEqual(d.end_col, 2)



def test_reduce_cycle(self): def test_reduce_cycle(self):
"""Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state. """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state.
It seems that the correct solution is to explicitely distinguish finalization in the reduce() function. It seems that the correct solution is to explicitely distinguish finalization in the reduce() function.
@@ -969,7 +1012,7 @@ def _make_parser_test(LEXER, PARSER):


parser = _Lark(grammar) parser = _Lark(grammar)


tree = parser.parse("int 1 ! This is a comment\n")
tree = parser.parse("int 1 ! This is a comment\n")
self.assertEqual(tree.children, ['1']) self.assertEqual(tree.children, ['1'])


tree = parser.parse("int 1 ! This is a comment") # A trailing ignore token can be tricky! tree = parser.parse("int 1 ! This is a comment") # A trailing ignore token can be tricky!
@@ -983,6 +1026,7 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(tree.children, []) self.assertEqual(tree.children, [])





@unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions") @unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions")
def test_regex_escaping(self): def test_regex_escaping(self):
g = _Lark("start: /[ab]/") g = _Lark("start: /[ab]/")

