# The file was automatically generated by Lark v0.5.2
#
#
# Lark Stand-alone Generator Tool
# ----------------------------------
# Generates a stand-alone LALR(1) parser with a standard lexer
#
# Git: https://github.com/erezsh/lark
# Author: Erez Shinan (erezshin@gmail.com)
#
#
# >>> LICENSE
#
# This tool and its generated code use a separate license from Lark.
#
# It is licensed under GPLv2 or above.
#
# If you wish to purchase a commercial license for this tool and its
# generated code, contact me via email.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# See <http://www.gnu.org/licenses/>.
#
#

import types
import functools
from contextlib import contextmanager

Str = type(u'')


def inline_args(f):
    if isinstance(f, types.FunctionType):
        @functools.wraps(f)
        def _f_func(self, args):
            return f(self, *args)
        return _f_func
    elif isinstance(f, (type, types.BuiltinFunctionType)):
        @functools.wraps(f)
        def _f_builtin(_self, args):
            return f(*args)
        return _f_builtin
    elif isinstance(f, types.MethodType):
        @functools.wraps(f.__func__)
        def _f(self, args):
            return f.__func__(self, *args)
        return _f
    else:
        @functools.wraps(f.__call__.__func__)
        def _f(self, args):
            return f.__call__.__func__(self, *args)
        return _f
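
# Illustrative note (editor's addition, not part of the generated output):
# inline_args adapts a rule callback written with one positional parameter
# per child, e.g.
#
#     def pair(self, key, value): ...
#
# so that it can be called as pair(self, children) with a single list of
# children, which is the calling convention used by InlineTransformer below.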


try:
    from contextlib import suppress     # Python 3
except ImportError:
    @contextmanager
    def suppress(*excs):
        '''Catch and dismiss the provided exceptions

        >>> x = 'hello'
        >>> with suppress(IndexError):
        ...     x = x[10]
        >>> x
        'hello'
        '''
        try:
            yield
        except excs:
            pass


def is_terminal(sym):
    return sym.isupper()


class GrammarError(Exception):
    pass


class ParseError(Exception):
    pass


class UnexpectedToken(ParseError):
    def __init__(self, token, expected, seq, index):
        self.token = token
        self.expected = expected
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        try:
            context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        except AttributeError:
            context = seq[index:index+5]
        except TypeError:
            context = "<no context>"
        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token, self.line, self.column, expected, context))

        super(UnexpectedToken, self).__init__(message)


class Tree(object):
    def __init__(self, data, children):
        self.data = data
        self.children = list(children)

    def __repr__(self):
        return 'Tree(%s, %s)' % (self.data, self.children)

    def _pretty_label(self):
        return self.data

    def _pretty(self, level, indent_str):
        if len(self.children) == 1 and not isinstance(self.children[0], Tree):
            return [indent_str*level, self._pretty_label(), '\t', '%s' % self.children[0], '\n']

        l = [indent_str*level, self._pretty_label(), '\n']
        for n in self.children:
            if isinstance(n, Tree):
                l += n._pretty(level+1, indent_str)
            else:
                l += [indent_str*(level+1), '%s' % n, '\n']

        return l

    def pretty(self, indent_str=' '):
        return ''.join(self._pretty(0, indent_str))
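
    # Editor's addition: Visitor_NoRecurse and Transformer_NoRecurse below
    # call Tree.iter_subtrees(), which is missing from this excerpt. The
    # following is a minimal implementation modeled on Lark's own: it yields
    # every subtree exactly once, children before their parents, which is
    # the bottom-up order Transformer_NoRecurse relies on.
    def iter_subtrees(self):
        visited = set()
        q = [self]

        l = []
        while q:
            subtree = q.pop()
            l.append(subtree)
            if id(subtree) in visited:
                continue    # already queued from another branch
            visited.add(id(subtree))
            q += [c for c in subtree.children if isinstance(c, Tree)]

        seen = set()
        for x in reversed(l):
            if id(x) not in seen:
                yield x
                seen.add(id(x))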


class Transformer(object):
    def _get_func(self, name):
        return getattr(self, name)

    def transform(self, tree):
        items = []
        for c in tree.children:
            try:
                items.append(self.transform(c) if isinstance(c, Tree) else c)
            except Discard:
                pass
        try:
            f = self._get_func(tree.data)
        except AttributeError:
            return self.__default__(tree.data, items)
        else:
            return f(items)

    def __default__(self, data, children):
        return Tree(data, children)

    def __mul__(self, other):
        return TransformerChain(self, other)
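
# Illustrative note (editor's addition, not part of the generated output):
# a Transformer subclass maps rule names (or their aliases) to methods.
# For the JSON grammar embedded below, where rule 4 is aliased 'number':
#
#     class TreeToJson(Transformer):
#         def string(self, children):
#             return children[0][1:-1]    # strip the surrounding quotes
#         def number(self, children):
#             return float(children[0])
#
#     TreeToJson().transform(parse_tree)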


class Discard(Exception):
    pass


class TransformerChain(object):
    def __init__(self, *transformers):
        self.transformers = transformers

    def transform(self, tree):
        for t in self.transformers:
            tree = t.transform(tree)
        return tree

    def __mul__(self, other):
        return TransformerChain(*self.transformers + (other,))


class InlineTransformer(Transformer):
    def _get_func(self, name):  # use super()._get_func
        return inline_args(getattr(self, name)).__get__(self)


class Visitor(object):
    def visit(self, tree):
        for child in tree.children:
            if isinstance(child, Tree):
                self.visit(child)

        f = getattr(self, tree.data, self.__default__)
        f(tree)
        return tree

    def __default__(self, tree):
        pass


class Visitor_NoRecurse(Visitor):
    def visit(self, tree):
        subtrees = list(tree.iter_subtrees())

        for subtree in subtrees:
            getattr(self, subtree.data, self.__default__)(subtree)
        return tree


class Transformer_NoRecurse(Transformer):
    def transform(self, tree):
        subtrees = list(tree.iter_subtrees())

        def _t(t):
            # Assumes t is already transformed
            try:
                f = self._get_func(t.data)
            except AttributeError:
                return self.__default__(t)
            else:
                return f(t)

        for subtree in subtrees:
            children = []
            for c in subtree.children:
                try:
                    children.append(_t(c) if isinstance(c, Tree) else c)
                except Discard:
                    pass
            subtree.children = children

        return _t(tree)

    def __default__(self, t):
        return t


class Indenter:
    # Subclasses must define: NL_type, OPEN_PAREN_types, CLOSE_PAREN_types,
    # INDENT_type, DEDENT_type and tab_len (see the note after this class).
    def __init__(self):
        self.paren_level = 0
        self.indent_level = [0]

    def handle_NL(self, token):
        if self.paren_level > 0:
            return

        yield token

        indent_str = token.rsplit('\n', 1)[1]   # Tabs and spaces
        indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len

        if indent > self.indent_level[-1]:
            self.indent_level.append(indent)
            yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
        else:
            while indent < self.indent_level[-1]:
                self.indent_level.pop()
                yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)

            assert indent == self.indent_level[-1], '%s != %s' % (indent, self.indent_level[-1])

    def process(self, stream):
        for token in stream:
            if token.type == self.NL_type:
                for t in self.handle_NL(token):
                    yield t
            else:
                yield token

            if token.type in self.OPEN_PAREN_types:
                self.paren_level += 1
            elif token.type in self.CLOSE_PAREN_types:
                self.paren_level -= 1
                assert self.paren_level >= 0

        while len(self.indent_level) > 1:
            self.indent_level.pop()
            yield Token(self.DEDENT_type, '')

        assert self.indent_level == [0], self.indent_level

    # XXX Hack for ContextualLexer. Maybe there's a more elegant solution?
    @property
    def always_accept(self):
        return (self.NL_type,)
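
# Illustrative note (editor's addition, not part of the generated output):
# a concrete postlexer subclasses Indenter and fills in the expected class
# attributes, e.g. for a Python-like grammar (token names hypothetical):
#
#     class MyIndenter(Indenter):
#         NL_type = '_NEWLINE'
#         OPEN_PAREN_types = ['__LPAR', '__LSQB', '__LBRACE']
#         CLOSE_PAREN_types = ['__RPAR', '__RSQB', '__RBRACE']
#         INDENT_type = '_INDENT'
#         DEDENT_type = '_DEDENT'
#         tab_len = 8
#
# An instance is then passed as Lark_StandAlone(postlex=MyIndenter()).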


class LexError(Exception):
    pass


class UnexpectedInput(LexError):
    def __init__(self, seq, lex_pos, line, column, allowed=None):
        context = seq[lex_pos:lex_pos+5]
        message = "No token defined for: '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column)

        super(UnexpectedInput, self).__init__(message)

        self.line = line
        self.column = column
        self.context = context
        self.allowed = allowed


class Token(Str):
    def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None):
        inst = Str.__new__(cls, value)
        inst.type = type_
        inst.pos_in_stream = pos_in_stream
        inst.value = value
        inst.line = line
        inst.column = column
        return inst

    @classmethod
    def new_borrow_pos(cls, type_, value, borrow_t):
        return cls(type_, value, borrow_t.pos_in_stream, line=borrow_t.line, column=borrow_t.column)

    def __repr__(self):
        return 'Token(%s, %r)' % (self.type, self.value)

    def __deepcopy__(self, memo):
        return Token(self.type, self.value, self.pos_in_stream, self.line, self.column)

    def __eq__(self, other):
        if isinstance(other, Token) and self.type != other.type:
            return False

        return Str.__eq__(self, other)

    __hash__ = Str.__hash__


class LineCounter:
    def __init__(self):
        self.newline_char = '\n'
        self.char_pos = 0
        self.line = 1
        self.column = 0
        self.line_start_pos = 0

    def feed(self, token, test_newline=True):
        """Consume a token and calculate the new line & column.

        As an optional optimization, set test_newline=False if the token doesn't contain a newline.
        """
        if test_newline:
            newlines = token.count(self.newline_char)
            if newlines:
                self.line += newlines
                self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1

        self.char_pos += len(token)
        self.column = self.char_pos - self.line_start_pos


class _Lex:
    "Built to serve both Lexer and ContextualLexer"
    def __init__(self, lexer):
        self.lexer = lexer

    def lex(self, stream, newline_types, ignore_types):
        newline_types = list(newline_types)
        ignore_types = list(ignore_types)
        line_ctr = LineCounter()

        while True:
            lexer = self.lexer
            for mre, type_from_index in lexer.mres:
                m = mre.match(stream, line_ctr.char_pos)
                if m:
                    value = m.group(0)
                    type_ = type_from_index[m.lastindex]
                    if type_ not in ignore_types:
                        t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
                        if t.type in lexer.callback:
                            t = lexer.callback[t.type](t)
                        # A ContextualLexer may send() a replacement lexer here
                        lexer = yield t

                    line_ctr.feed(value, type_ in newline_types)
                    break
            else:
                if line_ctr.char_pos < len(stream):
                    raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
                break


class UnlessCallback:
    def __init__(self, mres):
        self.mres = mres

    def __call__(self, t):
        for mre, type_from_index in self.mres:
            m = mre.match(t.value)
            if m:
                value = m.group(0)
                t.type = type_from_index[m.lastindex]
                break
        return t


class NodeBuilder:
    def __init__(self, tree_class, name):
        self.tree_class = tree_class
        self.name = name

    def __call__(self, children):
        return self.tree_class(self.name, children)


class Expand1:
    def __init__(self, node_builder):
        self.node_builder = node_builder

    def __call__(self, children):
        if len(children) == 1:
            return children[0]
        else:
            return self.node_builder(children)


class Factory:
    def __init__(self, cls, *args):
        self.cls = cls
        self.args = args

    def __call__(self, node_builder):
        return self.cls(node_builder, *self.args)


class TokenWrapper:
    "Used for fixing the results of scanless parsing"

    def __init__(self, node_builder, token_name):
        self.node_builder = node_builder
        self.token_name = token_name

    def __call__(self, children):
        return self.node_builder([Token(self.token_name, ''.join(children))])


def identity(node_builder):
    return node_builder


class ChildFilter:
    def __init__(self, node_builder, to_include):
        self.node_builder = node_builder
        self.to_include = to_include

    def __call__(self, children):
        filtered = []
        for i, to_expand in self.to_include:
            if to_expand:
                filtered += children[i].children
            else:
                filtered.append(children[i])

        return self.node_builder(filtered)


def create_rule_handler(expansion, keep_all_tokens, filter_out):
    # Keep a child unless it is an underscored token or a filtered-out rule;
    # kept children whose symbol is an underscored rule are marked for
    # inline expansion into the parent.
    to_include = [(i, not is_terminal(sym) and sym.startswith('_'))
                  for i, sym in enumerate(expansion)
                  if keep_all_tokens
                  or not ((is_terminal(sym) and sym.startswith('_')) or sym in filter_out)
                  ]

    if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
        return Factory(ChildFilter, to_include)

    # else, if no filtering is required..
    return identity


class PropagatePositions:
    def __init__(self, node_builder):
        self.node_builder = node_builder

    def __call__(self, children):
        res = self.node_builder(children)

        if children:
            for a in children:
                with suppress(AttributeError):
                    res.line = a.line
                    res.column = a.column
                    break

            for a in reversed(children):
                with suppress(AttributeError):
                    res.end_line = a.end_line
                    res.end_col = a.end_col
                    break

        return res


class Callback(object):
    pass


class ParseTreeBuilder:
    def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False):
        self.tree_class = tree_class
        self.propagate_positions = propagate_positions
        self.always_keep_all_tokens = keep_all_tokens

        self.rule_builders = list(self._init_builders(rules))

        self.user_aliases = {}

    def _init_builders(self, rules):
        filter_out = set()
        for rule in rules:
            if rule.options and rule.options.filter_out:
                assert rule.origin.startswith('_')      # Just to make sure
                filter_out.add(rule.origin)

        for rule in rules:
            options = rule.options
            keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False)
            expand1 = options.expand1 if options else False
            create_token = options.create_token if options else False

            wrapper_chain = filter(None, [
                (expand1 and not rule.alias) and Expand1,
                create_token and Factory(TokenWrapper, create_token),
                create_rule_handler(rule.expansion, keep_all_tokens, filter_out),
                self.propagate_positions and PropagatePositions,
            ])

            yield rule, wrapper_chain

    def create_callback(self, transformer=None):
        callback = Callback()

        for rule, wrapper_chain in self.rule_builders:
            internal_callback_name = '_callback_%s_%s' % (rule.origin, '_'.join(rule.expansion))

            user_callback_name = rule.alias or rule.origin
            try:
                f = transformer._get_func(user_callback_name)
            except AttributeError:
                f = NodeBuilder(self.tree_class, user_callback_name)

            self.user_aliases[rule] = rule.alias
            rule.alias = internal_callback_name

            for w in wrapper_chain:
                f = w(f)

            if hasattr(callback, internal_callback_name):
                raise GrammarError("Rule '%s' already exists" % (rule,))
            setattr(callback, internal_callback_name, f)

        return callback


class _Parser:
    def __init__(self, parse_table, callbacks):
        self.states = parse_table.states
        self.start_state = parse_table.start_state
        self.end_state = parse_table.end_state
        self.callbacks = callbacks

    def parse(self, seq, set_state=None):
        i = 0
        token = None
        stream = iter(seq)
        states = self.states

        state_stack = [self.start_state]
        value_stack = []

        if set_state: set_state(self.start_state)

        def get_action(key):
            state = state_stack[-1]
            try:
                return states[state][key]
            except KeyError:
                expected = states[state].keys()

                raise UnexpectedToken(token, expected, seq, i)

        def reduce(rule):
            size = len(rule.expansion)
            if size:
                s = value_stack[-size:]
                del state_stack[-size:]
                del value_stack[-size:]
            else:
                s = []

            value = self.callbacks[rule](s)

            _action, new_state = get_action(rule.origin)
            assert _action is Shift
            state_stack.append(new_state)
            value_stack.append(value)

        # Main LALR-parser loop
        try:
            token = next(stream)
            i += 1
            while True:
                action, arg = get_action(token.type)
                assert arg != self.end_state

                if action is Shift:
                    state_stack.append(arg)
                    value_stack.append(token)
                    if set_state: set_state(arg)
                    token = next(stream)
                    i += 1
                else:
                    reduce(arg)
        except StopIteration:
            pass

        while True:
            _action, arg = get_action('$END')
            if _action is Shift:
                assert arg == self.end_state
                val, = value_stack
                return val
            else:
                reduce(arg)


class Rule(object):
    """
        origin : a symbol
        expansion : a list of symbols
    """
    def __init__(self, origin, expansion, alias=None, options=None):
        self.origin = origin
        self.expansion = expansion
        self.alias = alias
        self.options = options

    def __str__(self):
        return '<%s : %s>' % (self.origin, ' '.join(map(str, self.expansion)))

    def __repr__(self):
        return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)


class RuleOptions:
    # Note: the last two parameters are ordered (priority, filter_out) to
    # match __repr__ and the positional RuleOptions(...) calls in the
    # generated RULES table below.
    def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, priority=None, filter_out=False):
        self.keep_all_tokens = keep_all_tokens
        self.expand1 = expand1
        self.create_token = create_token  # used for scanless postprocessing
        self.priority = priority

        self.filter_out = filter_out  # remove this rule from the tree
                                      # used for "token"-rules in scanless

    def __repr__(self):
        return 'RuleOptions(%r, %r, %r, %r, %r)' % (
            self.keep_all_tokens,
            self.expand1,
            self.create_token,
            self.priority,
            self.filter_out
        )


Shift = 0
Reduce = 1
import re
MRES = (
[('(?P<SIGNED_NUMBER>(?:(?:\\+|\\-))?(?:(?:(?:[0-9])+(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+|(?:(?:[0-9])+\\.(?:(?:[0-9])+)?|\\.(?:[0-9])+)(?:(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+)?)|(?:[0-9])+))|(?P<ESCAPED_STRING>\\"(?:(?:\\\\\\"|[^"]))*\\")|(?P<WS>(?:[ \t\x0c'
  '\r\n'
  '])+)|(?P<__FALSE1>false)|(?P<__NULL2>null)|(?P<__TRUE0>true)|(?P<__COLON>\\:)|(?P<__COMMA>\\,)|(?P<__LBRACE>\\{)|(?P<__LSQB>\\[)|(?P<__RBRACE>\\})|(?P<__RSQB>\\])',
  {1: 'SIGNED_NUMBER',
   2: 'ESCAPED_STRING',
   3: 'WS',
   4: '__FALSE1',
   5: '__NULL2',
   6: '__TRUE0',
   7: '__COLON',
   8: '__COMMA',
   9: '__LBRACE',
   10: '__LSQB',
   11: '__RBRACE',
   12: '__RSQB'})]
)
LEXER_CALLBACK = (
{}
)
NEWLINE_TYPES = ['WS']
IGNORE_TYPES = ['WS']
class LexerRegexps: pass
lexer_regexps = LexerRegexps()
lexer_regexps.mres = [(re.compile(p), d) for p, d in MRES]
lexer_regexps.callback = {n: UnlessCallback([(re.compile(p), d) for p, d in mres])
                          for n, mres in LEXER_CALLBACK.items()}
lexer = _Lex(lexer_regexps)
def lex(stream):
    return lexer.lex(stream, NEWLINE_TYPES, IGNORE_TYPES)
RULES = {
  0: Rule('start', ['value'], None, RuleOptions(False, True, None, None, False)),
  1: Rule('value', ['object'], None, RuleOptions(False, True, None, None, False)),
  2: Rule('value', ['array'], None, RuleOptions(False, True, None, None, False)),
  3: Rule('value', ['string'], None, RuleOptions(False, True, None, None, False)),
  4: Rule('value', ['SIGNED_NUMBER'], 'number', RuleOptions(False, True, None, None, False)),
  5: Rule('value', ['__TRUE0'], 'true', RuleOptions(False, True, None, None, False)),
  6: Rule('value', ['__FALSE1'], 'false', RuleOptions(False, True, None, None, False)),
  7: Rule('value', ['__NULL2'], 'null', RuleOptions(False, True, None, None, False)),
  8: Rule('array', ['__LSQB', 'value', '__anon_star_0', '__RSQB'], None, RuleOptions(False, False, None, None, False)),
  9: Rule('array', ['__LSQB', 'value', '__RSQB'], None, RuleOptions(False, False, None, None, False)),
  10: Rule('array', ['__LSQB', '__RSQB'], None, RuleOptions(False, False, None, None, False)),
  11: Rule('object', ['__LBRACE', 'pair', '__anon_star_1', '__RBRACE'], None, RuleOptions(False, False, None, None, False)),
  12: Rule('object', ['__LBRACE', 'pair', '__RBRACE'], None, RuleOptions(False, False, None, None, False)),
  13: Rule('object', ['__LBRACE', '__RBRACE'], None, RuleOptions(False, False, None, None, False)),
  14: Rule('pair', ['string', '__COLON', 'value'], None, RuleOptions(False, False, None, None, False)),
  15: Rule('string', ['ESCAPED_STRING'], None, RuleOptions(False, False, None, None, False)),
  16: Rule('__anon_star_0', ['__COMMA', 'value'], None, None),
  17: Rule('__anon_star_0', ['__anon_star_0', '__COMMA', 'value'], None, None),
  18: Rule('__anon_star_1', ['__COMMA', 'pair'], None, None),
  19: Rule('__anon_star_1', ['__anon_star_1', '__COMMA', 'pair'], None, None),
}
parse_tree_builder = ParseTreeBuilder(RULES.values(), Tree)
class ParseTable: pass
parse_table = ParseTable()
STATES = {
  0: {0: (0, 1), 1: (0, 2), 2: (0, 3), 3: (0, 4), 4: (0, 5), 5: (0, 6), 6: (0, 7), 7: (0, 8), 8: (0, 9), 9: (0, 10), 10: (0, 11), 11: (0, 12)},
  1: {12: (1, 5), 13: (1, 5), 14: (1, 5), 15: (1, 5)},
  2: {9: (0, 10), 14: (0, 13), 16: (0, 14), 11: (0, 15)},
  3: {12: (1, 2), 13: (1, 2), 14: (1, 2), 15: (1, 2)},
  4: {12: (1, 1), 13: (1, 1), 14: (1, 1), 15: (1, 1)},
  5: {12: (0, 16)},
  6: {7: (0, 17), 0: (0, 1), 1: (0, 2), 2: (0, 3), 3: (0, 4), 5: (0, 6), 6: (0, 7), 8: (0, 9), 9: (0, 10), 15: (0, 18), 10: (0, 11), 11: (0, 12)},
  7: {12: (1, 4), 13: (1, 4), 14: (1, 4), 15: (1, 4)},
  8: {12: (1, 0)},
  9: {12: (1, 7), 13: (1, 7), 14: (1, 7), 15: (1, 7)},
  10: {12: (1, 15), 17: (1, 15), 13: (1, 15), 14: (1, 15), 15: (1, 15)},
  11: {12: (1, 6), 13: (1, 6), 14: (1, 6), 15: (1, 6)},
  12: {12: (1, 3), 13: (1, 3), 14: (1, 3), 15: (1, 3)},
  13: {13: (1, 13), 12: (1, 13), 14: (1, 13), 15: (1, 13)},
  14: {14: (0, 19), 13: (0, 20), 18: (0, 21)},
  15: {17: (0, 22)},
  16: {},
  17: {19: (0, 23), 15: (0, 24), 13: (0, 25)},
  18: {13: (1, 10), 12: (1, 10), 14: (1, 10), 15: (1, 10)},
  19: {13: (1, 12), 12: (1, 12), 14: (1, 12), 15: (1, 12)},
  20: {9: (0, 10), 11: (0, 15), 16: (0, 26)},
  21: {14: (0, 27), 13: (0, 28)},
  22: {5: (0, 6), 1: (0, 2), 0: (0, 1), 8: (0, 9), 2: (0, 3), 3: (0, 4), 9: (0, 10), 6: (0, 7), 10: (0, 11), 11: (0, 12), 7: (0, 29)},
  23: {15: (0, 30), 13: (0, 31)},
  24: {13: (1, 9), 12: (1, 9), 14: (1, 9), 15: (1, 9)},
  25: {5: (0, 6), 1: (0, 2), 0: (0, 1), 8: (0, 9), 2: (0, 3), 3: (0, 4), 7: (0, 32), 9: (0, 10), 6: (0, 7), 10: (0, 11), 11: (0, 12)},
  26: {13: (1, 18), 14: (1, 18)},
  27: {13: (1, 11), 12: (1, 11), 14: (1, 11), 15: (1, 11)},
  28: {16: (0, 33), 9: (0, 10), 11: (0, 15)},
  29: {13: (1, 14), 14: (1, 14)},
  30: {13: (1, 8), 12: (1, 8), 14: (1, 8), 15: (1, 8)},
  31: {5: (0, 6), 1: (0, 2), 0: (0, 1), 7: (0, 34), 8: (0, 9), 2: (0, 3), 3: (0, 4), 9: (0, 10), 6: (0, 7), 10: (0, 11), 11: (0, 12)},
  32: {15: (1, 16), 13: (1, 16)},
  33: {13: (1, 19), 14: (1, 19)},
  34: {15: (1, 17), 13: (1, 17)},
}
TOKEN_TYPES = (
{0: '__TRUE0',
 1: '__LBRACE',
 2: 'array',
 3: 'object',
 4: 'start',
 5: '__LSQB',
 6: 'SIGNED_NUMBER',
 7: 'value',
 8: '__NULL2',
 9: 'ESCAPED_STRING',
 10: '__FALSE1',
 11: 'string',
 12: '$END',
 13: '__COMMA',
 14: '__RBRACE',
 15: '__RSQB',
 16: 'pair',
 17: '__COLON',
 18: '__anon_star_1',
 19: '__anon_star_0'}
)
parse_table.states = {s: {TOKEN_TYPES[t]: (a, RULES[x] if a is Reduce else x) for t, (a, x) in acts.items()}
                      for s, acts in STATES.items()}
parse_table.start_state = 0
parse_table.end_state = 16


class Lark_StandAlone:
    def __init__(self, transformer=None, postlex=None):
        callback = parse_tree_builder.create_callback(transformer=transformer)
        callbacks = {rule: getattr(callback, rule.alias or rule.origin, None) for rule in RULES.values()}
        self.parser = _Parser(parse_table, callbacks)
        self.postlex = postlex

    def parse(self, stream):
        tokens = lex(stream)
        if self.postlex: tokens = self.postlex.process(tokens)
        return self.parser.parse(tokens)
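
# Illustrative usage (editor's addition, not part of the generated output):
# this standalone module parses JSON according to the tables above.
if __name__ == '__main__':
    parser = Lark_StandAlone()
    tree = parser.parse('{"answer": [42, true, null]}')
    print(tree.pretty())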