# The file was automatically generated by Lark v0.5.5 # # # Lark Stand-alone Generator Tool # ---------------------------------- # Generates a stand-alone LALR(1) parser with a standard lexer # # Git: https://github.com/erezsh/lark # Author: Erez Shinan (erezshin@gmail.com) # # # >>> LICENSE # # This tool and its generated code use a separate license from Lark. # # It is licensed under GPLv2 or above. # # If you wish to purchase a commercial license for this tool and its # generated code, contact me via email. # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # See . # # import types import functools from contextlib import contextmanager Str = type(u'') def inline_args(f): # print '@@', f.__name__, type(f), isinstance(f, types.FunctionType), isinstance(f, types.TypeType), isinstance(f, types.BuiltinFunctionType) if isinstance(f, types.FunctionType): @functools.wraps(f) def _f_func(self, args): return f(self, *args) return _f_func elif isinstance(f, (type, types.BuiltinFunctionType)): @functools.wraps(f) def _f_builtin(_self, args): return f(*args) return _f_builtin elif isinstance(f, types.MethodType): @functools.wraps(f.__func__) def _f(self, args): return f.__func__(self, *args) return _f else: @functools.wraps(f.__call__.__func__) def _f(self, args): return f.__call__.__func__(self, *args) return _f try: from contextlib import suppress # Python 3 except ImportError: @contextmanager def suppress(*excs): '''Catch and dismiss the provided exception >>> x = 'hello' >>> with suppress(IndexError): ... x = x[10] >>> x 'hello' ''' try: yield except excs: pass def is_terminal(sym): return sym.isupper() class GrammarError(Exception): pass class ParseError(Exception): pass class UnexpectedToken(ParseError): def __init__(self, token, expected, seq, index, considered_rules=None): self.token = token self.expected = expected self.line = getattr(token, 'line', '?') self.column = getattr(token, 'column', '?') self.considered_rules = considered_rules try: context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]]) except AttributeError: context = seq[index:index+5] except TypeError: context = "" message = ("Unexpected token %r at line %s, column %s.\n" "Expected: %s\n" "Context: %s" % (token, self.line, self.column, expected, context)) super(UnexpectedToken, self).__init__(message) class Tree(object): def __init__(self, data, children): self.data = data self.children = children def __repr__(self): return 'Tree(%s, %s)' % (self.data, self.children) def _pretty_label(self): return self.data def _pretty(self, level, indent_str): if len(self.children) == 1 and not isinstance(self.children[0], Tree): return [ indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n'] l = [ indent_str*level, self._pretty_label(), '\n' ] for n in self.children: if isinstance(n, Tree): l += n._pretty(level+1, indent_str) else: l += [ indent_str*(level+1), '%s' % (n,), '\n' ] return l def pretty(self, indent_str=' '): return ''.join(self._pretty(0, indent_str)) class Transformer(object): def _get_func(self, name): return getattr(self, name) def transform(self, tree): items = [] for c in tree.children: try: items.append(self.transform(c) if isinstance(c, Tree) else c) except Discard: pass try: f = self._get_func(tree.data) except AttributeError: return self.__default__(tree.data, items) else: return f(items) def __default__(self, data, children): return Tree(data, children) def __mul__(self, other): return TransformerChain(self, other) class Discard(Exception): pass class TransformerChain(object): def __init__(self, *transformers): self.transformers = transformers def transform(self, tree): for t in self.transformers: tree = t.transform(tree) return tree def __mul__(self, other): return TransformerChain(*self.transformers + (other,)) class InlineTransformer(Transformer): def _get_func(self, name): # use super()._get_func return inline_args(getattr(self, name)).__get__(self) class Visitor(object): def visit(self, tree): for child in tree.children: if isinstance(child, Tree): self.visit(child) f = getattr(self, tree.data, self.__default__) f(tree) return tree def __default__(self, tree): pass class Visitor_NoRecurse(Visitor): def visit(self, tree): subtrees = list(tree.iter_subtrees()) for subtree in (subtrees): getattr(self, subtree.data, self.__default__)(subtree) return tree class Transformer_NoRecurse(Transformer): def transform(self, tree): subtrees = list(tree.iter_subtrees()) def _t(t): # Assumes t is already transformed try: f = self._get_func(t.data) except AttributeError: return self.__default__(t) else: return f(t) for subtree in subtrees: children = [] for c in subtree.children: try: children.append(_t(c) if isinstance(c, Tree) else c) except Discard: pass subtree.children = children return _t(tree) def __default__(self, t): return t class Indenter: def __init__(self): self.paren_level = 0 self.indent_level = [0] def handle_NL(self, token): if self.paren_level > 0: return yield token indent_str = token.rsplit('\n', 1)[1] # Tabs and spaces indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len if indent > self.indent_level[-1]: self.indent_level.append(indent) yield Token.new_borrow_pos(self.INDENT_type, indent_str, token) else: while indent < self.indent_level[-1]: self.indent_level.pop() yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token) assert indent == self.indent_level[-1], '%s != %s' % (indent, self.indent_level[-1]) def process(self, stream): for token in stream: if token.type == self.NL_type: for t in self.handle_NL(token): yield t else: yield token if token.type in self.OPEN_PAREN_types: self.paren_level += 1 elif token.type in self.CLOSE_PAREN_types: self.paren_level -= 1 assert self.paren_level >= 0 while len(self.indent_level) > 1: self.indent_level.pop() yield Token(self.DEDENT_type, '') assert self.indent_level == [0], self.indent_level # XXX Hack for ContextualLexer. Maybe there's a more elegant solution? @property def always_accept(self): return (self.NL_type,) class LexError(Exception): pass class UnexpectedInput(LexError): def __init__(self, seq, lex_pos, line, column, allowed=None, considered_rules=None): context = seq[lex_pos:lex_pos+5] message = "No token defined for: '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column) if allowed: message += '\n\nExpecting: %s\n' % allowed super(UnexpectedInput, self).__init__(message) self.line = line self.column = column self.context = context self.allowed = allowed self.considered_rules = considered_rules class Token(Str): def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None): inst = Str.__new__(cls, value) inst.type = type_ inst.pos_in_stream = pos_in_stream inst.value = value inst.line = line inst.column = column return inst @classmethod def new_borrow_pos(cls, type_, value, borrow_t): return cls(type_, value, borrow_t.pos_in_stream, line=borrow_t.line, column=borrow_t.column) def __repr__(self): return 'Token(%s, %r)' % (self.type, self.value) def __deepcopy__(self, memo): return Token(self.type, self.value, self.pos_in_stream, self.line, self.column) def __eq__(self, other): if isinstance(other, Token) and self.type != other.type: return False return Str.__eq__(self, other) __hash__ = Str.__hash__ class LineCounter: def __init__(self): self.newline_char = '\n' self.char_pos = 0 self.line = 1 self.column = 0 self.line_start_pos = 0 def feed(self, token, test_newline=True): """Consume a token and calculate the new line & column. As an optional optimization, set test_newline=False is token doesn't contain a newline. """ if test_newline: newlines = token.count(self.newline_char) if newlines: self.line += newlines self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1 self.char_pos += len(token) self.column = self.char_pos - self.line_start_pos class _Lex: "Built to serve both Lexer and ContextualLexer" def __init__(self, lexer): self.lexer = lexer def lex(self, stream, newline_types, ignore_types): newline_types = list(newline_types) ignore_types = list(ignore_types) line_ctr = LineCounter() t = None while True: lexer = self.lexer for mre, type_from_index in lexer.mres: m = mre.match(stream, line_ctr.char_pos) if m: value = m.group(0) type_ = type_from_index[m.lastindex] if type_ not in ignore_types: t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) if t.type in lexer.callback: t = lexer.callback[t.type](t) yield t else: if type_ in lexer.callback: t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) lexer.callback[type_](t) line_ctr.feed(value, type_ in newline_types) if t: t.end_line = line_ctr.line t.end_column = line_ctr.column break else: if line_ctr.char_pos < len(stream): raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column) break class UnlessCallback: def __init__(self, mres): self.mres = mres def __call__(self, t): for mre, type_from_index in self.mres: m = mre.match(t.value) if m: value = m.group(0) t.type = type_from_index[m.lastindex] break return t from functools import partial class ExpandSingleChild: def __init__(self, node_builder): self.node_builder = node_builder def __call__(self, children): if len(children) == 1: return children[0] else: return self.node_builder(children) class CreateToken: "Used for fixing the results of scanless parsing" def __init__(self, token_name, node_builder): self.node_builder = node_builder self.token_name = token_name def __call__(self, children): return self.node_builder( [Token(self.token_name, ''.join(children))] ) class PropagatePositions: def __init__(self, node_builder): self.node_builder = node_builder def __call__(self, children): res = self.node_builder(children) if children: for a in children: with suppress(AttributeError): res.line = a.line res.column = a.column break for a in reversed(children): with suppress(AttributeError): res.end_line = a.end_line res.end_column = a.end_column break return res class ChildFilter: def __init__(self, to_include, node_builder): self.node_builder = node_builder self.to_include = to_include def __call__(self, children): filtered = [] for i, to_expand in self.to_include: if to_expand: if filtered: filtered += children[i].children else: # Optimize for left-recursion filtered = children[i].children else: filtered.append(children[i]) return self.node_builder(filtered) def _should_expand(sym): return not is_terminal(sym) and sym.startswith('_') def maybe_create_child_filter(expansion, filter_out): to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion) if sym not in filter_out] if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include): return partial(ChildFilter, to_include) class Callback(object): pass class ParseTreeBuilder: def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False): self.tree_class = tree_class self.propagate_positions = propagate_positions self.always_keep_all_tokens = keep_all_tokens self.rule_builders = list(self._init_builders(rules)) self.user_aliases = {} def _init_builders(self, rules): filter_out = {rule.origin for rule in rules if rule.options and rule.options.filter_out} filter_out |= {sym for rule in rules for sym in rule.expansion if is_terminal(sym) and sym.startswith('_')} assert all(x.startswith('_') for x in filter_out) for rule in rules: options = rule.options keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False) expand_single_child = options.expand1 if options else False create_token = options.create_token if options else False wrapper_chain = filter(None, [ create_token and partial(CreateToken, create_token), (expand_single_child and not rule.alias) and ExpandSingleChild, maybe_create_child_filter(rule.expansion, () if keep_all_tokens else filter_out), self.propagate_positions and PropagatePositions, ]) yield rule, wrapper_chain def create_callback(self, transformer=None): callback = Callback() for rule, wrapper_chain in self.rule_builders: internal_callback_name = '_callback_%s_%s' % (rule.origin, '_'.join(rule.expansion)) user_callback_name = rule.alias or rule.origin try: f = transformer._get_func(user_callback_name) except AttributeError: f = partial(self.tree_class, user_callback_name) self.user_aliases[rule] = rule.alias rule.alias = internal_callback_name for w in wrapper_chain: f = w(f) if hasattr(callback, internal_callback_name): raise GrammarError("Rule '%s' already exists" % (rule,)) setattr(callback, internal_callback_name, f) return callback class _Parser: def __init__(self, parse_table, callbacks): self.states = parse_table.states self.start_state = parse_table.start_state self.end_state = parse_table.end_state self.callbacks = callbacks def parse(self, seq, set_state=None): i = 0 token = None stream = iter(seq) states = self.states state_stack = [self.start_state] value_stack = [] if set_state: set_state(self.start_state) def get_action(key): state = state_stack[-1] try: return states[state][key] except KeyError: expected = states[state].keys() raise UnexpectedToken(token, expected, seq, i) def reduce(rule): size = len(rule.expansion) if size: s = value_stack[-size:] del state_stack[-size:] del value_stack[-size:] else: s = [] value = self.callbacks[rule](s) _action, new_state = get_action(rule.origin) assert _action is Shift state_stack.append(new_state) value_stack.append(value) # Main LALR-parser loop for i, token in enumerate(stream): while True: action, arg = get_action(token.type) assert arg != self.end_state if action is Shift: state_stack.append(arg) value_stack.append(token) if set_state: set_state(arg) break # next token else: reduce(arg) while True: _action, arg = get_action('$END') if _action is Shift: assert arg == self.end_state val ,= value_stack return val else: reduce(arg) class Rule(object): """ origin : a symbol expansion : a list of symbols """ def __init__(self, origin, expansion, alias=None, options=None): self.origin = origin self.expansion = expansion self.alias = alias self.options = options def __str__(self): return '<%s : %s>' % (self.origin, ' '.join(map(str,self.expansion))) def __repr__(self): return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options) class RuleOptions: def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False, priority=None): self.keep_all_tokens = keep_all_tokens self.expand1 = expand1 self.create_token = create_token # used for scanless postprocessing self.priority = priority self.filter_out = filter_out # remove this rule from the tree # used for "token"-rules in scanless def __repr__(self): return 'RuleOptions(%r, %r, %r, %r, %r)' % ( self.keep_all_tokens, self.expand1, self.create_token, self.priority, self.filter_out ) Shift = 0 Reduce = 1 import re MRES = ( [(u'(?P(?:(?:\\+|\\-))?(?:(?:(?:[0-9])+(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+|(?:(?:[0-9])+\\.(?:(?:[0-9])+)?|\\.(?:[0-9])+)(?:(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+)?)|(?:[0-9])+))|(?P\\"(?:(?:\\\\\\"|[^"]))*\\")|(?P(?:[ \t\x0c\r\n])+)|(?P<__FALSE1>false)|(?P<__NULL2>null)|(?P<__TRUE0>true)|(?P<__COLON>\\:)|(?P<__COMMA>\\,)|(?P<__LBRACE>\\{)|(?P<__LSQB>\\[)|(?P<__RBRACE>\\})|(?P<__RSQB>\\])', {1: u'SIGNED_NUMBER', 2: u'ESCAPED_STRING', 3: u'WS', 4: u'__FALSE1', 5: u'__NULL2', 6: u'__TRUE0', 7: u'__COLON', 8: u'__COMMA', 9: u'__LBRACE', 10: u'__LSQB', 11: u'__RBRACE', 12: u'__RSQB'})] ) LEXER_CALLBACK = ( {} ) NEWLINE_TYPES = [u'WS'] IGNORE_TYPES = [u'WS'] class LexerRegexps: pass lexer_regexps = LexerRegexps() lexer_regexps.mres = [(re.compile(p), d) for p, d in MRES] lexer_regexps.callback = {n: UnlessCallback([(re.compile(p), d) for p, d in mres]) for n, mres in LEXER_CALLBACK.items()} lexer = _Lex(lexer_regexps) def lex(stream): return lexer.lex(stream, NEWLINE_TYPES, IGNORE_TYPES) RULES = { 0: Rule(u'start', [u'value'], None, RuleOptions(False, True, None, None, False)), 1: Rule(u'value', [u'string'], None, RuleOptions(False, True, None, None, False)), 2: Rule(u'value', [u'__TRUE0'], u'true', RuleOptions(False, True, None, None, False)), 3: Rule(u'value', [u'array'], None, RuleOptions(False, True, None, None, False)), 4: Rule(u'value', [u'__NULL2'], u'null', RuleOptions(False, True, None, None, False)), 5: Rule(u'value', [u'SIGNED_NUMBER'], u'number', RuleOptions(False, True, None, None, False)), 6: Rule(u'value', [u'object'], None, RuleOptions(False, True, None, None, False)), 7: Rule(u'value', [u'__FALSE1'], u'false', RuleOptions(False, True, None, None, False)), 8: Rule(u'array', ['__LSQB', u'value', '__RSQB'], None, RuleOptions(False, False, None, None, False)), 9: Rule(u'array', ['__LSQB', u'value', '__anon_star_0', '__RSQB'], None, RuleOptions(False, False, None, None, False)), 10: Rule(u'array', ['__LSQB', '__RSQB'], None, RuleOptions(False, False, None, None, False)), 11: Rule(u'object', ['__LBRACE', u'pair', '__anon_star_1', '__RBRACE'], None, RuleOptions(False, False, None, None, False)), 12: Rule(u'object', ['__LBRACE', '__RBRACE'], None, RuleOptions(False, False, None, None, False)), 13: Rule(u'object', ['__LBRACE', u'pair', '__RBRACE'], None, RuleOptions(False, False, None, None, False)), 14: Rule(u'pair', [u'string', '__COLON', u'value'], None, RuleOptions(False, False, None, None, False)), 15: Rule(u'string', [u'ESCAPED_STRING'], None, RuleOptions(False, False, None, None, False)), 16: Rule('__anon_star_0', ['__anon_star_0', '__COMMA', u'value'], None, None), 17: Rule('__anon_star_0', ['__COMMA', u'value'], None, None), 18: Rule('__anon_star_1', ['__COMMA', u'pair'], None, None), 19: Rule('__anon_star_1', ['__anon_star_1', '__COMMA', u'pair'], None, None), } parse_tree_builder = ParseTreeBuilder(RULES.values(), Tree) class ParseTable: pass parse_table = ParseTable() STATES = { 0: {0: (1, 4), 1: (1, 4), 2: (1, 4), 3: (1, 4)}, 1: {1: (1, 14), 2: (1, 14)}, 2: {0: (0, 29), 1: (0, 32), 4: (0, 9)}, 3: {1: (0, 13), 2: (0, 12)}, 4: {0: (1, 1), 1: (1, 1), 2: (1, 1), 3: (1, 1)}, 5: {0: (1, 10), 1: (1, 10), 2: (1, 10), 3: (1, 10)}, 6: {2: (0, 15), 5: (0, 27), 6: (0, 16), 7: (0, 26)}, 7: {5: (0, 34), 6: (0, 16), 7: (0, 26)}, 8: {0: (1, 2), 1: (1, 2), 2: (1, 2), 3: (1, 2)}, 9: {0: (0, 11), 1: (0, 22)}, 10: {0: (1, 6), 1: (1, 6), 2: (1, 6), 3: (1, 6)}, 11: {0: (1, 9), 1: (1, 9), 2: (1, 9), 3: (1, 9)}, 12: {0: (1, 11), 1: (1, 11), 2: (1, 11), 3: (1, 11)}, 13: {5: (0, 20), 6: (0, 16), 7: (0, 26)}, 14: {6: (0, 16), 7: (0, 4), 8: (0, 6), 9: (0, 31), 10: (0, 24), 11: (0, 10), 12: (0, 21), 13: (0, 17), 14: (0, 33), 15: (0, 0), 16: (0, 19), 17: (0, 8)}, 15: {0: (1, 12), 1: (1, 12), 2: (1, 12), 3: (1, 12)}, 16: {0: (1, 15), 1: (1, 15), 2: (1, 15), 3: (1, 15), 18: (1, 15)}, 17: {3: (1, 0)}, 18: {}, 19: {0: (1, 3), 1: (1, 3), 2: (1, 3), 3: (1, 3)}, 20: {1: (1, 19), 2: (1, 19)}, 21: {0: (1, 5), 1: (1, 5), 2: (1, 5), 3: (1, 5)}, 22: {6: (0, 16), 7: (0, 4), 8: (0, 6), 9: (0, 31), 10: (0, 24), 11: (0, 10), 12: (0, 21), 13: (0, 30), 15: (0, 0), 16: (0, 19), 17: (0, 8)}, 23: {6: (0, 16), 7: (0, 4), 8: (0, 6), 9: (0, 31), 10: (0, 24), 11: (0, 10), 12: (0, 21), 13: (0, 1), 15: (0, 0), 16: (0, 19), 17: (0, 8)}, 24: {0: (0, 5), 6: (0, 16), 7: (0, 4), 8: (0, 6), 9: (0, 31), 10: (0, 24), 11: (0, 10), 12: (0, 21), 13: (0, 2), 15: (0, 0), 16: (0, 19), 17: (0, 8)}, 25: {0: (1, 13), 1: (1, 13), 2: (1, 13), 3: (1, 13)}, 26: {18: (0, 23)}, 27: {1: (0, 7), 2: (0, 25), 19: (0, 3)}, 28: {0: (1, 17), 1: (1, 17)}, 29: {0: (1, 8), 1: (1, 8), 2: (1, 8), 3: (1, 8)}, 30: {0: (1, 16), 1: (1, 16)}, 31: {0: (1, 7), 1: (1, 7), 2: (1, 7), 3: (1, 7)}, 32: {6: (0, 16), 7: (0, 4), 8: (0, 6), 9: (0, 31), 10: (0, 24), 11: (0, 10), 12: (0, 21), 13: (0, 28), 15: (0, 0), 16: (0, 19), 17: (0, 8)}, 33: {3: (0, 18)}, 34: {1: (1, 18), 2: (1, 18)}, } TOKEN_TYPES = ( {0: '__RSQB', 1: '__COMMA', 2: '__RBRACE', 3: '$END', 4: '__anon_star_0', 5: u'pair', 6: u'ESCAPED_STRING', 7: u'string', 8: '__LBRACE', 9: u'__FALSE1', 10: '__LSQB', 11: u'object', 12: u'SIGNED_NUMBER', 13: u'value', 14: 'start', 15: u'__NULL2', 16: u'array', 17: u'__TRUE0', 18: '__COLON', 19: '__anon_star_1'} ) parse_table.states = {s: {TOKEN_TYPES[t]: (a, RULES[x] if a is Reduce else x) for t, (a, x) in acts.items()} for s, acts in STATES.items()} parse_table.start_state = 14 parse_table.end_state = 18 class Lark_StandAlone: def __init__(self, transformer=None, postlex=None): callback = parse_tree_builder.create_callback(transformer=transformer) callbacks = {rule: getattr(callback, rule.alias or rule.origin, None) for rule in RULES.values()} self.parser = _Parser(parse_table, callbacks) self.postlex = postlex def parse(self, stream): tokens = lex(stream) if self.postlex: tokens = self.postlex.process(tokens) return self.parser.parse(tokens)