From 4d219ae837aaf15c6d1c533358683e30abf837c1 Mon Sep 17 00:00:00 2001
From: Erez Shinan
Date: Thu, 11 Jan 2018 16:02:02 +0200
Subject: [PATCH] Added standalone example

---
 examples/standalone/create_standalone.sh |   1 +
 examples/standalone/json.g               |  21 +
 examples/standalone/json_parser.py       | 794 +++++++++++++++++++++++
 examples/standalone/json_parser_main.py  |  25 +
 4 files changed, 841 insertions(+)
 create mode 100755 examples/standalone/create_standalone.sh
 create mode 100644 examples/standalone/json.g
 create mode 100644 examples/standalone/json_parser.py
 create mode 100644 examples/standalone/json_parser_main.py

diff --git a/examples/standalone/create_standalone.sh b/examples/standalone/create_standalone.sh
new file mode 100755
index 0000000..1eba3a4
--- /dev/null
+++ b/examples/standalone/create_standalone.sh
@@ -0,0 +1 @@
+python -m lark.tools.standalone json.g > json_parser.py
diff --git a/examples/standalone/json.g b/examples/standalone/json.g
new file mode 100644
index 0000000..243a230
--- /dev/null
+++ b/examples/standalone/json.g
@@ -0,0 +1,21 @@
+?start: value
+
+?value: object
+      | array
+      | string
+      | SIGNED_NUMBER -> number
+      | "true" -> true
+      | "false" -> false
+      | "null" -> null
+
+array : "[" [value ("," value)*] "]"
+object : "{" [pair ("," pair)*] "}"
+pair : string ":" value
+
+string : ESCAPED_STRING
+
+%import common.ESCAPED_STRING
+%import common.SIGNED_NUMBER
+%import common.WS
+
+%ignore WS
diff --git a/examples/standalone/json_parser.py b/examples/standalone/json_parser.py
new file mode 100644
index 0000000..f249f61
--- /dev/null
+++ b/examples/standalone/json_parser.py
@@ -0,0 +1,794 @@
+# The file was automatically generated by Lark v0.5.2
+#
+#
+# Lark Stand-alone Generator Tool
+# ----------------------------------
+# Generates a stand-alone LALR(1) parser with a standard lexer
+#
+# Git: https://github.com/erezsh/lark
+# Author: Erez Shinan (erezshin@gmail.com)
+#
+#
+# >>> LICENSE
+#
+# This tool and its generated code use a separate license from Lark.
+#
+# It is licensed under GPLv2 or above.
+#
+# If you wish to purchase a commercial license for this tool and its
+# generated code, contact me via email.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# See <http://www.gnu.org/licenses/>.
+# +# + + +import types +import functools +from contextlib import contextmanager + +Str = type(u'') + +def inline_args(f): + # print '@@', f.__name__, type(f), isinstance(f, types.FunctionType), isinstance(f, types.TypeType), isinstance(f, types.BuiltinFunctionType) + if isinstance(f, types.FunctionType): + @functools.wraps(f) + def _f_func(self, args): + return f(self, *args) + return _f_func + elif isinstance(f, (type, types.BuiltinFunctionType)): + @functools.wraps(f) + def _f_builtin(_self, args): + return f(*args) + return _f_builtin + elif isinstance(f, types.MethodType): + @functools.wraps(f.__func__) + def _f(self, args): + return f.__func__(self, *args) + return _f + else: + @functools.wraps(f.__call__.__func__) + def _f(self, args): + return f.__call__.__func__(self, *args) + return _f + + +try: + from contextlib import suppress # Python 3 +except ImportError: + @contextmanager + def suppress(*excs): + '''Catch and dismiss the provided exception + + >>> x = 'hello' + >>> with suppress(IndexError): + ... x = x[10] + >>> x + 'hello' + ''' + try: + yield + except excs: + pass + + +def is_terminal(sym): + return sym.isupper() + +class GrammarError(Exception): + pass + +class ParseError(Exception): + pass + +class UnexpectedToken(ParseError): + def __init__(self, token, expected, seq, index): + self.token = token + self.expected = expected + self.line = getattr(token, 'line', '?') + self.column = getattr(token, 'column', '?') + + try: + context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]]) + except AttributeError: + context = seq[index:index+5] + except TypeError: + context = "" + message = ("Unexpected token %r at line %s, column %s.\n" + "Expected: %s\n" + "Context: %s" % (token, self.line, self.column, expected, context)) + + super(UnexpectedToken, self).__init__(message) + + + +class Tree(object): + def __init__(self, data, children): + self.data = data + self.children = list(children) + + def __repr__(self): + return 'Tree(%s, %s)' % (self.data, self.children) + + def _pretty_label(self): + return self.data + + def _pretty(self, level, indent_str): + if len(self.children) == 1 and not isinstance(self.children[0], Tree): + return [ indent_str*level, self._pretty_label(), '\t', '%s' % self.children[0], '\n'] + + l = [ indent_str*level, self._pretty_label(), '\n' ] + for n in self.children: + if isinstance(n, Tree): + l += n._pretty(level+1, indent_str) + else: + l += [ indent_str*(level+1), '%s' % n, '\n' ] + + return l + + def pretty(self, indent_str=' '): + return ''.join(self._pretty(0, indent_str)) +class Transformer(object): + def _get_func(self, name): + return getattr(self, name) + + def transform(self, tree): + items = [] + for c in tree.children: + try: + items.append(self.transform(c) if isinstance(c, Tree) else c) + except Discard: + pass + try: + f = self._get_func(tree.data) + except AttributeError: + return self.__default__(tree.data, items) + else: + return f(items) + + def __default__(self, data, children): + return Tree(data, children) + + def __mul__(self, other): + return TransformerChain(self, other) + + +class Discard(Exception): + pass + +class TransformerChain(object): + def __init__(self, *transformers): + self.transformers = transformers + + def transform(self, tree): + for t in self.transformers: + tree = t.transform(tree) + return tree + + def __mul__(self, other): + return TransformerChain(*self.transformers + (other,)) + + + +class InlineTransformer(Transformer): + def _get_func(self, name): # use super()._get_func + return 
inline_args(getattr(self, name)).__get__(self) + + +class Visitor(object): + def visit(self, tree): + for child in tree.children: + if isinstance(child, Tree): + self.visit(child) + + f = getattr(self, tree.data, self.__default__) + f(tree) + return tree + + def __default__(self, tree): + pass + + +class Visitor_NoRecurse(Visitor): + def visit(self, tree): + subtrees = list(tree.iter_subtrees()) + + for subtree in (subtrees): + getattr(self, subtree.data, self.__default__)(subtree) + return tree + + +class Transformer_NoRecurse(Transformer): + def transform(self, tree): + subtrees = list(tree.iter_subtrees()) + + def _t(t): + # Assumes t is already transformed + try: + f = self._get_func(t.data) + except AttributeError: + return self.__default__(t) + else: + return f(t) + + for subtree in subtrees: + children = [] + for c in subtree.children: + try: + children.append(_t(c) if isinstance(c, Tree) else c) + except Discard: + pass + subtree.children = children + + return _t(tree) + + def __default__(self, t): + return t + +class Indenter: + def __init__(self): + self.paren_level = 0 + self.indent_level = [0] + + def handle_NL(self, token): + if self.paren_level > 0: + return + + yield token + + indent_str = token.rsplit('\n', 1)[1] # Tabs and spaces + indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len + + if indent > self.indent_level[-1]: + self.indent_level.append(indent) + yield Token.new_borrow_pos(self.INDENT_type, indent_str, token) + else: + while indent < self.indent_level[-1]: + self.indent_level.pop() + yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token) + + assert indent == self.indent_level[-1], '%s != %s' % (indent, self.indent_level[-1]) + + def process(self, stream): + for token in stream: + if token.type == self.NL_type: + for t in self.handle_NL(token): + yield t + else: + yield token + + if token.type in self.OPEN_PAREN_types: + self.paren_level += 1 + elif token.type in self.CLOSE_PAREN_types: + self.paren_level -= 1 + assert self.paren_level >= 0 + + while len(self.indent_level) > 1: + self.indent_level.pop() + yield Token(self.DEDENT_type, '') + + assert self.indent_level == [0], self.indent_level + + # XXX Hack for ContextualLexer. Maybe there's a more elegant solution? 
+    @property
+    def always_accept(self):
+        return (self.NL_type,)
+
+
+class LexError(Exception):
+    pass
+
+class UnexpectedInput(LexError):
+    def __init__(self, seq, lex_pos, line, column, allowed=None):
+        context = seq[lex_pos:lex_pos+5]
+        message = "No token defined for: '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column)
+
+        super(UnexpectedInput, self).__init__(message)
+
+        self.line = line
+        self.column = column
+        self.context = context
+        self.allowed = allowed
+
+class Token(Str):
+    def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None):
+        inst = Str.__new__(cls, value)
+        inst.type = type_
+        inst.pos_in_stream = pos_in_stream
+        inst.value = value
+        inst.line = line
+        inst.column = column
+        return inst
+
+    @classmethod
+    def new_borrow_pos(cls, type_, value, borrow_t):
+        return cls(type_, value, borrow_t.pos_in_stream, line=borrow_t.line, column=borrow_t.column)
+
+    def __repr__(self):
+        return 'Token(%s, %r)' % (self.type, self.value)
+
+    def __deepcopy__(self, memo):
+        return Token(self.type, self.value, self.pos_in_stream, self.line, self.column)
+
+    def __eq__(self, other):
+        if isinstance(other, Token) and self.type != other.type:
+            return False
+
+        return Str.__eq__(self, other)
+
+    __hash__ = Str.__hash__
+
+
+class LineCounter:
+    def __init__(self):
+        self.newline_char = '\n'
+        self.char_pos = 0
+        self.line = 1
+        self.column = 0
+        self.line_start_pos = 0
+
+    def feed(self, token, test_newline=True):
+        """Consume a token and calculate the new line & column.
+
+        As an optional optimization, set test_newline=False if the token doesn't contain a newline.
+        """
+        if test_newline:
+            newlines = token.count(self.newline_char)
+            if newlines:
+                self.line += newlines
+                self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
+
+        self.char_pos += len(token)
+        self.column = self.char_pos - self.line_start_pos
+
+class _Lex:
+    "Built to serve both Lexer and ContextualLexer"
+    def __init__(self, lexer):
+        self.lexer = lexer
+
+    def lex(self, stream, newline_types, ignore_types):
+        newline_types = list(newline_types)
+        ignore_types = list(ignore_types)
+        line_ctr = LineCounter()
+
+        while True:
+            lexer = self.lexer
+            for mre, type_from_index in lexer.mres:
+                m = mre.match(stream, line_ctr.char_pos)
+                if m:
+                    value = m.group(0)
+                    type_ = type_from_index[m.lastindex]
+                    if type_ not in ignore_types:
+                        t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
+                        if t.type in lexer.callback:
+                            t = lexer.callback[t.type](t)
+                        lexer = yield t
+
+                    line_ctr.feed(value, type_ in newline_types)
+                    break
+            else:
+                if line_ctr.char_pos < len(stream):
+                    raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
+                break
+
+class UnlessCallback:
+    def __init__(self, mres):
+        self.mres = mres
+
+    def __call__(self, t):
+        for mre, type_from_index in self.mres:
+            m = mre.match(t.value)
+            if m:
+                value = m.group(0)
+                t.type = type_from_index[m.lastindex]
+                break
+        return t
+
+
+
+class NodeBuilder:
+    def __init__(self, tree_class, name):
+        self.tree_class = tree_class
+        self.name = name
+
+    def __call__(self, children):
+        return self.tree_class(self.name, children)
+
+class Expand1:
+    def __init__(self, node_builder):
+        self.node_builder = node_builder
+
+    def __call__(self, children):
+        if len(children) == 1:
+            return children[0]
+        else:
+            return self.node_builder(children)
+
+class Factory:
+    def __init__(self, cls, *args):
+        self.cls = cls
+        self.args = args
+
+    def __call__(self, node_builder):
+        return
self.cls(node_builder, *self.args) + + +class TokenWrapper: + "Used for fixing the results of scanless parsing" + + def __init__(self, node_builder, token_name): + self.node_builder = node_builder + self.token_name = token_name + + def __call__(self, children): + return self.node_builder( [Token(self.token_name, ''.join(children))] ) + +def identity(node_builder): + return node_builder + + +class ChildFilter: + def __init__(self, node_builder, to_include): + self.node_builder = node_builder + self.to_include = to_include + + def __call__(self, children): + filtered = [] + for i, to_expand in self.to_include: + if to_expand: + filtered += children[i].children + else: + filtered.append(children[i]) + + return self.node_builder(filtered) + +def create_rule_handler(expansion, keep_all_tokens, filter_out): + # if not keep_all_tokens: + to_include = [(i, not is_terminal(sym) and sym.startswith('_')) + for i, sym in enumerate(expansion) + if keep_all_tokens + or not ((is_terminal(sym) and sym.startswith('_')) or sym in filter_out) + ] + + if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include): + return Factory(ChildFilter, to_include) + + # else, if no filtering required.. + return identity + +class PropagatePositions: + def __init__(self, node_builder): + self.node_builder = node_builder + + def __call__(self, children): + res = self.node_builder(children) + + if children: + for a in children: + with suppress(AttributeError): + res.line = a.line + res.column = a.column + break + + for a in reversed(children): + with suppress(AttributeError): + res.end_line = a.end_line + res.end_col = a.end_col + break + + return res + + +class Callback(object): + pass + +class ParseTreeBuilder: + def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False): + self.tree_class = tree_class + self.propagate_positions = propagate_positions + self.always_keep_all_tokens = keep_all_tokens + + self.rule_builders = list(self._init_builders(rules)) + + self.user_aliases = {} + + def _init_builders(self, rules): + filter_out = set() + for rule in rules: + if rule.options and rule.options.filter_out: + assert rule.origin.startswith('_') # Just to make sure + filter_out.add(rule.origin) + + for rule in rules: + options = rule.options + keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False) + expand1 = options.expand1 if options else False + create_token = options.create_token if options else False + + wrapper_chain = filter(None, [ + (expand1 and not rule.alias) and Expand1, + create_token and Factory(TokenWrapper, create_token), + create_rule_handler(rule.expansion, keep_all_tokens, filter_out), + self.propagate_positions and PropagatePositions, + ]) + + yield rule, wrapper_chain + + + def create_callback(self, transformer=None): + callback = Callback() + + for rule, wrapper_chain in self.rule_builders: + internal_callback_name = '_callback_%s_%s' % (rule.origin, '_'.join(rule.expansion)) + + user_callback_name = rule.alias or rule.origin + try: + f = transformer._get_func(user_callback_name) + except AttributeError: + f = NodeBuilder(self.tree_class, user_callback_name) + + self.user_aliases[rule] = rule.alias + rule.alias = internal_callback_name + + for w in wrapper_chain: + f = w(f) + + if hasattr(callback, internal_callback_name): + raise GrammarError("Rule '%s' already exists" % (rule,)) + setattr(callback, internal_callback_name, f) + + return callback + + + +class _Parser: + def __init__(self, parse_table, 
callbacks):
+        self.states = parse_table.states
+        self.start_state = parse_table.start_state
+        self.end_state = parse_table.end_state
+        self.callbacks = callbacks
+
+    def parse(self, seq, set_state=None):
+        i = 0
+        token = None
+        stream = iter(seq)
+        states = self.states
+
+        state_stack = [self.start_state]
+        value_stack = []
+
+        if set_state: set_state(self.start_state)
+
+        def get_action(key):
+            state = state_stack[-1]
+            try:
+                return states[state][key]
+            except KeyError:
+                expected = states[state].keys()
+
+                raise UnexpectedToken(token, expected, seq, i)
+
+        def reduce(rule):
+            size = len(rule.expansion)
+            if size:
+                s = value_stack[-size:]
+                del state_stack[-size:]
+                del value_stack[-size:]
+            else:
+                s = []
+
+            value = self.callbacks[rule](s)
+
+            _action, new_state = get_action(rule.origin)
+            assert _action is Shift
+            state_stack.append(new_state)
+            value_stack.append(value)
+
+        # Main LALR-parser loop
+        try:
+            token = next(stream)
+            i += 1
+            while True:
+                action, arg = get_action(token.type)
+                assert arg != self.end_state
+
+                if action is Shift:
+                    state_stack.append(arg)
+                    value_stack.append(token)
+                    if set_state: set_state(arg)
+                    token = next(stream)
+                    i += 1
+                else:
+                    reduce(arg)
+        except StopIteration:
+            pass
+
+        while True:
+            _action, arg = get_action('$END')
+            if _action is Shift:
+                assert arg == self.end_state
+                val ,= value_stack
+                return val
+            else:
+                reduce(arg)
+
+
+
+class Rule(object):
+    """
+        origin : a symbol
+        expansion : a list of symbols
+    """
+    def __init__(self, origin, expansion, alias=None, options=None):
+        self.origin = origin
+        self.expansion = expansion
+        self.alias = alias
+        self.options = options
+
+    def __str__(self):
+        return '<%s : %s>' % (self.origin, ' '.join(map(str,self.expansion)))
+
+    def __repr__(self):
+        return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)
+
+
+class RuleOptions:
+    def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False, priority=None):
+        self.keep_all_tokens = keep_all_tokens
+        self.expand1 = expand1
+        self.create_token = create_token  # used for scanless postprocessing
+        self.priority = priority
+
+        self.filter_out = filter_out  # remove this rule from the tree
+                                      # used for "token"-rules in scanless
+
+    def __repr__(self):
+        return 'RuleOptions(%r, %r, %r, %r, %r)' % (
+            self.keep_all_tokens,
+            self.expand1,
+            self.create_token,
+            self.priority,
+            self.filter_out
+        )
+
+Shift = 0
+Reduce = 1
+import re
+MRES = (
+[('(?P<SIGNED_NUMBER>(?:(?:\\+|\\-))?(?:(?:(?:[0-9])+(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+|(?:(?:[0-9])+\\.(?:(?:[0-9])+)?|\\.(?:[0-9])+)(?:(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+)?)|(?:[0-9])+))|(?P<ESCAPED_STRING>\\"(?:(?:\\\\\\"|[^"]))*\\")|(?P<WS>(?:[ \t\x0c' + '\r\n' + '])+)|(?P<__FALSE1>false)|(?P<__NULL2>null)|(?P<__TRUE0>true)|(?P<__COLON>\\:)|(?P<__COMMA>\\,)|(?P<__LBRACE>\\{)|(?P<__LSQB>\\[)|(?P<__RBRACE>\\})|(?P<__RSQB>\\])',
+  {1: 'SIGNED_NUMBER',
+   2: 'ESCAPED_STRING',
+   3: 'WS',
+   4: '__FALSE1',
+   5: '__NULL2',
+   6: '__TRUE0',
+   7: '__COLON',
+   8: '__COMMA',
+   9: '__LBRACE',
+   10: '__LSQB',
+   11: '__RBRACE',
+   12: '__RSQB'})]
+)
+LEXER_CALLBACK = (
+{}
+)
+NEWLINE_TYPES = ['WS']
+IGNORE_TYPES = ['WS']
+class LexerRegexps: pass
+lexer_regexps = LexerRegexps()
+lexer_regexps.mres = [(re.compile(p), d) for p, d in MRES]
+lexer_regexps.callback = {n: UnlessCallback([(re.compile(p), d) for p, d in mres])
+                          for n, mres in LEXER_CALLBACK.items()}
+lexer = _Lex(lexer_regexps)
+def lex(stream):
+    return lexer.lex(stream, NEWLINE_TYPES, IGNORE_TYPES)
+RULES = {
+  0: Rule('start', ['value'],
None, RuleOptions(False, True, None, None, False)), + 1: Rule('value', ['object'], None, RuleOptions(False, True, None, None, False)), + 2: Rule('value', ['array'], None, RuleOptions(False, True, None, None, False)), + 3: Rule('value', ['string'], None, RuleOptions(False, True, None, None, False)), + 4: Rule('value', ['SIGNED_NUMBER'], 'number', RuleOptions(False, True, None, None, False)), + 5: Rule('value', ['__TRUE0'], 'true', RuleOptions(False, True, None, None, False)), + 6: Rule('value', ['__FALSE1'], 'false', RuleOptions(False, True, None, None, False)), + 7: Rule('value', ['__NULL2'], 'null', RuleOptions(False, True, None, None, False)), + 8: Rule('array', ['__LSQB', 'value', '__anon_star_0', '__RSQB'], None, RuleOptions(False, False, None, None, False)), + 9: Rule('array', ['__LSQB', 'value', '__RSQB'], None, RuleOptions(False, False, None, None, False)), + 10: Rule('array', ['__LSQB', '__RSQB'], None, RuleOptions(False, False, None, None, False)), + 11: Rule('object', ['__LBRACE', 'pair', '__anon_star_1', '__RBRACE'], None, RuleOptions(False, False, None, None, False)), + 12: Rule('object', ['__LBRACE', 'pair', '__RBRACE'], None, RuleOptions(False, False, None, None, False)), + 13: Rule('object', ['__LBRACE', '__RBRACE'], None, RuleOptions(False, False, None, None, False)), + 14: Rule('pair', ['string', '__COLON', 'value'], None, RuleOptions(False, False, None, None, False)), + 15: Rule('string', ['ESCAPED_STRING'], None, RuleOptions(False, False, None, None, False)), + 16: Rule('__anon_star_0', ['__COMMA', 'value'], None, None), + 17: Rule('__anon_star_0', ['__anon_star_0', '__COMMA', 'value'], None, None), + 18: Rule('__anon_star_1', ['__COMMA', 'pair'], None, None), + 19: Rule('__anon_star_1', ['__anon_star_1', '__COMMA', 'pair'], None, None), +} +parse_tree_builder = ParseTreeBuilder(RULES.values(), Tree) +class ParseTable: pass +parse_table = ParseTable() +STATES = { + 0: {0: (0, 1), 1: (0, 2), 2: (0, 3), 3: (0, 4), 4: (0, 5), 5: (0, 6), 6: (0, 7), 7: (0, 8), 8: (0, 9), 9: (0, 10), 10: (0, 11), 11: (0, 12)}, + 1: {12: (1, 5), 13: (1, 5), 14: (1, 5), 15: (1, 5)}, + 2: {9: (0, 10), 14: (0, 13), 16: (0, 14), 11: (0, 15)}, + 3: {12: (1, 2), 13: (1, 2), 14: (1, 2), 15: (1, 2)}, + 4: {12: (1, 1), 13: (1, 1), 14: (1, 1), 15: (1, 1)}, + 5: {12: (0, 16)}, + 6: {7: (0, 17), 0: (0, 1), 1: (0, 2), 2: (0, 3), 3: (0, 4), 5: (0, 6), 6: (0, 7), 8: (0, 9), 9: (0, 10), 15: (0, 18), 10: (0, 11), 11: (0, 12)}, + 7: {12: (1, 4), 13: (1, 4), 14: (1, 4), 15: (1, 4)}, + 8: {12: (1, 0)}, + 9: {12: (1, 7), 13: (1, 7), 14: (1, 7), 15: (1, 7)}, + 10: {12: (1, 15), 17: (1, 15), 13: (1, 15), 14: (1, 15), 15: (1, 15)}, + 11: {12: (1, 6), 13: (1, 6), 14: (1, 6), 15: (1, 6)}, + 12: {12: (1, 3), 13: (1, 3), 14: (1, 3), 15: (1, 3)}, + 13: {13: (1, 13), 12: (1, 13), 14: (1, 13), 15: (1, 13)}, + 14: {14: (0, 19), 13: (0, 20), 18: (0, 21)}, + 15: {17: (0, 22)}, + 16: {}, + 17: {19: (0, 23), 15: (0, 24), 13: (0, 25)}, + 18: {13: (1, 10), 12: (1, 10), 14: (1, 10), 15: (1, 10)}, + 19: {13: (1, 12), 12: (1, 12), 14: (1, 12), 15: (1, 12)}, + 20: {9: (0, 10), 11: (0, 15), 16: (0, 26)}, + 21: {14: (0, 27), 13: (0, 28)}, + 22: {5: (0, 6), 1: (0, 2), 0: (0, 1), 8: (0, 9), 2: (0, 3), 3: (0, 4), 9: (0, 10), 6: (0, 7), 10: (0, 11), 11: (0, 12), 7: (0, 29)}, + 23: {15: (0, 30), 13: (0, 31)}, + 24: {13: (1, 9), 12: (1, 9), 14: (1, 9), 15: (1, 9)}, + 25: {5: (0, 6), 1: (0, 2), 0: (0, 1), 8: (0, 9), 2: (0, 3), 3: (0, 4), 7: (0, 32), 9: (0, 10), 6: (0, 7), 10: (0, 11), 11: (0, 12)}, + 26: {13: (1, 18), 14: (1, 18)}, + 27: {13: 
(1, 11), 12: (1, 11), 14: (1, 11), 15: (1, 11)}, + 28: {16: (0, 33), 9: (0, 10), 11: (0, 15)}, + 29: {13: (1, 14), 14: (1, 14)}, + 30: {13: (1, 8), 12: (1, 8), 14: (1, 8), 15: (1, 8)}, + 31: {5: (0, 6), 1: (0, 2), 0: (0, 1), 7: (0, 34), 8: (0, 9), 2: (0, 3), 3: (0, 4), 9: (0, 10), 6: (0, 7), 10: (0, 11), 11: (0, 12)}, + 32: {15: (1, 16), 13: (1, 16)}, + 33: {13: (1, 19), 14: (1, 19)}, + 34: {15: (1, 17), 13: (1, 17)}, +} +TOKEN_TYPES = ( +{0: '__TRUE0', + 1: '__LBRACE', + 2: 'array', + 3: 'object', + 4: 'start', + 5: '__LSQB', + 6: 'SIGNED_NUMBER', + 7: 'value', + 8: '__NULL2', + 9: 'ESCAPED_STRING', + 10: '__FALSE1', + 11: 'string', + 12: '$END', + 13: '__COMMA', + 14: '__RBRACE', + 15: '__RSQB', + 16: 'pair', + 17: '__COLON', + 18: '__anon_star_1', + 19: '__anon_star_0'} +) +parse_table.states = {s: {TOKEN_TYPES[t]: (a, RULES[x] if a is Reduce else x) for t, (a, x) in acts.items()} + for s, acts in STATES.items()} +parse_table.start_state = 0 +parse_table.end_state = 16 +class Lark_StandAlone: + def __init__(self, transformer=None, postlex=None): + callback = parse_tree_builder.create_callback(transformer=transformer) + callbacks = {rule: getattr(callback, rule.alias or rule.origin, None) for rule in RULES.values()} + self.parser = _Parser(parse_table, callbacks) + self.postlex = postlex + def parse(self, stream): + tokens = lex(stream) + if self.postlex: tokens = self.postlex.process(tokens) + return self.parser.parse(tokens) diff --git a/examples/standalone/json_parser_main.py b/examples/standalone/json_parser_main.py new file mode 100644 index 0000000..47c1bb1 --- /dev/null +++ b/examples/standalone/json_parser_main.py @@ -0,0 +1,25 @@ +import sys + +from json_parser import Lark_StandAlone, Transformer, inline_args + +class TreeToJson(Transformer): + @inline_args + def string(self, s): + return s[1:-1].replace('\\"', '"') + + array = list + pair = tuple + object = dict + number = inline_args(float) + + null = lambda self, _: None + true = lambda self, _: True + false = lambda self, _: False + + +parser = Lark_StandAlone(transformer=TreeToJson()) + +if __name__ == '__main__': + with open(sys.argv[1]) as f: + print(parser.parse(f.read())) +
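
Usage note (not part of the patch): json_parser_main.py above drives the generated parser with a transformer that builds Python objects directly. As a minimal sketch, the generated module can also be used without a transformer, in which case parse() returns a Tree that can be pretty-printed; the sample JSON string below is made up for illustration.

    # Sketch: parse a JSON string into a Tree with the generated standalone parser.
    # Assumes the json_parser.py generated by create_standalone.sh is importable.
    from json_parser import Lark_StandAlone

    parser = Lark_StandAlone()          # no transformer -> parse() returns a Tree
    tree = parser.parse('{"answer": [42, true, null]}')
    print(tree.pretty())                # indented view of the parse tree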