From 5ac4120b71d9481eccba04e7c9a746c50be38fdc Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Wed, 10 Jan 2018 00:50:12 +0200 Subject: [PATCH] Stand-alone tool working for LALR+traditional lexer (first commit) --- lark/parse_tree_builder.py | 3 + lark/tools/standalone.py | 184 +++++++++++++++++++++++++++++++++++++ lark/tree.py | 2 + lark/utils.py | 36 ++++---- 4 files changed, 208 insertions(+), 17 deletions(-) create mode 100644 lark/tools/standalone.py diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index f960931..7e52125 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -109,6 +109,8 @@ class ParseTreeBuilder: self.rule_builders = list(self._init_builders(rules)) + self.user_aliases = {} + def _init_builders(self, rules): filter_out = set() for rule in rules: @@ -144,6 +146,7 @@ class ParseTreeBuilder: except AttributeError: f = NodeBuilder(self.tree_class, user_callback_name) + self.user_aliases[rule] = rule.alias rule.alias = internal_callback_name for w in wrapper_chain: diff --git a/lark/tools/standalone.py b/lark/tools/standalone.py new file mode 100644 index 0000000..54dc69a --- /dev/null +++ b/lark/tools/standalone.py @@ -0,0 +1,184 @@ +###{standalone +# +# +# Lark Stand-alone Generator Tool +# ---------------------------------- +# Git: https://github.com/erezsh/lark +# Author: Erez Shinan (erezshin@gmail.com) +# +# +# >>> LICENSE +# +# This tool and its generated code use a separate license from Lark. +# +# It is licensed under GPLv2 or above. +# +# If you wish to purchase a commercial license for this tool and its +# generated code, contact me via email. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# See <http://www.gnu.org/licenses/>. +# +# +###} + +import codecs +import sys +import os +from pprint import pprint +from os import path +from collections import defaultdict + +import lark +from lark import Lark + +from ..grammar import Rule + +__dir__ = path.dirname(__file__) +__larkdir__ = path.join(__dir__, path.pardir) + + +EXTRACT_STANDALONE_FILES = [ + 'tools/standalone.py', + 'utils.py', + 'common.py', + 'tree.py', + 'lexer.py', + 'parse_tree_builder.py', + 'parsers/lalr_parser.py', +] + + +def extract_sections(lines): + section = None + text = [] + sections = defaultdict(list) + for l in lines: + if l.startswith('###'): + if l[3] == '{': + section = l[4:].strip() + elif l[3] == '}': + sections[section] += text + section = None + text = [] + else: + raise ValueError(l) + elif section: + text.append(l) + + return {name:''.join(text) for name, text in sections.items()} + +class LexerAtoms: + def __init__(self, lexer): + assert not lexer.callback + self.mres = [(p.pattern,d) for p,d in lexer.mres] + self.newline_types = lexer.newline_types + self.ignore_types = lexer.ignore_types + + def print_python(self): + print('import re') + print('MRES = (') + pprint(self.mres) + print(')') + print('NEWLINE_TYPES = %s' % self.newline_types) + print('IGNORE_TYPES = %s' % self.ignore_types) + print('class LexerRegexps: pass') + print('lexer_regexps = LexerRegexps()') + print('lexer_regexps.mres = [(re.compile(p), d) for p, d in MRES]') + print('lexer_regexps.callback = {}') + print('lexer = _Lex(lexer_regexps)') + print('def lex(stream):') + print(' return lexer.lex(stream, NEWLINE_TYPES, IGNORE_TYPES)') + + +class GetRule: + def __init__(self, rule_id): + self.rule_id = rule_id + + def __repr__(self): + return 'RULE_ID[%d]' % self.rule_id + + +def 
get_rule_ids(x): + if isinstance(x, (tuple, list)): + return type(x)(map(get_rule_ids, x)) + elif isinstance(x, dict): + return {get_rule_ids(k):get_rule_ids(v) for k, v in x.items()} + elif isinstance(x, Rule): + return GetRule(id(x)) + return x + +class ParserAtoms: + def __init__(self, parser): + self.parse_table = parser.analysis.parse_table + + def print_python(self): + print('class ParseTable: pass') + print('parse_table = ParseTable()') + print('parse_table.states = (') + pprint(get_rule_ids(self.parse_table.states)) + print(')') + print('parse_table.start_state = %s' % self.parse_table.start_state) + print('parse_table.end_state = %s' % self.parse_table.end_state) + print('class Lark_StandAlone:') + print(' def __init__(self, transformer=None):') + print(' callback = parse_tree_builder.create_callback(transformer=transformer)') + print(' callbacks = {rule: getattr(callback, rule.alias or rule.origin, None) for rule in RULES}') + print(' self.parser = _Parser(parse_table, callbacks)') + print(' def parse(self, stream):') + print(' return self.parser.parse(lex(stream))') + +class TreeBuilderAtoms: + def __init__(self, lark): + self.rules = lark.rules + self.ptb = lark._parse_tree_builder + + def print_python(self): + print('RULE_ID = {') + for r in self.rules: + print(' %d: Rule(%r, %r, %r, %r),' % (id(r), r.origin, r.expansion, self.ptb.user_aliases[r], r.options )) + print('}') + print('RULES = list(RULE_ID.values())') + print('parse_tree_builder = ParseTreeBuilder(RULES, Tree)') + +def main(fn): + with codecs.open(fn, encoding='utf8') as f: + lark_inst = Lark(f, parser="lalr") + + lexer_atoms = LexerAtoms(lark_inst.parser.lexer) + parser_atoms = ParserAtoms(lark_inst.parser.parser) + tree_builder_atoms = TreeBuilderAtoms(lark_inst) + + print('# Generated by Lark v%s' % lark.__version__) + + + for pyfile in EXTRACT_STANDALONE_FILES: + print (extract_sections(open(os.path.join(__larkdir__, pyfile)))['standalone']) + + print(open(os.path.join(__larkdir__, 
'grammar.py')).read()) + print('Shift = 0') + print('Reduce = 1') + lexer_atoms.print_python() + tree_builder_atoms.print_python() + parser_atoms.print_python() + + # print('print(parser.parse(lex("1+2")).pretty())') + +if __name__ == '__main__': + if len(sys.argv) < 2: + print("Generates a stand-alone lalr parser") + print("Usage: %s " % sys.argv[0]) + sys.exit(1) + + fn ,= sys.argv[1:] + + main(fn) diff --git a/lark/tree.py b/lark/tree.py index 1639bb1..9c8e7da 100644 --- a/lark/tree.py +++ b/lark/tree.py @@ -101,6 +101,7 @@ class Tree(object): +###{standalone class Transformer(object): def _get_func(self, name): return getattr(self, name) @@ -197,6 +198,7 @@ class Transformer_NoRecurse(Transformer): def __default__(self, t): return t +###} def pydot__tree_to_png(tree, filename): diff --git a/lark/utils.py b/lark/utils.py index d984400..01c70a1 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -1,7 +1,4 @@ -import functools -import types from collections import deque -from contextlib import contextmanager class fzset(frozenset): def __repr__(self): @@ -49,8 +46,13 @@ try: except NameError: # Python 3 STRING_TYPE = str -Str = type(u'') +###{standalone +import types +import functools +from contextlib import contextmanager + +Str = type(u'') def inline_args(f): # print '@@', f.__name__, type(f), isinstance(f, types.FunctionType), isinstance(f, types.TypeType), isinstance(f, types.BuiltinFunctionType) @@ -76,19 +78,6 @@ def inline_args(f): return _f - -try: - compare = cmp -except NameError: - def compare(a, b): - if a == b: - return 0 - elif a > b: - return 1 - else: - return -1 - - try: from contextlib import suppress # Python 3 except ImportError: @@ -107,6 +96,19 @@ except ImportError: except excs: pass +###} +try: + compare = cmp +except NameError: + def compare(a, b): + if a == b: + return 0 + elif a > b: + return 1 + else: + return -1 + +