Stand-alone tool working for LALR+traditional lexer (first commit)

6 vuotta sitten · 5ac4120b71
--- a/lark/parse_tree_builder.py
+++ b/lark/parse_tree_builder.py
@@ -109,6 +109,8 @@ class ParseTreeBuilder:

        self.rule_builders = list(self._init_builders(rules))

        self.user_aliases = {}

    def _init_builders(self, rules):
        filter_out = set()
        for rule in rules:
@@ -144,6 +146,7 @@ class ParseTreeBuilder:
            except AttributeError:
                f = NodeBuilder(self.tree_class, user_callback_name)

            self.user_aliases[rule] = rule.alias
            rule.alias = internal_callback_name

            for w in wrapper_chain:
--- a/lark/tools/standalone.py
+++ b/lark/tools/standalone.py
@@ -0,0 +1,184 @@
 ###{standalone
 #
 #
 #   Lark Stand-alone Generator Tool
 # ----------------------------------
 # Git:    https://github.com/erezsh/lark
 # Author: Erez Shinan (erezshin@gmail.com)
 #
 #
 #    >>> LICENSE
 #
 #    This tool and its generated code use a separate license from Lark.
 #
 #    It is licensed under GPLv2 or above.
 #
 #    If you wish to purchase a commercial license for this tool and its
 #    generated code, contact me via email.
 #
 #    This program is free software: you can redistribute it and/or modify
 #    it under the terms of the GNU General Public License as published by
 #    the Free Software Foundation, either version 2 of the License, or
 #    (at your option) any later version.
 #
 #    This program is distributed in the hope that it will be useful,
 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #    GNU General Public License for more details.
 #
 #    See <http://www.gnu.org/licenses/>.
 #
 #
 ###}

 import codecs
 import sys
 import os
 from pprint import pprint
 from os import path
 from collections import defaultdict

 import lark
 from lark import Lark

 from ..grammar import Rule

 # Directory that contains this module file.
 # NOTE(review): a module-level ``__dir__`` is also the PEP 562 hook that
 # ``dir(module)`` looks up on Python 3.7+; binding it to a string looks like
 # it would make ``dir()`` on this module fail -- consider renaming, confirm
 # nothing else reads this attribute first.
 __dir__ = path.dirname(__file__)
 # Parent of this module's directory; main() joins the source file paths onto it.
 __larkdir__ = path.join(__dir__, path.pardir)


 # Source files, relative to __larkdir__, whose ``###{standalone`` ... ``###}``
 # sections main() prints into the generated module, in this order.
 EXTRACT_STANDALONE_FILES = [
    'tools/standalone.py',
    'utils.py',
    'common.py',
    'tree.py',
    'lexer.py',
    'parse_tree_builder.py',
    'parsers/lalr_parser.py',
 ]


 def extract_sections(lines):
    """Collect the text of every ``###{name`` ... ``###}`` section in *lines*.

    A line starting with ``###{`` opens a section named by the rest of that
    line (stripped); a line starting with ``###}`` closes it and stores the
    lines gathered since the previous close under the current name.  Several
    sections with the same name are concatenated in order of appearance.
    Text of a section that is never closed is discarded.

    Args:
        lines: iterable of text lines (line endings are kept untouched).

    Returns:
        dict mapping each section name to the joined text of its lines.

    Raises:
        ValueError: a line starts with ``###`` but is not followed by ``{``
            or ``}`` -- including a bare ``###`` with nothing after it.
    """
    sections = defaultdict(list)
    current = None
    buffered = []
    for line in lines:
        if line.startswith('###'):
            # Slice instead of indexing: a line that is exactly '###' (no
            # trailing newline) must raise ValueError, not IndexError.
            marker = line[3:4]
            if marker == '{':
                current = line[4:].strip()
            elif marker == '}':
                sections[current] += buffered
                current = None
                buffered = []
            else:
                raise ValueError(line)
        elif current:
            buffered.append(line)

    return {name: ''.join(text) for name, text in sections.items()}

 class LexerAtoms:
    """Snapshot of a lexer's data that can be printed as Python source.

    Attributes set by ``__init__``:
        mres: list of ``(pattern_string, d)`` pairs, taken from ``lexer.mres``
            with each compiled pattern replaced by its ``.pattern`` string.
        newline_types: copied from ``lexer.newline_types``.
        ignore_types: copied from ``lexer.ignore_types``.
    """

    def __init__(self, lexer):
        # Lexer callbacks cannot be emitted as source; the generated code
        # always sets an empty callback dict.
        assert not lexer.callback
        self.mres = [(p.pattern,d) for p,d in lexer.mres]
        self.newline_types = lexer.newline_types
        self.ignore_types = lexer.ignore_types

    def print_python(self):
        """Print to stdout the Python source that rebuilds the lexer and ``lex()``."""
        print('import re')
        print('MRES = (')
        pprint(self.mres)
        print(')')
        # Wrap the operand in a 1-tuple: a bare tuple value on the right of
        # ``%`` would be unpacked as the argument list (TypeError for any
        # length other than 1, silent unwrapping for length 1).
        print('NEWLINE_TYPES = %s' % (self.newline_types,))
        print('IGNORE_TYPES = %s' % (self.ignore_types,))
        print('class LexerRegexps: pass')
        print('lexer_regexps = LexerRegexps()')
        print('lexer_regexps.mres = [(re.compile(p), d) for p, d in MRES]')
        print('lexer_regexps.callback = {}')
        print('lexer = _Lex(lexer_regexps)')
        print('def lex(stream):')
        print('    return lexer.lex(stream, NEWLINE_TYPES, IGNORE_TYPES)')


 class GetRule:
    """Stand-in for a rule whose repr is a ``RULE_ID[<id>]`` lookup expression."""

    def __init__(self, rule_id):
        # Key under which the rule is to be looked up in ``RULE_ID``.
        self.rule_id = rule_id

    def __repr__(self):
        template = 'RULE_ID[%d]'
        return template % (self.rule_id,)


 def get_rule_ids(x):
    """Return a copy of *x* with every ``Rule`` replaced by ``GetRule(id(rule))``.

    Tuples and lists are rebuilt with the same type, dicts are rebuilt with
    both keys and values converted, and any other value is returned as-is.
    """
    if isinstance(x, (tuple, list)):
        rebuild = type(x)
        return rebuild(get_rule_ids(item) for item in x)
    if isinstance(x, dict):
        return {get_rule_ids(key): get_rule_ids(value) for key, value in x.items()}
    if isinstance(x, Rule):
        return GetRule(id(x))
    return x

 class ParserAtoms:
    """Holds a parser's parse table and prints it as Python source."""

    def __init__(self, parser):
        analysis = parser.analysis
        self.parse_table = analysis.parse_table

    def print_python(self):
        """Print the parse table and the ``Lark_StandAlone`` class to stdout."""
        for header_line in (
            'class ParseTable: pass',
            'parse_table = ParseTable()',
            'parse_table.states = (',
        ):
            print(header_line)
        # Rules inside the states are printed as RULE_ID[...] lookups.
        pprint(get_rule_ids(self.parse_table.states))
        print(')')
        print('parse_table.start_state = %s' % self.parse_table.start_state)
        print('parse_table.end_state = %s' % self.parse_table.end_state)
        for class_line in (
            'class Lark_StandAlone:',
            '  def __init__(self, transformer=None):',
            '     callback = parse_tree_builder.create_callback(transformer=transformer)',
            '     callbacks = {rule: getattr(callback, rule.alias or rule.origin, None) for rule in RULES}',
            '     self.parser = _Parser(parse_table, callbacks)',
            '  def parse(self, stream):',
            '     return self.parser.parse(lex(stream))',
        ):
            print(class_line)

 class TreeBuilderAtoms:
    """Holds the grammar rules and parse-tree builder, and prints them as source."""

    def __init__(self, lark):
        self.rules = lark.rules
        self.ptb = lark._parse_tree_builder

    def print_python(self):
        """Print the ``RULE_ID`` table, ``RULES`` and ``parse_tree_builder`` to stdout."""
        print('RULE_ID = {')
        for rule in self.rules:
            # The alias printed is the one recorded in the builder's
            # ``user_aliases`` mapping, not ``rule.alias``.
            entry = (
                id(rule),
                rule.origin,
                rule.expansion,
                self.ptb.user_aliases[rule],
                rule.options,
            )
            print(' %d: Rule(%r, %r, %r, %r),' % entry)
        print('}')
        print('RULES = list(RULE_ID.values())')
        print('parse_tree_builder = ParseTreeBuilder(RULES, Tree)')

 def main(fn):
    """Print a stand-alone LALR parser module for the grammar file *fn* to stdout.

    The grammar is read as UTF-8 and loaded with ``Lark(..., parser="lalr")``.
    The output consists of, in order: a version banner, the ``standalone``
    section of every file in ``EXTRACT_STANDALONE_FILES``, the full text of
    ``grammar.py``, the ``Shift``/``Reduce`` constants, and the source printed
    by the lexer, tree-builder and parser atoms.

    Args:
        fn: path of the grammar file.

    Raises:
        KeyError: one of the listed source files has no ``standalone`` section.
    """
    with codecs.open(fn, encoding='utf8') as f:
        lark_inst = Lark(f, parser="lalr")

    lexer_atoms = LexerAtoms(lark_inst.parser.lexer)
    parser_atoms = ParserAtoms(lark_inst.parser.parser)
    tree_builder_atoms = TreeBuilderAtoms(lark_inst)

    print('# Generated by Lark v%s' % lark.__version__)

    # Open each source file in a ``with`` block so its handle is closed
    # deterministically instead of being left to the garbage collector.
    for pyfile in EXTRACT_STANDALONE_FILES:
        with open(os.path.join(__larkdir__, pyfile)) as src:
            print(extract_sections(src)['standalone'])

    with open(os.path.join(__larkdir__, 'grammar.py')) as src:
        print(src.read())
    print('Shift = 0')
    print('Reduce = 1')
    lexer_atoms.print_python()
    tree_builder_atoms.print_python()
    parser_atoms.print_python()

 if __name__ == '__main__':
    # Exactly one argument (the grammar path) is accepted.  Checking for
    # ``!= 2`` also prints the usage text when extra arguments are given,
    # instead of failing later with an unpacking ValueError.
    if len(sys.argv) != 2:
        print("Generates a stand-alone lalr parser")
        print("Usage: %s <grammar_path>" % sys.argv[0])
        sys.exit(1)

    fn = sys.argv[1]

    main(fn)
--- a/lark/tree.py
+++ b/lark/tree.py
@@ -101,6 +101,7 @@ class Tree(object):



 ###{standalone
 class Transformer(object):
    def _get_func(self, name):
        return getattr(self, name)
@@ -197,6 +198,7 @@ class Transformer_NoRecurse(Transformer):

    def __default__(self, t):
        return t
 ###}


 def pydot__tree_to_png(tree, filename):
--- a/lark/utils.py
+++ b/lark/utils.py
@@ -1,7 +1,4 @@
 import functools
 import types
 from collections import deque
 from contextlib import contextmanager

 class fzset(frozenset):
    def __repr__(self):
@@ -49,8 +46,13 @@ try:
 except NameError:   # Python 3
    STRING_TYPE = str

 Str = type(u'')
 ###{standalone

 import types
 import functools
 from contextlib import contextmanager

 # Text string type: the type of a ``u''`` literal (``unicode`` on Python 2,
 # ``str`` on Python 3).
 Str = type(u'')

 def inline_args(f):
    # print '@@', f.__name__, type(f), isinstance(f, types.FunctionType), isinstance(f, types.TypeType), isinstance(f, types.BuiltinFunctionType)
@@ -76,19 +78,6 @@ def inline_args(f):
        return _f



 try:
    compare = cmp
 except NameError:
    # No builtin ``cmp`` is available: provide an equivalent three-way comparison.
    def compare(a, b):
        """Return 0 if ``a == b``, 1 if ``a > b``, otherwise -1."""
        if a == b:
            return 0
        if a > b:
            return 1
        return -1


 try:
    from contextlib import suppress     # Python 3
 except ImportError:
@@ -107,6 +96,19 @@ except ImportError:
        except excs:
            pass

 ###}



 try:
    compare = cmp
 except NameError:
    # ``cmp`` is not defined here, so fall back to a local three-way comparison.
    def compare(a, b):
        """Three-way comparison: 0 when equal, 1 when ``a > b``, else -1."""
        if a == b:
            return 0
        return 1 if a > b else -1