###{standalone
#
#
#   Lark Stand-alone Generator Tool
# ----------------------------------
# Generates a stand-alone LALR(1) parser with a contextual lexer
#
# Git:    https://github.com/erezsh/lark
# Author: Erez Shinan (erezshin@gmail.com)
#
#
#    >>> LICENSE
#
#    This tool and its generated code use a separate license from Lark.
#
#    It is licensed under GPLv2 or above.
#
#    If you wish to purchase a commercial license for this tool and its
#    generated code, contact me via email.
#
#    If GPL is incompatible with your free or open-source project,
#    contact me and we'll work it out (for free).
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    See <http://www.gnu.org/licenses/>.
#
#
###}

import codecs
import sys
import os
from pprint import pprint
from os import path
from collections import defaultdict

import lark
from lark import Lark
from lark.parsers.lalr_analysis import Reduce

_dir = path.dirname(__file__)
_larkdir = path.join(_dir, path.pardir)


EXTRACT_STANDALONE_FILES = [
    'tools/standalone.py',
    'exceptions.py',
    'utils.py',
    'tree.py',
    'visitors.py',
    'indenter.py',
    'lexer.py',
    'parse_tree_builder.py',
    'parsers/lalr_parser.py',
]
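
# Each file above contains a region delimited by `###{standalone` and `###}`
# markers; extract_sections() below pulls those regions out, and main()
# concatenates them in this order to form the header of the generated module.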


def extract_sections(lines):
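    """Split an iterable of source lines into named sections.

    A section opens with a line starting with `###{<name>` and closes with a
    `###}` line; any other line beginning with `###` raises ValueError.
    Returns a dict mapping each section name to its joined text.
    """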
    section = None
    text = []
    sections = defaultdict(list)
    for l in lines:
        if l.startswith('###'):
            if l[3] == '{':
                section = l[4:].strip()
            elif l[3] == '}':
                sections[section] += text
                section = None
                text = []
            else:
                raise ValueError(l)
        elif section:
            text.append(l)

    return {name: ''.join(text) for name, text in sections.items()}
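
# For example, extract_sections(['###{foo\n', 'x = 1\n', '###}\n'])
# returns {'foo': 'x = 1\n'}.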

def _prepare_mres(mres):
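    # Drop the compiled regex objects, keeping only their pattern strings
    # (plus a copy of each token map), so they can be pprint'ed as literals
    # and re-compiled by the generated module.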
    return [(p.pattern, {i: t for i, t in d.items()}) for p, d in mres]

class TraditionalLexerAtoms:
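    """Collects the printable pieces of a traditional (single-state) lexer.

    Everything stored here is a plain Python value, so print_python() can
    emit it verbatim into the generated module.
    """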
    def __init__(self, lexer):
        self.mres = _prepare_mres(lexer.mres)
        self.newline_types = lexer.newline_types
        self.ignore_types = lexer.ignore_types
        self.callback = {name: _prepare_mres(c.mres)
                         for name, c in lexer.callback.items()}

    def print_python(self):
        print('import re')
        print('class LexerRegexps: pass')
        print('NEWLINE_TYPES = %s' % self.newline_types)
        print('IGNORE_TYPES = %s' % self.ignore_types)
        self._print_python('lexer')

    def _print_python(self, var_name):
        print('MRES = (')
        pprint(self.mres)
        print(')')
        print('LEXER_CALLBACK = (')
        pprint(self.callback)
        print(')')
        print('lexer_regexps = LexerRegexps()')
        print('lexer_regexps.mres = [(re.compile(p), d) for p, d in MRES]')
        print('lexer_regexps.callback = {n: UnlessCallback([(re.compile(p), d) for p, d in mres])')
        print('                          for n, mres in LEXER_CALLBACK.items()}')
        print('%s = (lexer_regexps)' % var_name)


class ContextualLexerAtoms:
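    """Like TraditionalLexerAtoms, but holds one sub-lexer per parser state."""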
    def __init__(self, lexer):
        self.lexer_atoms = {state: TraditionalLexerAtoms(lexer) for state, lexer in lexer.lexers.items()}
        self.root_lexer_atoms = TraditionalLexerAtoms(lexer.root_lexer)

    def print_python(self):
        print('import re')
        print('class LexerRegexps: pass')
        print('NEWLINE_TYPES = %s' % self.root_lexer_atoms.newline_types)
        print('IGNORE_TYPES = %s' % self.root_lexer_atoms.ignore_types)

        print('LEXERS = {}')
        for state, lexer_atoms in self.lexer_atoms.items():
            lexer_atoms._print_python('LEXERS[%d]' % state)

        print('class ContextualLexer:')
        print('    def __init__(self):')
        print('        self.lexers = LEXERS')
        print('        self.set_parser_state(None)')
        print('    def set_parser_state(self, state):')
        print('        self.parser_state = state')
        print('    def lex(self, stream):')
        print('        newline_types = NEWLINE_TYPES')
        print('        ignore_types = IGNORE_TYPES')
        print('        lexers = LEXERS')
        print('        l = _Lex(lexers[self.parser_state], self.parser_state)')
        print('        for x in l.lex(stream, newline_types, ignore_types):')
        print('            yield x')
        print('            l.lexer = lexers[self.parser_state]')
        print('            l.state = self.parser_state')

        print('CON_LEXER = ContextualLexer()')
        print('def lex(stream):')
        print('    return CON_LEXER.lex(stream)')

class GetRule:
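    """repr()s as a `RULES[<id>]` lookup, so instances can be embedded in printed literals."""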
    def __init__(self, rule_id):
        self.rule_id = rule_id

    def __repr__(self):
        return 'RULES[%d]' % self.rule_id

rule_ids = {}
token_types = {}

def _get_token_type(token_type):
    if token_type not in token_types:
        token_types[token_type] = len(token_types)
    return token_types[token_type]
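
# Token types are interned as consecutive integers so the emitted STATES table
# stays compact; the printed TOKEN_TYPES dict maps them back to their names.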

class ParserAtoms:
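    """Holds the LALR(1) parse table and prints it, plus the Lark_StandAlone entry point."""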
    def __init__(self, parser):
        self.parse_table = parser._parse_table

    def print_python(self):
        print('class ParseTable: pass')
        print('parse_table = ParseTable()')
        print('STATES = {')
        for state, actions in self.parse_table.states.items():
            print('  %r: %r,' % (state, {_get_token_type(token): ((1, rule_ids[arg]) if action is Reduce else (0, arg))
                                         for token, (action, arg) in actions.items()}))
        print('}')
        print('TOKEN_TYPES = (')
        pprint({v: k for k, v in token_types.items()})
        print(')')
        print('parse_table.states = {s: {TOKEN_TYPES[t]: (a, RULES[x] if a is Reduce else x) for t, (a, x) in acts.items()}')
        print('                      for s, acts in STATES.items()}')
        print('parse_table.start_state = %s' % self.parse_table.start_state)
        print('parse_table.end_state = %s' % self.parse_table.end_state)
        print('class Lark_StandAlone:')
        print('    def __init__(self, transformer=None, postlex=None):')
        print('        callbacks = parse_tree_builder.create_callback(transformer=transformer)')
        print('        self.parser = _Parser(parse_table, callbacks)')
        print('        self.postlex = postlex')
        print('    def parse(self, stream):')
        print('        tokens = lex(stream)')
        print('        sps = CON_LEXER.set_parser_state')
        print('        if self.postlex: tokens = self.postlex.process(tokens)')
        print('        return self.parser.parse(tokens, sps)')
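
# The generated module exposes Lark_StandAlone; typical use of the emitted
# file (hypothetical module name) would be:
#   from my_parser import Lark_StandAlone
#   tree = Lark_StandAlone().parse('...input...')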

class TreeBuilderAtoms:
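    """Prints the RULES table, registering each rule's id in rule_ids as a
    side effect. Must print before ParserAtoms, whose reduce actions refer
    to rules by those ids."""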
    def __init__(self, lark):
        self.rules = lark.rules

    def print_python(self):
        print('RULES = {')
        for i, r in enumerate(self.rules):
            rule_ids[r] = i
            print('  %d: Rule(%r, [%s], alias=%r, options=%r),' % (i, r.origin, ', '.join(s.fullrepr for s in r.expansion), r.alias, r.options))
        print('}')
        print('parse_tree_builder = ParseTreeBuilder(RULES.values(), Tree)')

def main(fobj, start):
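    """Build a parser for the grammar in `fobj` and print a self-contained
    Python module for it to stdout."""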
    lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)

    lexer_atoms = ContextualLexerAtoms(lark_inst.parser.lexer)
    parser_atoms = ParserAtoms(lark_inst.parser.parser)
    tree_builder_atoms = TreeBuilderAtoms(lark_inst)

    print('# This file was automatically generated by Lark v%s' % lark.__version__)

    for pyfile in EXTRACT_STANDALONE_FILES:
        with open(os.path.join(_larkdir, pyfile)) as f:
            print(extract_sections(f)['standalone'])

    with open(os.path.join(_larkdir, 'grammar.py')) as grammar_py:
        print(grammar_py.read())

    print('Shift = 0')
    print('Reduce = 1')
    lexer_atoms.print_python()
    tree_builder_atoms.print_python()
    parser_atoms.print_python()

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Lark Stand-alone Generator Tool")
        print("Usage: python -m lark.tools.standalone <grammar-file> [<start>]")
        sys.exit(1)

    if len(sys.argv) == 3:
        fn, start = sys.argv[1:]
    elif len(sys.argv) == 2:
        fn, start = sys.argv[1], 'start'
    else:
        assert False, sys.argv

    with codecs.open(fn, encoding='utf8') as f:
        main(f, start)
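
# Example invocation (hypothetical file names); the generated parser is
# written to stdout, so redirect it to a file:
#   python -m lark.tools.standalone my_grammar.lark start > my_parser.py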