###{standalone
#
#
#   Lark Stand-alone Generator Tool
# ----------------------------------
# Generates a stand-alone LALR(1) parser with a standard lexer
#
# Git:    https://github.com/erezsh/lark
# Author: Erez Shinan (erezshin@gmail.com)
#
#
#    >>> LICENSE
#
#    This tool and its generated code use a separate license from Lark,
#    and are subject to the terms of the Mozilla Public License, v. 2.0.
#    If a copy of the MPL was not distributed with this
#    file, You can obtain one at https://mozilla.org/MPL/2.0/.
#
#    If you wish to purchase a commercial license for this tool and its
#    generated code, you may contact me via email or otherwise.
#
#    If MPL2 is incompatible with your free or open-source project,
#    contact me and we'll work it out.
#
#

import os
from io import open
###}
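# NOTE: the ###{standalone ... ###} markers above delimit the part of this file
# that is itself embedded into the generated parser. extract_sections() below
# collects the same markers from every file listed in EXTRACT_STANDALONE_FILES.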
import codecs
import sys
import token, tokenize
import os
from pprint import pprint
from os import path
from collections import defaultdict

import lark
from lark import Lark
from lark.parsers.lalr_analysis import Reduce
from lark.grammar import RuleOptions, Rule
from lark.lexer import TerminalDef

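# Paths are resolved relative to this file: _larkdir points at the lark
# package directory (the parent of lark/tools/).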
_dir = path.dirname(__file__)
_larkdir = path.join(_dir, path.pardir)

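# Lark source files whose ###{standalone sections are concatenated, in order,
# into the generated stand-alone module. Paths are relative to _larkdir.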
EXTRACT_STANDALONE_FILES = [
    'tools/standalone.py',
    'exceptions.py',
    'utils.py',
    'tree.py',
    'visitors.py',
    'indenter.py',
    'grammar.py',
    'lexer.py',
    'common.py',
    'parse_tree_builder.py',
    'parsers/lalr_parser.py',
    'parsers/lalr_analysis.py',
    'parser_frontends.py',
    'lark.py',
]

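# Collect the lines between "###{<name>" and "###}" markers into a dict of
# {section name: source text}. Lines outside any marked section are ignored.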
def extract_sections(lines):
    section = None
    text = []
    sections = defaultdict(list)
    for l in lines:
        if l.startswith('###'):
            if l[3] == '{':
                section = l[4:].strip()
            elif l[3] == '}':
                sections[section] += text
                section = None
                text = []
            else:
                raise ValueError(l)
        elif section:
            text.append(l)

    return {name: ''.join(text) for name, text in sections.items()}

def strip_docstrings(line_gen):
    """ Strip comments and docstrings from a file.
    Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings
    """
    res = []

    prev_toktype = token.INDENT
    last_lineno = -1
    last_col = 0

    tokgen = tokenize.generate_tokens(line_gen)
    for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
        if slineno > last_lineno:
            last_col = 0
        if scol > last_col:
            res.append(" " * (scol - last_col))
        if toktype == token.STRING and prev_toktype == token.INDENT:
            # Docstring
            res.append("#--")
        elif toktype == tokenize.COMMENT:
            # Comment
            res.append("##\n")
        else:
            res.append(ttext)
        prev_toktype = toktype
        last_col = ecol
        last_lineno = elineno

    return ''.join(res)

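# Build a LALR(1) parser for the given grammar and write a self-contained
# Python module to stdout: the embedded library sections, the serialized
# parser data (DATA/MEMO), and a Lark_StandAlone() constructor.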
def main(fobj, start):
    lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)

    print('# The file was automatically generated by Lark v%s' % lark.__version__)
    print('__version__ = "%s"' % lark.__version__)
    print()

    for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
        with open(os.path.join(_larkdir, pyfile)) as f:
            code = extract_sections(f)['standalone']
            if i:   # if not this file
                code = strip_docstrings(iter(code.splitlines(True)).__next__)
            print(code)

    data, m = lark_inst.memo_serialize([TerminalDef, Rule])
    print('DATA = (')
    # pprint(data, width=160)
    print(data)
    print(')')
    print('MEMO = (')
    print(m)
    print(')')

    print('Shift = 0')
    print('Reduce = 1')
    print("def Lark_StandAlone(transformer=None, postlex=None):")
    print("  return Lark._load_from_dict(DATA, MEMO, transformer=transformer, postlex=postlex)")

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Lark Stand-alone Generator Tool")
        print("Usage: python -m lark.tools.standalone <grammar-file> [<start>]")
        sys.exit(1)

    if len(sys.argv) == 3:
        fn, start = sys.argv[1:]
    elif len(sys.argv) == 2:
        fn, start = sys.argv[1], 'start'
    else:
        assert False, sys.argv

    with codecs.open(fn, encoding='utf8') as f:
        main(f, start)

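# Example usage (illustrative; file names are placeholders):
#
#   $ python -m lark.tools.standalone my_grammar.lark > my_parser.py
#
# The generated my_parser.py can then be used without installing lark:
#
#   from my_parser import Lark_StandAlone
#   parser = Lark_StandAlone()
#   tree = parser.parse("some input")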