Standalone generator now removes docstrings and comments.

The result is a much smaller file.
4 years ago · bf2d9bf7b1
--- a/examples/standalone/json_parser.py
+++ b/examples/standalone/json_parser.py
--- a/lark/tools/standalone.py
+++ b/lark/tools/standalone.py
@@ -30,6 +30,7 @@ from io import open

 import codecs
 import sys
 import token, tokenize
 import os
 from pprint import pprint
 from os import path
@@ -84,6 +85,37 @@ def extract_sections(lines):
    return {name:''.join(text) for name, text in sections.items()}


 def strip_docstrings(line_gen):
    """ Strip comments and docstrings from a file.
    Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings
    """
    res = []

    prev_toktype = token.INDENT
    last_lineno = -1
    last_col = 0

    tokgen = tokenize.generate_tokens(line_gen)
    for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
        if slineno > last_lineno:
            last_col = 0
        if scol > last_col:
            res.append(" " * (scol - last_col))
        if toktype == token.STRING and prev_toktype == token.INDENT:
            # Docstring
            res.append("#--")
        elif toktype == tokenize.COMMENT:
            # Comment
            res.append("##\n")
        else:
            res.append(ttext)
        prev_toktype = toktype
        last_col = ecol
        last_lineno = elineno

    return ''.join(res)


 def main(fobj, start):
    lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)

@@ -91,9 +123,12 @@ def main(fobj, start):
    print('__version__ = "%s"' % lark.__version__)
    print()

    for pyfile in EXTRACT_STANDALONE_FILES:
    for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
        with open(os.path.join(_larkdir, pyfile)) as f:
            print (extract_sections(f)['standalone'])
            code = extract_sections(f)['standalone']
            if i:   # if not this file
                code = strip_docstrings(iter(code.splitlines(True)).__next__)
            print(code)

    data, m = lark_inst.memo_serialize([TerminalDef, Rule])
    print( 'DATA = (' )