@@ -30,6 +30,7 @@ from io import open
 
 import codecs
 import sys
+import token, tokenize
 import os
 from pprint import pprint
 from os import path
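
The added import supplies both halves of the stdlib tokenizer used by
strip_docstrings() below: `token` holds the token-type constants, and
tokenize.generate_tokens() turns a readline-style callable into a stream of
5-tuples of (type, text, start, end, line). A minimal sketch, not part of
the patch:

# Sketch only, not part of the patch.
import tokenize

src = 'x = 1  # note\n'
readline = iter(src.splitlines(True)).__next__
for toktype, ttext, start, end, ltext in tokenize.generate_tokens(readline):
    print(tokenize.tok_name[toktype], repr(ttext))
# NAME 'x', OP '=', NUMBER '1', COMMENT '# note', NEWLINE '\n', ENDMARKER ''
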
@@ -84,6 +85,37 @@ def extract_sections(lines):
     return {name:''.join(text) for name, text in sections.items()}
 
 
+def strip_docstrings(line_gen):
+    """ Strip comments and docstrings from a file.
+    Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings
+    """
+    res = []
+
+    prev_toktype = token.INDENT
+    last_lineno = -1
+    last_col = 0
+
+    tokgen = tokenize.generate_tokens(line_gen)
+    for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
+        if slineno > last_lineno:
+            last_col = 0
+        if scol > last_col:
+            res.append(" " * (scol - last_col))
+        if toktype == token.STRING and prev_toktype == token.INDENT:
+            # Docstring
+            res.append("#--")
+        elif toktype == tokenize.COMMENT:
+            # Comment
+            res.append("##\n")
+        else:
+            res.append(ttext)
+        prev_toktype = toktype
+        last_col = ecol
+        last_lineno = elineno
+
+    return ''.join(res)
+
+
 def main(fobj, start):
     lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)
 
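
With strip_docstrings() from the hunk above in scope, a small before/after
sketch (output worked out by hand, not taken from the patch). The column
arithmetic re-inserts the whitespace between tokens, so indentation
survives; prev_toktype starts as INDENT, so a docstring at the very top of
a module is stripped as well.

# Sketch only, not part of the patch; assumes strip_docstrings() is in scope.
code = (
    'def f():\n'
    '    """Docstring."""\n'
    '    return 1  # comment\n'
)
print(strip_docstrings(iter(code.splitlines(True)).__next__))
# def f():
#     #--
#     return 1  ##
#
# Docstrings collapse to "#--" and comments to "##". The "##\n"
# replacement carries its own newline, so a blank line follows each
# stripped comment, and only strings directly after an INDENT token
# count as docstrings.
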
@@ -91,9 +123,12 @@ def main(fobj, start):
     print('__version__ = "%s"' % lark.__version__)
     print()
 
-    for pyfile in EXTRACT_STANDALONE_FILES:
+    for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
         with open(os.path.join(_larkdir, pyfile)) as f:
-            print (extract_sections(f)['standalone'])
+            code = extract_sections(f)['standalone']
+            if i: # if not this file
+                code = strip_docstrings(iter(code.splitlines(True)).__next__)
+            print(code)
 
     data, m = lark_inst.memo_serialize([TerminalDef, Rule])
     print( 'DATA = (' )
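
A note on the call convention, not part of the patch: tokenize wants a
readline-style callable, and `iter(code.splitlines(True)).__next__` builds
one from the already-extracted source. Index 0 is left unstripped; per the
`# if not this file` comment, that entry is this script itself.

# Sketch only, not part of the patch.
code = 'a\nb\n'
readline = iter(code.splitlines(True)).__next__
print(repr(readline()))   # 'a\n'  (splitlines(True) keeps the newlines)
print(repr(readline()))   # 'b\n'
# A further call raises StopIteration, which generate_tokens() accepts
# as end-of-input.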