Browse Source

Standalone generator now removes docstrings and comments.

The result is a much smaller file.
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.10.0
Erez Sh 4 years ago
parent
commit
bf2d9bf7b1
2 changed files with 307 additions and 245 deletions
  1. +270
    -243
      examples/standalone/json_parser.py
  2. +37
    -2
      lark/tools/standalone.py

+ 270
- 243
examples/standalone/json_parser.py
File diff suppressed because it is too large
View File


+ 37
- 2
lark/tools/standalone.py View File

@@ -30,6 +30,7 @@ from io import open

import codecs
import sys
import token, tokenize
import os
from pprint import pprint
from os import path
@@ -84,6 +85,37 @@ def extract_sections(lines):
return {name:''.join(text) for name, text in sections.items()}


def strip_docstrings(line_gen):
    """Strip comments and docstrings from Python source code.

    Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings

    The source is tokenized and re-assembled token by token, padding with
    spaces so that every token keeps its original column.

    * A string that is the first token after an INDENT (or the very first
      token of the input) is treated as a docstring. It is replaced by the
      placeholder comment ``#--``, unless it is the only statement in its
      block, in which case it is replaced by ``pass`` so the block does not
      end up with an empty body.
    * Every comment is replaced by ``##`` followed by a newline. The
      newline token that follows the comment is still emitted, so a blank
      line is left behind.

    Parameters:
        line_gen: A readline-style callable that returns one line of source
            per call; it is passed directly to ``tokenize.generate_tokens``.

    Returns:
        The stripped source code as a single string.
    """
    # Token types that may appear between a docstring and the DEDENT that
    # closes its block without adding a real statement to that block.
    filler = (token.NEWLINE, tokenize.NL, tokenize.COMMENT)

    # Materialized so that docstring handling can look ahead.
    tokens = list(tokenize.generate_tokens(line_gen))
    res = []

    # Starting as INDENT makes a leading string count as a docstring.
    prev_toktype = token.INDENT
    last_lineno = -1
    last_col = 0

    for i, (toktype, ttext, (slineno, scol), (elineno, ecol), _) in enumerate(tokens):
        if slineno > last_lineno:
            # New physical line: columns are counted from the start again.
            last_col = 0
        if scol > last_col:
            # Restore the whitespace that separated this token from the last.
            res.append(" " * (scol - last_col))

        if toktype == token.STRING and prev_toktype == token.INDENT:
            # Docstring. Find the next token that is not a line ending or a
            # comment; if it is a DEDENT, the docstring was the whole body.
            j = i + 1
            while j < len(tokens) and tokens[j][0] in filler:
                j += 1
            if j < len(tokens) and tokens[j][0] == token.DEDENT:
                # A comment alone would leave the block empty (invalid code).
                res.append("pass")
            else:
                res.append("#--")
        elif toktype == tokenize.COMMENT:
            # Comment
            res.append("##\n")
        else:
            res.append(ttext)

        prev_toktype = toktype
        last_col = ecol
        last_lineno = elineno

    return ''.join(res)


def main(fobj, start):
lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)

@@ -91,9 +123,12 @@ def main(fobj, start):
print('__version__ = "%s"' % lark.__version__)
print()

for pyfile in EXTRACT_STANDALONE_FILES:
for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
with open(os.path.join(_larkdir, pyfile)) as f:
print (extract_sections(f)['standalone'])
code = extract_sections(f)['standalone']
if i: # if not this file
code = strip_docstrings(iter(code.splitlines(True)).__next__)
print(code)

data, m = lark_inst.memo_serialize([TerminalDef, Rule])
print( 'DATA = (' )


Loading…
Cancel
Save