Browse Source

Standalone generator now removes docstrings and comments.

The result is a much smaller file.
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.10.0
Erez Sh 5 years ago
parent
commit
bf2d9bf7b1
2 changed files with 307 additions and 245 deletions
  1. +270
    -243
      examples/standalone/json_parser.py
  2. +37
    -2
      lark/tools/standalone.py

+ 270
- 243
examples/standalone/json_parser.py
File diff suppressed because it is too large
View File


+ 37
- 2
lark/tools/standalone.py View File

@@ -30,6 +30,7 @@ from io import open


import codecs import codecs
import sys import sys
import token, tokenize
import os import os
from pprint import pprint from pprint import pprint
from os import path from os import path
@@ -84,6 +85,37 @@ def extract_sections(lines):
return {name:''.join(text) for name, text in sections.items()} return {name:''.join(text) for name, text in sections.items()}




def strip_docstrings(line_gen):
    """ Strip comments and docstrings from a file.
    Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings
    """
    out = []
    previous_type = token.INDENT  # so a module-level docstring at the very top is caught
    prev_end_row = -1
    prev_end_col = 0

    for tok_type, tok_text, (row, col), (end_row, end_col), _line in tokenize.generate_tokens(line_gen):
        # Starting a new physical line resets the column bookkeeping.
        if row > prev_end_row:
            prev_end_col = 0
        # Re-create any whitespace between the previous token and this one.
        if col > prev_end_col:
            out.append(" " * (col - prev_end_col))

        if tok_type == token.STRING and previous_type == token.INDENT:
            # A string right after an INDENT is a docstring; replace it.
            out.append("#--")
        elif tok_type == tokenize.COMMENT:
            # Drop the comment text, keep a marker plus the line break.
            out.append("##\n")
        else:
            out.append(tok_text)

        previous_type = tok_type
        prev_end_col = end_col
        prev_end_row = end_row

    return ''.join(out)


def main(fobj, start): def main(fobj, start):
lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start) lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)


@@ -91,9 +123,12 @@ def main(fobj, start):
print('__version__ = "%s"' % lark.__version__) print('__version__ = "%s"' % lark.__version__)
print() print()


for pyfile in EXTRACT_STANDALONE_FILES:
for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
with open(os.path.join(_larkdir, pyfile)) as f: with open(os.path.join(_larkdir, pyfile)) as f:
print (extract_sections(f)['standalone'])
code = extract_sections(f)['standalone']
if i: # if not this file
code = strip_docstrings(iter(code.splitlines(True)).__next__)
print(code)


data, m = lark_inst.memo_serialize([TerminalDef, Rule]) data, m = lark_inst.memo_serialize([TerminalDef, Rule])
print( 'DATA = (' ) print( 'DATA = (' )


Loading…
Cancel
Save