소스 검색

lark.tools.serialize and standalone can now take more options. Also added info where options need to be added

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
MegaIng1 4 년 전
부모
커밋
2d74539424
5개의 변경된 파일96개의 추가작업 그리고 55개의 파일을 삭제
  1. +13
    -3
      lark/lark.py
  2. +37
    -0
      lark/tools/__init__.py
  3. +9
    -17
      lark/tools/serialize.py
  4. +35
    -32
      lark/tools/standalone.py
  5. +2
    -3
      tests/test_tools.py

+ 13
- 3
lark/lark.py 파일 보기

@@ -95,7 +95,16 @@ class LarkOptions(Serialize):
"""
if __doc__:
__doc__ += OPTIONS_DOC

# Adding a new option needs to be done in multiple places:
# - In the dictionary below. This is the primary truth which options `Lark.__init__` takes
# - In the doc string above. It is used both for the docstring of `LarkOptions` and `Lark`
# - In `lark-stubs/lark.pyi`:
# - As attribute to `LarkOptions`
# - As parameter to `Lark.__init__`
# - potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
# - potentially in `lark.tools.__init__`, when it can easily be passed as a cmd argument and makes sense
_defaults = {
'debug': False,
'keep_all_tokens': False,
@@ -163,8 +172,9 @@ class LarkOptions(Serialize):
return cls(data)


_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags',
'regex', 'propagate_positions', 'keep_all_tokens', 'tree_class'}
# Options that can be passed to the Lark parser, even when it was loaded from cache/standalone.
# These options are only used outside of `load_grammar`.
_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'}


class Lark(Serialize):


+ 37
- 0
lark/tools/__init__.py 파일 보기

@@ -0,0 +1,37 @@
import sys
from argparse import ArgumentParser, FileType
from lark import Lark

# Parent parser shared by the Lark command-line tools. It is created with
# `add_help=False` so that tools can list it in `parents=[...]` and still
# get a single `-h/--help` option from their own parser.
base_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options')


# Boolean Lark options exposed as `store_true` switches. An entry is either
# the long option name, or a `(short_letter, long_name)` tuple when a
# one-letter alias should be registered as well.
flags = [
    ('d', 'debug'),
    'keep_all_tokens',
    'regex',
    'propagate_positions',
    'maybe_placeholders',
    'use_bytes'
]

# Names of the parsed arguments that `build_lalr` forwards to `Lark` as
# keyword arguments. The flag names are appended further down.
options = ['start', 'lexer']

# `--start` may be repeated; the parser default stays empty and the
# fallback to "start" is applied by `build_lalr`.
base_argparser.add_argument('-s', '--start', action='append', default=[],
                            help='start symbol, may be given more than once (default="start")')
base_argparser.add_argument('-l', '--lexer', default='contextual', choices=('standard', 'contextual'),
                            help='lexer type (default: %(default)s)')
# Only pass an explicit encoding to `FileType` on interpreters newer than 3.4.
k = {'encoding':'utf-8'} if sys.version_info > (3, 4) else {}
base_argparser.add_argument('-o', '--out', type=FileType('w', **k), default=sys.stdout, help='the output file (default=stdout)')
base_argparser.add_argument('grammar_file', type=FileType('r', **k), help='A valid .lark file')


def _add_flag(flag):
    """Register one entry of `flags` on `base_argparser` and record its name in `options`."""
    if isinstance(flag, tuple):
        names = ('-' + flag[0], '--' + flag[1])
        dest = flag[1]
    else:
        names = ('--' + flag,)
        dest = flag
    options.append(dest)
    base_argparser.add_argument(*names, action='store_true',
                                help="enable the Lark option '%s'" % dest)


for f in flags:
    _add_flag(f)

def build_lalr(namespace):
    """Build an LALR `Lark` instance from parsed command-line arguments.

    `namespace` is the result of parsing with `base_argparser` (or a parser
    that has it as a parent). Every attribute named in `options` is passed
    to `Lark` as a keyword argument.

    Returns a `(lark_instance, output_file)` tuple, where `output_file` is
    `namespace.out`.
    """
    if not namespace.start:
        # Rebind instead of appending in place: when no `-s` was given,
        # `namespace.start` is the very list object registered as the
        # parser's default, and mutating it would leak "start" into every
        # later parse done with the same parser.
        namespace.start = ['start']
    kwargs = {n: getattr(namespace, n) for n in options}
    return Lark(namespace.grammar_file, parser='lalr', **kwargs), namespace.out

+ 9
- 17
lark/tools/serialize.py 파일 보기

@@ -5,20 +5,16 @@ import json
from lark import Lark
from lark.grammar import RuleOptions, Rule
from lark.lexer import TerminalDef
from lark.tools import base_argparser, build_lalr

import argparse

argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize') #description='''Lark Serialization Tool -- Stores Lark's internal state & LALR analysis as a convenient JSON file''')
argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[base_argparser],
description="Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file",
epilog='Look at the Lark documentation for more info on the options')

argparser.add_argument('grammar_file', type=argparse.FileType('r'), help='A valid .lark file')
argparser.add_argument('-o', '--out', type=argparse.FileType('w'), default=sys.stdout, help='json file path to create (default=stdout)')
argparser.add_argument('-s', '--start', default='start', help='start symbol (default="start")', nargs='+')
argparser.add_argument('-l', '--lexer', default='standard', choices=['standard', 'contextual'], help='lexer type (default="standard")')


def serialize(infile, outfile, lexer, start):
lark_inst = Lark(infile, parser="lalr", lexer=lexer, start=start) # TODO contextual

def serialize(lark_inst, outfile):
data, memo = lark_inst.memo_serialize([TerminalDef, Rule])
outfile.write('{\n')
outfile.write(' "data": %s,\n' % json.dumps(data))
@@ -27,13 +23,9 @@ def serialize(infile, outfile, lexer, start):


def main():
if len(sys.argv) == 1 or '-h' in sys.argv or '--help' in sys.argv:
print("Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file")
print("")
argparser.print_help()
else:
args = argparser.parse_args()
serialize(args.grammar_file, args.out, args.lexer, args.start)
ns = argparser.parse_args()
serialize(*build_lalr(ns))


if __name__ == '__main__':
main()
main()

+ 35
- 32
lark/tools/standalone.py 파일 보기

@@ -26,22 +26,21 @@ from __future__ import print_function
#
#

import os
from io import open
###}

import codecs
import sys
import token, tokenize
import os
from pprint import pprint
from os import path
from collections import defaultdict
from functools import partial
from argparse import ArgumentParser, SUPPRESS
from warnings import warn

import lark
from lark import Lark
from lark.parsers.lalr_analysis import Reduce
from lark.tools import base_argparser, build_lalr


from lark.grammar import RuleOptions, Rule
@@ -120,48 +119,52 @@ def strip_docstrings(line_gen):


def main(fobj, start, print=print):
warn('`lark.tools.standalone.main` is being redesigned. Use `gen_standalone`', DeprecationWarning)
lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)
gen_standalone(lark_inst, print)

def gen_standalone(lark_inst, output=None, out=sys.stdout):
if output is None:
output = partial(print, file=out)

print('# The file was automatically generated by Lark v%s' % lark.__version__)
print('__version__ = "%s"' % lark.__version__)
print()
output('# The file was automatically generated by Lark v%s' % lark.__version__)
output('__version__ = "%s"' % lark.__version__)
output()

for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
with open(os.path.join(_larkdir, pyfile)) as f:
code = extract_sections(f)['standalone']
if i: # if not this file
code = strip_docstrings(partial(next, iter(code.splitlines(True))))
print(code)
output(code)

data, m = lark_inst.memo_serialize([TerminalDef, Rule])
print( 'DATA = (' )
output('DATA = (')
# pprint(data, width=160)
print(data)
print(')')
print( 'MEMO = (')
print(m)
print(')')
output(data)
output(')')
output('MEMO = (')
output(m)
output(')')


output('Shift = 0')
output('Reduce = 1')
output("def Lark_StandAlone(**kwargs):")
output(" return Lark._load_from_dict(DATA, MEMO, **kwargs)")

print('Shift = 0')
print('Reduce = 1')
print("def Lark_StandAlone(**kwargs):")
print(" return Lark._load_from_dict(DATA, MEMO, **kwargs)")



if __name__ == '__main__':
if len(sys.argv) < 2:
print("Lark Stand-alone Generator Tool")
print("Usage: python -m lark.tools.standalone <grammar-file> [<start>]")
sys.exit(1)

if len(sys.argv) == 3:
fn, start = sys.argv[1:]
elif len(sys.argv) == 2:
fn, start = sys.argv[1], 'start'
else:
assert False, sys.argv

with codecs.open(fn, encoding='utf8') as f:
main(f, start)
# Command-line entry point: extend the shared tool parser, build the LALR
# parser from the parsed arguments and write the standalone module to the
# chosen output file.
parser = ArgumentParser(prog='python -m lark.tools.standalone', description="Lark Stand-alone Generator Tool",
                        parents=[base_argparser], epilog='Look at the Lark documentation for more info on the options')
# Hidden optional positional, kept so the deprecated
# `<grammar-file> <start>` call syntax is still accepted.
parser.add_argument("old_start", nargs='?', help=SUPPRESS)
ns = parser.parse_args()
if ns.old_start is not None:
    warn('The syntax `python -m lark.tools.standalone <grammar-file> <start>` is deprecated. Use the -s option')
    # Build a new list rather than appending in place, so the parser's
    # shared default list for `--start` is never mutated.
    ns.start = ns.start + [ns.old_start]
lark_inst, out = build_lalr(ns)
gen_standalone(lark_inst, out=out)

+ 2
- 3
tests/test_tools.py 파일 보기

@@ -3,7 +3,7 @@ from __future__ import absolute_import, print_function
import sys
from unittest import TestCase, main

from functools import partial
from lark import Lark
from lark.tree import Tree
from lark.tools import standalone

@@ -21,8 +21,7 @@ class TestStandalone(TestCase):

def _create_standalone(self, grammar):
code_buf = StringIO()
pr = partial(print, file=code_buf)
standalone.main(StringIO(grammar), 'start', print=pr)
standalone.gen_standalone(Lark(grammar, parser='lalr'), out=code_buf)
code = code_buf.getvalue()

context = {'__doc__': None}


불러오는 중...
취소
저장