Browse Source

lark.tools.serialize and standalone can now take more options. Also added info where options need to be added

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
MegaIng1 4 years ago
parent
commit
2d74539424
5 changed files with 96 additions and 55 deletions
  1. +13
    -3
      lark/lark.py
  2. +37
    -0
      lark/tools/__init__.py
  3. +9
    -17
      lark/tools/serialize.py
  4. +35
    -32
      lark/tools/standalone.py
  5. +2
    -3
      tests/test_tools.py

+ 13
- 3
lark/lark.py View File

@@ -95,7 +95,16 @@ class LarkOptions(Serialize):
"""
if __doc__:
__doc__ += OPTIONS_DOC

# Adding a new option needs to be done in multiple places:
# - In the dictionary below. This is the primary source of truth for which options `Lark.__init__` takes
# - In the doc string above. It is used both for the docstring of `LarkOptions` and `Lark`
# - In `lark-stubs/lark.pyi`:
# - As attribute to `LarkOptions`
# - As parameter to `Lark.__init__`
# - potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
# - potentially in `lark.tools.__init__`, when it can easily be passed as a cmd argument and makes sense
_defaults = {
'debug': False,
'keep_all_tokens': False,
@@ -163,8 +172,9 @@ class LarkOptions(Serialize):
return cls(data)


_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags',
'regex', 'propagate_positions', 'keep_all_tokens', 'tree_class'}
# Options that can be passed to the Lark parser, even when it was loaded from cache/standalone.
# These options are only used outside of `load_grammar`.
_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'}


class Lark(Serialize):


+ 37
- 0
lark/tools/__init__.py View File

@@ -0,0 +1,37 @@
import sys
from argparse import ArgumentParser, FileType
from lark import Lark

# Parent parser shared by the command-line tools. It is created without its
# own -h/--help so that it can be passed via `parents=[...]` to another parser.
base_argparser = ArgumentParser(
    add_help=False,
    epilog='Look at the Lark documentation for more info on the options',
)


# Boolean switches exposed on the command line. An entry is either the long
# option name, or a `(short_letter, long_name)` pair when a short alias exists.
flags = [
    ('d', 'debug'),
    'keep_all_tokens',
    'regex',
    'propagate_positions',
    'maybe_placeholders',
    'use_bytes'
]

# Names of the parsed attributes that `build_lalr` forwards to `Lark` as
# keyword arguments; the loop below appends every flag name to it.
options = ['start', 'lexer']

base_argparser.add_argument('-s', '--start', action='append', default=[])
base_argparser.add_argument('-l', '--lexer', default='contextual', choices=('standard', 'contextual'))

# Only hand an explicit encoding to FileType on interpreters newer than 3.4.
if sys.version_info > (3, 4):
    k = {'encoding':'utf-8'}
else:
    k = {}
base_argparser.add_argument('-o', '--out', type=FileType('w', **k), default=sys.stdout,
                            help='the output file (default=stdout)')
base_argparser.add_argument('grammar_file', type=FileType('r', **k), help='A valid .lark file')

for f in flags:
    # Build the option strings for this flag; the long form always comes last.
    if isinstance(f, tuple):
        _switches = ['-' + f[0], '--' + f[1]]
    else:
        _switches = ['--' + f]
    options.append(_switches[-1][2:])
    base_argparser.add_argument(*_switches, action='store_true')

def build_lalr(namespace):
    """Build a LALR `Lark` instance from parsed command-line arguments.

    `namespace` is the object returned by `parse_args()` of a parser that
    has `base_argparser` as a parent. Every attribute named in `options` is
    forwarded to `Lark` as a keyword argument. When no start symbol was
    given, `namespace.start` is set to `['start']`.

    Returns a `(lark_instance, output_file)` tuple, where `output_file` is
    `namespace.out`.
    """
    if not namespace.start:
        # Rebind instead of appending in place: when `--start` is not passed,
        # argparse stores the parser's own `default` list on the namespace, so
        # an in-place append would leak 'start' into every later parse.
        namespace.start = ['start']
    kwargs = {n: getattr(namespace, n) for n in options}
    return Lark(namespace.grammar_file, parser='lalr', **kwargs), namespace.out

+ 9
- 17
lark/tools/serialize.py View File

@@ -5,20 +5,16 @@ import json
from lark import Lark
from lark.grammar import RuleOptions, Rule
from lark.lexer import TerminalDef
from lark.tools import base_argparser, build_lalr

import argparse

argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize') #description='''Lark Serialization Tool -- Stores Lark's internal state & LALR analysis as a convenient JSON file''')
argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[base_argparser],
description="Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file",
epilog='Look at the Lark documentation for more info on the options')

argparser.add_argument('grammar_file', type=argparse.FileType('r'), help='A valid .lark file')
argparser.add_argument('-o', '--out', type=argparse.FileType('w'), default=sys.stdout, help='json file path to create (default=stdout)')
argparser.add_argument('-s', '--start', default='start', help='start symbol (default="start")', nargs='+')
argparser.add_argument('-l', '--lexer', default='standard', choices=['standard', 'contextual'], help='lexer type (default="standard")')


def serialize(infile, outfile, lexer, start):
lark_inst = Lark(infile, parser="lalr", lexer=lexer, start=start) # TODO contextual

def serialize(lark_inst, outfile):
data, memo = lark_inst.memo_serialize([TerminalDef, Rule])
outfile.write('{\n')
outfile.write(' "data": %s,\n' % json.dumps(data))
@@ -27,13 +23,9 @@ def serialize(infile, outfile, lexer, start):


def main():
if len(sys.argv) == 1 or '-h' in sys.argv or '--help' in sys.argv:
print("Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file")
print("")
argparser.print_help()
else:
args = argparser.parse_args()
serialize(args.grammar_file, args.out, args.lexer, args.start)
ns = argparser.parse_args()
serialize(*build_lalr(ns))


if __name__ == '__main__':
main()
main()

+ 35
- 32
lark/tools/standalone.py View File

@@ -26,22 +26,21 @@ from __future__ import print_function
#
#

import os
from io import open
###}

import codecs
import sys
import token, tokenize
import os
from pprint import pprint
from os import path
from collections import defaultdict
from functools import partial
from argparse import ArgumentParser, SUPPRESS
from warnings import warn

import lark
from lark import Lark
from lark.parsers.lalr_analysis import Reduce
from lark.tools import base_argparser, build_lalr


from lark.grammar import RuleOptions, Rule
@@ -120,48 +119,52 @@ def strip_docstrings(line_gen):


def main(fobj, start, print=print):
warn('`lark.tools.standalone.main` is being redesigned. Use `gen_standalone`', DeprecationWarning)
lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)
gen_standalone(lark_inst, print)

def gen_standalone(lark_inst, output=None, out=sys.stdout):
if output is None:
output = partial(print, file=out)

print('# The file was automatically generated by Lark v%s' % lark.__version__)
print('__version__ = "%s"' % lark.__version__)
print()
output('# The file was automatically generated by Lark v%s' % lark.__version__)
output('__version__ = "%s"' % lark.__version__)
output()

for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
with open(os.path.join(_larkdir, pyfile)) as f:
code = extract_sections(f)['standalone']
if i: # if not this file
code = strip_docstrings(partial(next, iter(code.splitlines(True))))
print(code)
output(code)

data, m = lark_inst.memo_serialize([TerminalDef, Rule])
print( 'DATA = (' )
output('DATA = (')
# pprint(data, width=160)
print(data)
print(')')
print( 'MEMO = (')
print(m)
print(')')
output(data)
output(')')
output('MEMO = (')
output(m)
output(')')


output('Shift = 0')
output('Reduce = 1')
output("def Lark_StandAlone(**kwargs):")
output(" return Lark._load_from_dict(DATA, MEMO, **kwargs)")

print('Shift = 0')
print('Reduce = 1')
print("def Lark_StandAlone(**kwargs):")
print(" return Lark._load_from_dict(DATA, MEMO, **kwargs)")



if __name__ == '__main__':
if len(sys.argv) < 2:
print("Lark Stand-alone Generator Tool")
print("Usage: python -m lark.tools.standalone <grammar-file> [<start>]")
sys.exit(1)

if len(sys.argv) == 3:
fn, start = sys.argv[1:]
elif len(sys.argv) == 2:
fn, start = sys.argv[1], 'start'
else:
assert False, sys.argv

with codecs.open(fn, encoding='utf8') as f:
main(f, start)
# Command-line entry point: extend the shared tool options with the legacy
# positional start symbol, then emit the standalone parser to the chosen output.
# `prog` is the bare invocation string; it previously contained the literal
# text "prog='...'", which is what argparse would have shown in usage messages.
parser = ArgumentParser(prog='python -m lark.tools.standalone', description="Lark Stand-alone Generator Tool",
                        parents=[base_argparser], epilog='Look at the Lark documentation for more info on the options')
# Hidden from --help: only kept so the old `<grammar-file> <start>` syntax still works.
parser.add_argument("old_start", nargs='?', help=SUPPRESS)
ns = parser.parse_args()
if ns.old_start is not None:
    warn('The syntax `python -m lark.tools.standalone <grammar-file> <start>` is deprecated. Use the -s option')
    # Build a new list rather than appending in place, so the parser's shared
    # default list for --start is never mutated.
    ns.start = ns.start + [ns.old_start]
lark_inst, out = build_lalr(ns)
gen_standalone(lark_inst, out=out)

+ 2
- 3
tests/test_tools.py View File

@@ -3,7 +3,7 @@ from __future__ import absolute_import, print_function
import sys
from unittest import TestCase, main

from functools import partial
from lark import Lark
from lark.tree import Tree
from lark.tools import standalone

@@ -21,8 +21,7 @@ class TestStandalone(TestCase):

def _create_standalone(self, grammar):
code_buf = StringIO()
pr = partial(print, file=code_buf)
standalone.main(StringIO(grammar), 'start', print=pr)
standalone.gen_standalone(Lark(grammar, parser='lalr'), out=code_buf)
code = code_buf.getvalue()

context = {'__doc__': None}


Loading…
Cancel
Save