Sfoglia il codice sorgente

Merge branch 'MegaIng-standalone_options2'

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
Erez Sh 4 anni fa
parent
commit
6e25258202
5 ha cambiato i file con 153 aggiunte e 56 eliminazioni
  1. +12
    -2
      lark/lark.py
  2. +65
    -0
      lark/tools/__init__.py
  3. +9
    -17
      lark/tools/serialize.py
  4. +59
    -33
      lark/tools/standalone.py
  5. +8
    -4
      tests/test_tools.py

+ 12
- 2
lark/lark.py Vedi File

@@ -96,6 +96,15 @@ class LarkOptions(Serialize):
if __doc__:
__doc__ += OPTIONS_DOC


# Adding a new option needs to be done in multiple places:
# - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts
# - In the docstring above. It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs
# - In `lark-stubs/lark.pyi`:
# - As attribute to `LarkOptions`
# - As parameter to `Lark.__init__`
# - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
# - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument
_defaults = {
'debug': False,
'keep_all_tokens': False,
@@ -163,8 +172,9 @@ class LarkOptions(Serialize):
return cls(data)


_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags',
'regex', 'propagate_positions', 'keep_all_tokens', 'tree_class'}
# Options that can be passed to the Lark parser, even when it was loaded from cache/standalone.
# These options are only used outside of `load_grammar`.
_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'}


class Lark(Serialize):


+ 65
- 0
lark/tools/__init__.py Vedi File

@@ -0,0 +1,65 @@
import sys
from argparse import ArgumentParser, FileType
try:
from textwrap import indent
except ImportError:
def indent(text, prefix):
return ''.join(prefix + line for line in text.splitlines(True))
from logging import DEBUG, INFO, WARN, ERROR
import warnings

from lark import Lark, logger

# Shared command-line parser for the LALR-based tools. It is created without
# its own -h/--help so that it can be used as a parent of another parser.
lalr_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options')

# Boolean switches. An entry is either a long option name, or a
# (short letter, long name) pair when a one-letter alias is wanted.
flags = [
    ('d', 'debug'),
    'keep_all_tokens',
    'regex',
    'propagate_positions',
    'maybe_placeholders',
    'use_bytes'
]

# Names of the namespace attributes that `build_lalr` forwards to `Lark`.
# The loop at the bottom appends every flag name to this list.
options = ['start', 'lexer']

lalr_argparser.add_argument('-v', '--verbose', action='count', default=0, help="Increase Logger output level, up to three times")
lalr_argparser.add_argument('-s', '--start', action='append', default=[])
lalr_argparser.add_argument('-l', '--lexer', default='contextual', choices=('standard', 'contextual'))

# Extra keyword arguments for FileType: request utf-8 only on interpreter
# versions that pass the check below.
if sys.version_info > (3, 4):
    k = {'encoding': 'utf-8'}
else:
    k = {}
lalr_argparser.add_argument('-o', '--out', type=FileType('w', **k), default=sys.stdout, help='the output file (default=stdout)')
lalr_argparser.add_argument('grammar_file', type=FileType('r', **k), help='A valid .lark file')

# Register each flag as a store_true switch and record its long name.
for f in flags:
    if not isinstance(f, tuple):
        options.append(f)
        lalr_argparser.add_argument('--' + f, action='store_true')
        continue
    options.append(f[1])
    lalr_argparser.add_argument('-' + f[0], '--' + f[1], action='store_true')


def build_lalr(namespace):
    """Build a LALR ``Lark`` instance from parsed command-line arguments.

    Parameters:
        namespace: the object returned by ``parse_args``. This function reads
            ``verbose``, ``start``, ``grammar_file`` and ``out`` from it, plus
            every attribute named in the module-level ``options`` list.

    Returns:
        A ``(lark_instance, out)`` tuple, where ``out`` is ``namespace.out``.

    If no start rule was given, ``namespace.start`` is set to ``['start']``.
    """
    # One logging step per -v; anything beyond three is clamped to DEBUG.
    logger.setLevel((ERROR, WARN, INFO, DEBUG)[min(namespace.verbose, 3)])
    if not namespace.start:
        # Rebind instead of appending in place: when -s is not given, argparse
        # hands back the parser's own `default` list object, so an in-place
        # append would leak 'start' into every later parse with that parser.
        namespace.start = ['start']
    kwargs = {n: getattr(namespace, n) for n in options}
    return Lark(namespace.grammar_file, parser='lalr', **kwargs), namespace.out


def showwarning_as_comment(message, category, filename, lineno, file=None, line=None):
    """Write a warning to ``file`` with every line prefixed by ``'# '``.

    Takes the same arguments as ``warnings.showwarning`` (the implementation
    is based on ``warnings._showwarnmsg_impl``). When ``file`` is None the
    text goes to ``sys.stderr``; if that is None as well, nothing is written.
    An ``OSError`` raised while writing is ignored.
    """
    commented = indent(
        warnings.formatwarning(message, category, filename, lineno, line),
        '# ',
    )
    stream = sys.stderr if file is None else file
    if stream is None:
        # Nowhere to write to.
        return
    try:
        stream.write(commented)
    except OSError:
        # Best effort only: a failing output stream must not raise from here.
        pass


def make_warnings_comments():
    """Install ``showwarning_as_comment`` as the ``warnings.showwarning`` hook.

    This replaces the hook on the ``warnings`` module itself; the previous
    hook is neither saved nor restored here.
    """
    warnings.showwarning = showwarning_as_comment

+ 9
- 17
lark/tools/serialize.py Vedi File

@@ -5,20 +5,16 @@ import json
from lark import Lark
from lark.grammar import RuleOptions, Rule
from lark.lexer import TerminalDef
from lark.tools import lalr_argparser, build_lalr

import argparse

argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize') #description='''Lark Serialization Tool -- Stores Lark's internal state & LALR analysis as a convenient JSON file''')
argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[lalr_argparser],
description="Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file",
epilog='Look at the Lark documentation for more info on the options')

argparser.add_argument('grammar_file', type=argparse.FileType('r'), help='A valid .lark file')
argparser.add_argument('-o', '--out', type=argparse.FileType('w'), default=sys.stdout, help='json file path to create (default=stdout)')
argparser.add_argument('-s', '--start', default='start', help='start symbol (default="start")', nargs='+')
argparser.add_argument('-l', '--lexer', default='standard', choices=['standard', 'contextual'], help='lexer type (default="standard")')


def serialize(infile, outfile, lexer, start):
lark_inst = Lark(infile, parser="lalr", lexer=lexer, start=start) # TODO contextual

def serialize(lark_inst, outfile):
data, memo = lark_inst.memo_serialize([TerminalDef, Rule])
outfile.write('{\n')
outfile.write(' "data": %s,\n' % json.dumps(data))
@@ -27,13 +23,9 @@ def serialize(infile, outfile, lexer, start):


def main():
if len(sys.argv) == 1 or '-h' in sys.argv or '--help' in sys.argv:
print("Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file")
print("")
argparser.print_help()
else:
args = argparser.parse_args()
serialize(args.grammar_file, args.out, args.lexer, args.start)
ns = argparser.parse_args()
serialize(*build_lalr(ns))


if __name__ == '__main__':
main()
main()

+ 59
- 33
lark/tools/standalone.py Vedi File

@@ -26,22 +26,21 @@ from __future__ import print_function
#
#

import os
from io import open
###}

import codecs
import sys
import token, tokenize
import os
from pprint import pprint
from os import path
from collections import defaultdict
from functools import partial
from argparse import ArgumentParser, SUPPRESS
from warnings import warn

import lark
from lark import Lark
from lark.parsers.lalr_analysis import Reduce
from lark.tools import lalr_argparser, build_lalr, make_warnings_comments


from lark.grammar import RuleOptions, Rule
@@ -120,48 +119,75 @@ def strip_docstrings(line_gen):


def main(fobj, start, print=print):
warn('`lark.tools.standalone.main` is being redesigned. Use `gen_standalone`', DeprecationWarning)
lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)
gen_standalone(lark_inst, print)

def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False):
if output is None:
output = partial(print, file=out)

import pickle, zlib, base64
def compressed_output(obj):
s = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
c = zlib.compress(s)
output(repr(base64.b85encode(c)))

print('# The file was automatically generated by Lark v%s' % lark.__version__)
print('__version__ = "%s"' % lark.__version__)
print()
def output_decompress(name):
output('%(name)s = pickle.loads(zlib.decompress(base64.b85decode(%(name)s)))' % locals())

output('# The file was automatically generated by Lark v%s' % lark.__version__)
output('__version__ = "%s"' % lark.__version__)
output()

for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
with open(os.path.join(_larkdir, pyfile)) as f:
code = extract_sections(f)['standalone']
if i: # if not this file
code = strip_docstrings(partial(next, iter(code.splitlines(True))))
print(code)
output(code)

data, m = lark_inst.memo_serialize([TerminalDef, Rule])
print( 'DATA = (' )
# pprint(data, width=160)
print(data)
print(')')
print( 'MEMO = (')
print(m)
print(')')
output('import pickle, zlib, base64')
if compress:
output('DATA = (')
compressed_output(data)
output(')')
output_decompress('DATA')
output('MEMO = (')
compressed_output(m)
output(')')
output_decompress('MEMO')
else:
output('DATA = (')
output(data)
output(')')
output('MEMO = (')
output(m)
output(')')


print('Shift = 0')
print('Reduce = 1')
print("def Lark_StandAlone(**kwargs):")
print(" return Lark._load_from_dict(DATA, MEMO, **kwargs)")
output('Shift = 0')
output('Reduce = 1')
output("def Lark_StandAlone(**kwargs):")
output(" return Lark._load_from_dict(DATA, MEMO, **kwargs)")



if __name__ == '__main__':
if len(sys.argv) < 2:
print("Lark Stand-alone Generator Tool")
print("Usage: python -m lark.tools.standalone <grammar-file> [<start>]")
sys.exit(1)

if len(sys.argv) == 3:
fn, start = sys.argv[1:]
elif len(sys.argv) == 2:
fn, start = sys.argv[1], 'start'
else:
assert False, sys.argv

with codecs.open(fn, encoding='utf8') as f:
main(f, start)
def main():
    """Command-line entry point of the stand-alone parser generator.

    Parses ``sys.argv`` with the shared LALR options plus ``-c/--compress``,
    builds the parser with ``build_lalr`` and writes the generated module to
    the selected output via ``gen_standalone``.

    The old ``<grammar-file> <start>`` positional form is still accepted but
    emits a warning; the given start rule is added to the ``-s`` values.
    """
    # Warnings are emitted as '#'-prefixed comment lines from here on.
    make_warnings_comments()
    # The program name used to be the literal text "prog='python -m ...'",
    # which then appeared verbatim in the usage line.
    parser = ArgumentParser(prog='python -m lark.tools.standalone', description="Lark Stand-alone Generator Tool",
                            parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options')
    # Hidden positional that keeps the legacy start-rule argument working.
    parser.add_argument("old_start", nargs='?', help=SUPPRESS)
    parser.add_argument('-c', '--compress', action='store_true', default=False, help="Enable compression")
    ns = parser.parse_args()
    if ns.old_start is not None:
        warn('The syntax `python -m lark.tools.standalone <grammar-file> <start>` is deprecated. Use the -s option')
        # Build a new list: when -s was not given, ns.start is the parser's
        # own default list object, and appending in place would modify it.
        ns.start = ns.start + [ns.old_start]

    lark_inst, out = build_lalr(ns)
    gen_standalone(lark_inst, out=out, compress=ns.compress)

if __name__ == '__main__':
main()

+ 8
- 4
tests/test_tools.py Vedi File

@@ -3,7 +3,7 @@ from __future__ import absolute_import, print_function
import sys
from unittest import TestCase, main

from functools import partial
from lark import Lark
from lark.tree import Tree
from lark.tools import standalone

@@ -19,10 +19,9 @@ class TestStandalone(TestCase):
def setUp(self):
pass

def _create_standalone(self, grammar):
def _create_standalone(self, grammar, compress=False):
code_buf = StringIO()
pr = partial(print, file=code_buf)
standalone.main(StringIO(grammar), 'start', print=pr)
standalone.gen_standalone(Lark(grammar, parser='lalr'), out=code_buf, compress=compress)
code = code_buf.getvalue()

context = {'__doc__': None}
@@ -53,6 +52,11 @@ class TestStandalone(TestCase):
self.assertRaises(context['UnexpectedToken'], l.parse, 'twelve')
self.assertRaises(context['UnexpectedCharacters'], l.parse, '$ talks')

context = self._create_standalone(grammar, compress=True)
_Lark = context['Lark_StandAlone']
l = _Lark()
x = l.parse('12 elephants')

def test_contextual(self):
grammar = """
start: a b


Caricamento…
Annulla
Salva