From 2d74539424958be17face4c7e9f7e2fcebc5b577 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Mon, 5 Oct 2020 23:23:43 +0200 Subject: [PATCH 1/5] lark.tools.serialize and standalone can now take more options. Also added info where options need to be added --- lark/lark.py | 16 ++++++++-- lark/tools/__init__.py | 37 ++++++++++++++++++++++ lark/tools/serialize.py | 26 ++++++---------- lark/tools/standalone.py | 67 +++++++++++++++++++++------------------- tests/test_tools.py | 5 ++- 5 files changed, 96 insertions(+), 55 deletions(-) diff --git a/lark/lark.py b/lark/lark.py index 770b821..776469a 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -95,7 +95,16 @@ class LarkOptions(Serialize): """ if __doc__: __doc__ += OPTIONS_DOC - + + + # Adding a new option needs to be done in multiple places: + # - In the dictionary below. This is the primary truth which options `Lark.__init__` takes + # - In the doc string above. It is used both for the docstring of `LarkOptions` and `Lark` + # - In `lark-stubs/lark.pyi`: + # - As attribute to `LarkOptions` + # - As parameter to `Lark.__init__` + # - potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded + # - potentially in `lark.tools.__init__`, when it can easily be passed as a cmd argument and makes sense _defaults = { 'debug': False, 'keep_all_tokens': False, @@ -163,8 +172,9 @@ class LarkOptions(Serialize): return cls(data) -_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', - 'regex', 'propagate_positions', 'keep_all_tokens', 'tree_class'} +# Options that can be passed to the Lark parser, even when it was loaded from cache/standalone. +# These options are only used outside of `load_grammar`. 
+_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'} class Lark(Serialize): diff --git a/lark/tools/__init__.py b/lark/tools/__init__.py index e69de29..b9e8c01 100644 --- a/lark/tools/__init__.py +++ b/lark/tools/__init__.py @@ -0,0 +1,37 @@ +import sys +from argparse import ArgumentParser, FileType +from lark import Lark + +base_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options') + + +flags = [ + ('d', 'debug'), + 'keep_all_tokens', + 'regex', + 'propagate_positions', + 'maybe_placeholders', + 'use_bytes' +] + +options = ['start', 'lexer'] + +base_argparser.add_argument('-s', '--start', action='append', default=[]) +base_argparser.add_argument('-l', '--lexer', default='contextual', choices=('standard', 'contextual')) +k = {'encoding':'utf-8'} if sys.version_info > (3, 4) else {} +base_argparser.add_argument('-o', '--out', type=FileType('w', **k), default=sys.stdout, help='the output file (default=stdout)') +base_argparser.add_argument('grammar_file', type=FileType('r', **k), help='A valid .lark file') + +for f in flags: + if isinstance(f, tuple): + options.append(f[1]) + base_argparser.add_argument('-' + f[0], '--' + f[1], action='store_true') + else: + options.append(f) + base_argparser.add_argument('--' + f, action='store_true') + +def build_lalr(namespace): + if len(namespace.start) == 0: + namespace.start.append('start') + kwargs = {n: getattr(namespace, n) for n in options} + return Lark(namespace.grammar_file, parser='lalr', **kwargs), namespace.out \ No newline at end of file diff --git a/lark/tools/serialize.py b/lark/tools/serialize.py index fb69d35..fb9b98f 100644 --- a/lark/tools/serialize.py +++ b/lark/tools/serialize.py @@ -5,20 +5,16 @@ import json from lark import Lark from lark.grammar import RuleOptions, Rule from lark.lexer import TerminalDef +from lark.tools import base_argparser, build_lalr import 
argparse -argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize') #description='''Lark Serialization Tool -- Stores Lark's internal state & LALR analysis as a convenient JSON file''') +argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[base_argparser], + description="Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file", + epilog='Look at the Lark documentation for more info on the options') -argparser.add_argument('grammar_file', type=argparse.FileType('r'), help='A valid .lark file') -argparser.add_argument('-o', '--out', type=argparse.FileType('w'), default=sys.stdout, help='json file path to create (default=stdout)') -argparser.add_argument('-s', '--start', default='start', help='start symbol (default="start")', nargs='+') -argparser.add_argument('-l', '--lexer', default='standard', choices=['standard', 'contextual'], help='lexer type (default="standard")') - - -def serialize(infile, outfile, lexer, start): - lark_inst = Lark(infile, parser="lalr", lexer=lexer, start=start) # TODO contextual +def serialize(lark_inst, outfile): data, memo = lark_inst.memo_serialize([TerminalDef, Rule]) outfile.write('{\n') outfile.write(' "data": %s,\n' % json.dumps(data)) @@ -27,13 +23,9 @@ def serialize(infile, outfile, lexer, start): def main(): - if len(sys.argv) == 1 or '-h' in sys.argv or '--help' in sys.argv: - print("Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file") - print("") - argparser.print_help() - else: - args = argparser.parse_args() - serialize(args.grammar_file, args.out, args.lexer, args.start) + ns = argparser.parse_args() + serialize(*build_lalr(ns)) + if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/lark/tools/standalone.py b/lark/tools/standalone.py index f2af015..2705838 100644 --- a/lark/tools/standalone.py +++ b/lark/tools/standalone.py @@ -26,22 +26,21 @@ from __future__ import print_function # 
# -import os from io import open ###} -import codecs import sys import token, tokenize import os -from pprint import pprint from os import path from collections import defaultdict from functools import partial +from argparse import ArgumentParser, SUPPRESS +from warnings import warn import lark from lark import Lark -from lark.parsers.lalr_analysis import Reduce +from lark.tools import base_argparser, build_lalr from lark.grammar import RuleOptions, Rule @@ -120,48 +119,52 @@ def strip_docstrings(line_gen): def main(fobj, start, print=print): + warn('`lark.tools.standalone.main` is being redesigned. Use `gen_standalone`', DeprecationWarning) lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start) + gen_standalone(lark_inst, print) + +def gen_standalone(lark_inst, output=None, out=sys.stdout): + if output is None: + output = partial(print, file=out) - print('# The file was automatically generated by Lark v%s' % lark.__version__) - print('__version__ = "%s"' % lark.__version__) - print() + output('# The file was automatically generated by Lark v%s' % lark.__version__) + output('__version__ = "%s"' % lark.__version__) + output() for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES): with open(os.path.join(_larkdir, pyfile)) as f: code = extract_sections(f)['standalone'] if i: # if not this file code = strip_docstrings(partial(next, iter(code.splitlines(True)))) - print(code) + output(code) data, m = lark_inst.memo_serialize([TerminalDef, Rule]) - print( 'DATA = (' ) + output('DATA = (') # pprint(data, width=160) - print(data) - print(')') - print( 'MEMO = (') - print(m) - print(')') + output(data) + output(')') + output('MEMO = (') + output(m) + output(')') + + output('Shift = 0') + output('Reduce = 1') + output("def Lark_StandAlone(**kwargs):") + output(" return Lark._load_from_dict(DATA, MEMO, **kwargs)") - print('Shift = 0') - print('Reduce = 1') - print("def Lark_StandAlone(**kwargs):") - print(" return Lark._load_from_dict(DATA, MEMO, **kwargs)") if 
__name__ == '__main__': - if len(sys.argv) < 2: - print("Lark Stand-alone Generator Tool") - print("Usage: python -m lark.tools.standalone []") - sys.exit(1) - - if len(sys.argv) == 3: - fn, start = sys.argv[1:] - elif len(sys.argv) == 2: - fn, start = sys.argv[1], 'start' - else: - assert False, sys.argv - - with codecs.open(fn, encoding='utf8') as f: - main(f, start) + parser = ArgumentParser(prog="prog='python -m lark.tools.standalone'", description="Lark Stand-alone Generator Tool", + parents=[base_argparser], epilog='Look at the Lark documentation for more info on the options') + parser.add_argument("old_start", nargs='?', help=SUPPRESS) + ns = parser.parse_args() + if ns.old_start is not None: + warn('The syntax `python -m lark.tools.standalone ` is deprecated. Use the -s option') + ns.start.append(ns.old_start) + + lark_inst, out = build_lalr(ns) + gen_standalone(lark_inst, out=out) + diff --git a/tests/test_tools.py b/tests/test_tools.py index ce995d8..0972f8f 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -3,7 +3,7 @@ from __future__ import absolute_import, print_function import sys from unittest import TestCase, main -from functools import partial +from lark import Lark from lark.tree import Tree from lark.tools import standalone @@ -21,8 +21,7 @@ class TestStandalone(TestCase): def _create_standalone(self, grammar): code_buf = StringIO() - pr = partial(print, file=code_buf) - standalone.main(StringIO(grammar), 'start', print=pr) + standalone.gen_standalone(Lark(grammar, parser='lalr'), out=code_buf) code = code_buf.getvalue() context = {'__doc__': None} From 2700b794b9007d96e71a15381472c10763100a22 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Tue, 6 Oct 2020 02:27:18 +0200 Subject: [PATCH 2/5] add `--verbose` option & make warnings comments fixup! 
make warnings comments --- lark/tools/__init__.py | 48 +++++++++++++++++++++++++++++++--------- lark/tools/serialize.py | 4 ++-- lark/tools/standalone.py | 5 +++-- 3 files changed, 43 insertions(+), 14 deletions(-) diff --git a/lark/tools/__init__.py b/lark/tools/__init__.py index b9e8c01..4ecf13d 100644 --- a/lark/tools/__init__.py +++ b/lark/tools/__init__.py @@ -1,9 +1,16 @@ import sys from argparse import ArgumentParser, FileType -from lark import Lark +try: + from textwrap import indent +except ImportError: + def indent(text, prefix): + return ''.join(prefix + line for line in text.splitlines(True)) +from logging import DEBUG, INFO, WARN, ERROR +import warnings -base_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options') +from lark import Lark, logger +lalr_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options') flags = [ ('d', 'debug'), @@ -16,22 +23,43 @@ flags = [ options = ['start', 'lexer'] -base_argparser.add_argument('-s', '--start', action='append', default=[]) -base_argparser.add_argument('-l', '--lexer', default='contextual', choices=('standard', 'contextual')) -k = {'encoding':'utf-8'} if sys.version_info > (3, 4) else {} -base_argparser.add_argument('-o', '--out', type=FileType('w', **k), default=sys.stdout, help='the output file (default=stdout)') -base_argparser.add_argument('grammar_file', type=FileType('r', **k), help='A valid .lark file') +lalr_argparser.add_argument('-v', '--verbose', action='count', default=0, help="Increase Logger output level, up to three times") +lalr_argparser.add_argument('-s', '--start', action='append', default=[]) +lalr_argparser.add_argument('-l', '--lexer', default='contextual', choices=('standard', 'contextual')) +k = {'encoding': 'utf-8'} if sys.version_info > (3, 4) else {} +lalr_argparser.add_argument('-o', '--out', type=FileType('w', **k), default=sys.stdout, help='the output file 
(default=stdout)') +lalr_argparser.add_argument('grammar_file', type=FileType('r', **k), help='A valid .lark file') for f in flags: if isinstance(f, tuple): options.append(f[1]) - base_argparser.add_argument('-' + f[0], '--' + f[1], action='store_true') + lalr_argparser.add_argument('-' + f[0], '--' + f[1], action='store_true') else: options.append(f) - base_argparser.add_argument('--' + f, action='store_true') + lalr_argparser.add_argument('--' + f, action='store_true') + def build_lalr(namespace): + logger.setLevel((ERROR, WARN, INFO, DEBUG)[min(namespace.verbose, 3)]) if len(namespace.start) == 0: namespace.start.append('start') kwargs = {n: getattr(namespace, n) for n in options} - return Lark(namespace.grammar_file, parser='lalr', **kwargs), namespace.out \ No newline at end of file + return Lark(namespace.grammar_file, parser='lalr', **kwargs), namespace.out + + +def showwarning_as_comment(message, category, filename, lineno, file=None, line=None): + # Based on warnings._showwarnmsg_impl + text = warnings.formatwarning(message, category, filename, lineno, line) + text = indent(text, '# ') + if file is None: + file = sys.stderr + if file is None: + return + try: + file.write(text) + except OSError: + pass + + +def make_warnings_comments(): + warnings.showwarning = showwarning_as_comment diff --git a/lark/tools/serialize.py b/lark/tools/serialize.py index fb9b98f..10884eb 100644 --- a/lark/tools/serialize.py +++ b/lark/tools/serialize.py @@ -5,11 +5,11 @@ import json from lark import Lark from lark.grammar import RuleOptions, Rule from lark.lexer import TerminalDef -from lark.tools import base_argparser, build_lalr +from lark.tools import lalr_argparser, build_lalr import argparse -argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[base_argparser], +argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[lalr_argparser], description="Lark Serialization Tool - Stores Lark's internal state & LALR 
analysis as a JSON file", epilog='Look at the Lark documentation for more info on the options') diff --git a/lark/tools/standalone.py b/lark/tools/standalone.py index 2705838..4ca57f6 100644 --- a/lark/tools/standalone.py +++ b/lark/tools/standalone.py @@ -40,7 +40,7 @@ from warnings import warn import lark from lark import Lark -from lark.tools import base_argparser, build_lalr +from lark.tools import lalr_argparser, build_lalr, make_warnings_comments from lark.grammar import RuleOptions, Rule @@ -157,8 +157,9 @@ def gen_standalone(lark_inst, output=None, out=sys.stdout): if __name__ == '__main__': + make_warnings_comments() parser = ArgumentParser(prog="prog='python -m lark.tools.standalone'", description="Lark Stand-alone Generator Tool", - parents=[base_argparser], epilog='Look at the Lark documentation for more info on the options') + parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options') parser.add_argument("old_start", nargs='?', help=SUPPRESS) ns = parser.parse_args() if ns.old_start is not None: From 08677238b6a738876e59aecb379cba3ad3cb52e5 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Tue, 6 Oct 2020 21:25:19 +0300 Subject: [PATCH 3/5] Minor edit --- lark/lark.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lark/lark.py b/lark/lark.py index 776469a..c7bdfa0 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -95,16 +95,16 @@ class LarkOptions(Serialize): """ if __doc__: __doc__ += OPTIONS_DOC - - + + # Adding a new option needs to be done in multiple places: - # - In the dictionary below. This is the primary truth which options `Lark.__init__` takes - # - In the doc string above. It is used both for the docstring of `LarkOptions` and `Lark` + # - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts + # - In the docstring above. 
It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs # - In `lark-stubs/lark.pyi`: # - As attribute to `LarkOptions` # - As parameter to `Lark.__init__` - # - potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded - # - potentially in `lark.tools.__init__`, when it can easily be passed as a cmd argument and makes sense + # - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded + # - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument _defaults = { 'debug': False, 'keep_all_tokens': False, From 6a9759d620aba8d5a53170f7835b846e91ee2ecd Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Tue, 6 Oct 2020 22:25:48 +0300 Subject: [PATCH 4/5] Added optional compression to standalone parser --- lark/tools/standalone.py | 46 +++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/lark/tools/standalone.py b/lark/tools/standalone.py index 4ca57f6..3456c39 100644 --- a/lark/tools/standalone.py +++ b/lark/tools/standalone.py @@ -123,10 +123,19 @@ def main(fobj, start, print=print): lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start) gen_standalone(lark_inst, print) -def gen_standalone(lark_inst, output=None, out=sys.stdout): +def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False): if output is None: output = partial(print, file=out) + import pickle, zlib, base64 + def compressed_output(obj): + s = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL) + c = zlib.compress(s) + output(repr(base64.b85encode(c))) + + def output_decompress(name): + output('%(name)s = pickle.loads(zlib.decompress(base64.b85decode(%(name)s)))' % locals()) + output('# The file was automatically generated by Lark v%s' % lark.__version__) output('__version__ = "%s"' % lark.__version__) output() @@ -139,13 +148,23 @@ def 
gen_standalone(lark_inst, output=None, out=sys.stdout): output(code) data, m = lark_inst.memo_serialize([TerminalDef, Rule]) - output('DATA = (') - # pprint(data, width=160) - output(data) - output(')') - output('MEMO = (') - output(m) - output(')') + output('import pickle, zlib, base64') + if compress: + output('DATA = (') + compressed_output(data) + output(')') + output_decompress('DATA') + output('MEMO = (') + compressed_output(m) + output(')') + output_decompress('MEMO') + else: + output('DATA = (') + output(data) + output(')') + output('MEMO = (') + output(m) + output(')') output('Shift = 0') @@ -156,16 +175,19 @@ def gen_standalone(lark_inst, output=None, out=sys.stdout): -if __name__ == '__main__': +def main(): make_warnings_comments() parser = ArgumentParser(prog="prog='python -m lark.tools.standalone'", description="Lark Stand-alone Generator Tool", parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options') parser.add_argument("old_start", nargs='?', help=SUPPRESS) + parser.add_argument('-c', '--compress', action='store_true', default=0, help="Enable compression") ns = parser.parse_args() if ns.old_start is not None: warn('The syntax `python -m lark.tools.standalone ` is deprecated. 
Use the -s option') ns.start.append(ns.old_start) - + lark_inst, out = build_lalr(ns) - gen_standalone(lark_inst, out=out) - + gen_standalone(lark_inst, out=out, compress=ns.compress) + +if __name__ == '__main__': + main() \ No newline at end of file From 6bcff89e3c3a9b94f17a014028efa392c88228ed Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Tue, 6 Oct 2020 22:29:05 +0300 Subject: [PATCH 5/5] Added test for standalone compression --- tests/test_tools.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_tools.py b/tests/test_tools.py index 0972f8f..7a732d1 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -19,9 +19,9 @@ class TestStandalone(TestCase): def setUp(self): pass - def _create_standalone(self, grammar): + def _create_standalone(self, grammar, compress=False): code_buf = StringIO() - standalone.gen_standalone(Lark(grammar, parser='lalr'), out=code_buf) + standalone.gen_standalone(Lark(grammar, parser='lalr'), out=code_buf, compress=compress) code = code_buf.getvalue() context = {'__doc__': None} @@ -52,6 +52,11 @@ class TestStandalone(TestCase): self.assertRaises(context['UnexpectedToken'], l.parse, 'twelve') self.assertRaises(context['UnexpectedCharacters'], l.parse, '$ talks') + context = self._create_standalone(grammar, compress=True) + _Lark = context['Lark_StandAlone'] + l = _Lark() + x = l.parse('12 elephants') + def test_contextual(self): grammar = """ start: a b