@@ -96,6 +96,15 @@ class LarkOptions(Serialize):
    if __doc__:
        __doc__ += OPTIONS_DOC
    # Adding a new option needs to be done in multiple places:
    # - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts
    # - In the docstring above. It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs
    # - In `lark-stubs/lark.pyi`:
    #   - As an attribute of `LarkOptions`
    #   - As a parameter to `Lark.__init__`
    # - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
    # - Potentially in `lark.tools.__init__`, if it makes sense and can easily be passed as a command-line argument
    _defaults = {
        'debug': False,
        'keep_all_tokens': False,
@@ -163,8 +172,9 @@ class LarkOptions(Serialize):
        return cls(data)

    _LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags',
                             'regex', 'propagate_positions', 'keep_all_tokens', 'tree_class'}
    # Options that can be passed to the Lark parser, even when it was loaded from cache/standalone.
    # These options are only used outside of `load_grammar`.
    _LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'}
class Lark(Serialize):
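As a side note (not part of the diff), a minimal sketch of what the `_LOAD_ALLOWED_OPTIONS` whitelist is for, assuming a hypothetical one-rule grammar: options in the set only affect runtime behaviour, so they may still be passed even when `load_grammar` is skipped because the analysed grammar comes back from the cache.

from lark import Lark

# Hypothetical grammar; 'propagate_positions' is in _LOAD_ALLOWED_OPTIONS,
# so it is accepted on the second run as well, when the LALR tables are loaded from cache.
grammar = 'start: WORD\n%import common.WORD\n%ignore " "'
parser = Lark(grammar, parser='lalr', cache=True, propagate_positions=True)
tree = parser.parse('hello')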
@@ -0,0 +1,65 @@
import sys
from argparse import ArgumentParser, FileType

try:
    from textwrap import indent
except ImportError:
    def indent(text, prefix):
        return ''.join(prefix + line for line in text.splitlines(True))

from logging import DEBUG, INFO, WARN, ERROR
import warnings

from lark import Lark, logger
lalr_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options')

flags = [
    ('d', 'debug'),
    'keep_all_tokens',
    'regex',
    'propagate_positions',
    'maybe_placeholders',
    'use_bytes'
]

options = ['start', 'lexer']

lalr_argparser.add_argument('-v', '--verbose', action='count', default=0, help="Increase Logger output level, up to three times")
lalr_argparser.add_argument('-s', '--start', action='append', default=[])
lalr_argparser.add_argument('-l', '--lexer', default='contextual', choices=('standard', 'contextual'))
k = {'encoding': 'utf-8'} if sys.version_info > (3, 4) else {}
lalr_argparser.add_argument('-o', '--out', type=FileType('w', **k), default=sys.stdout, help='the output file (default=stdout)')
lalr_argparser.add_argument('grammar_file', type=FileType('r', **k), help='A valid .lark file')
for f in flags:
    if isinstance(f, tuple):
        options.append(f[1])
        lalr_argparser.add_argument('-' + f[0], '--' + f[1], action='store_true')
    else:
        options.append(f)
        lalr_argparser.add_argument('--' + f, action='store_true')
def build_lalr(namespace):
    logger.setLevel((ERROR, WARN, INFO, DEBUG)[min(namespace.verbose, 3)])
    if len(namespace.start) == 0:
        namespace.start.append('start')
    kwargs = {n: getattr(namespace, n) for n in options}
    return Lark(namespace.grammar_file, parser='lalr', **kwargs), namespace.out
def showwarning_as_comment(message, category, filename, lineno, file=None, line=None):
    # Based on warnings._showwarnmsg_impl
    text = warnings.formatwarning(message, category, filename, lineno, line)
    text = indent(text, '# ')
    if file is None:
        file = sys.stderr
        if file is None:
            # sys.stderr can itself be None (e.g. under pythonw); the warning is silently dropped
            return
    try:
        file.write(text)
    except OSError:
        # the stream is closed or otherwise invalid; the warning is lost
        pass
def make_warnings_comments():
    warnings.showwarning = showwarning_as_comment
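To make the intent of this new module concrete, here is a minimal sketch of how a command-line tool reuses the shared helpers (the `mytool` module and its behaviour are hypothetical; the real serialize and standalone tools below follow the same pattern):

from argparse import ArgumentParser
from lark.tools import lalr_argparser, build_lalr

# Hypothetical tool: parse one string given on the command line and pretty-print the tree.
argparser = ArgumentParser(prog='python -m mytool', parents=[lalr_argparser])
argparser.add_argument('text', help='text to parse')

def main():
    ns = argparser.parse_args()
    parser, out = build_lalr(ns)   # applies -v, -s, -l, -o and the boolean flags
    out.write(parser.parse(ns.text).pretty())

if __name__ == '__main__':
    main()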
@@ -5,20 +5,16 @@ import json
from lark import Lark
from lark.grammar import RuleOptions, Rule
from lark.lexer import TerminalDef
from lark.tools import lalr_argparser, build_lalr

import argparse

argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize')  #description='''Lark Serialization Tool -- Stores Lark's internal state & LALR analysis as a convenient JSON file''')
argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[lalr_argparser],
                                    description="Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file",
                                    epilog='Look at the Lark documentation for more info on the options')

argparser.add_argument('grammar_file', type=argparse.FileType('r'), help='A valid .lark file')
argparser.add_argument('-o', '--out', type=argparse.FileType('w'), default=sys.stdout, help='json file path to create (default=stdout)')
argparser.add_argument('-s', '--start', default='start', help='start symbol (default="start")', nargs='+')
argparser.add_argument('-l', '--lexer', default='standard', choices=['standard', 'contextual'], help='lexer type (default="standard")')
def serialize(infile, outfile, lexer, start):
    lark_inst = Lark(infile, parser="lalr", lexer=lexer, start=start)    # TODO contextual

def serialize(lark_inst, outfile):
    data, memo = lark_inst.memo_serialize([TerminalDef, Rule])
    outfile.write('{\n')
    outfile.write(' "data": %s,\n' % json.dumps(data))
@@ -27,13 +23,9 @@ def serialize(infile, outfile, lexer, start):
def main():
    if len(sys.argv) == 1 or '-h' in sys.argv or '--help' in sys.argv:
        print("Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file")
        print("")
        argparser.print_help()
    else:
        args = argparser.parse_args()
        serialize(args.grammar_file, args.out, args.lexer, args.start)
        ns = argparser.parse_args()
        serialize(*build_lalr(ns))

if __name__ == '__main__':
    main()
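With the serializer now built on the shared argparser, a typical invocation (file names hypothetical) would look something like `python -m lark.tools.serialize my_grammar.lark -s start -o parser_state.json`, with `-l`, `-v` and the boolean flags available as well.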
@@ -26,22 +26,21 @@ from __future__ import print_function
#
#
import os
from io import open
###}
import codecs
import sys
import token, tokenize
import os
from pprint import pprint
from os import path
from collections import defaultdict
from functools import partial
from argparse import ArgumentParser, SUPPRESS
from warnings import warn
import lark
from lark import Lark
from lark.parsers.lalr_analysis import Reduce
from lark.tools import lalr_argparser, build_lalr, make_warnings_comments
from lark.grammar import RuleOptions, Rule
@@ -120,48 +119,75 @@ def strip_docstrings(line_gen):
def main(fobj, start, print=print):
    warn('`lark.tools.standalone.main` is being redesigned. Use `gen_standalone`', DeprecationWarning)
    lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)
    gen_standalone(lark_inst, print)
def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False):
    if output is None:
        output = partial(print, file=out)

    import pickle, zlib, base64
    def compressed_output(obj):
        s = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
        c = zlib.compress(s)
        output(repr(base64.b85encode(c)))

    print('# The file was automatically generated by Lark v%s' % lark.__version__)
    print('__version__ = "%s"' % lark.__version__)
    print()

    def output_decompress(name):
        output('%(name)s = pickle.loads(zlib.decompress(base64.b85decode(%(name)s)))' % locals())

    output('# The file was automatically generated by Lark v%s' % lark.__version__)
    output('__version__ = "%s"' % lark.__version__)
    output()
    for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
        with open(os.path.join(_larkdir, pyfile)) as f:
            code = extract_sections(f)['standalone']
            if i:   # if not this file
                code = strip_docstrings(partial(next, iter(code.splitlines(True))))
            print(code)
            output(code)
    data, m = lark_inst.memo_serialize([TerminalDef, Rule])
    print( 'DATA = (' )
    # pprint(data, width=160)
    print(data)
    print(')')
    print( 'MEMO = (')
    print(m)
    print(')')

    output('import pickle, zlib, base64')
    if compress:
        output('DATA = (')
        compressed_output(data)
        output(')')
        output_decompress('DATA')
        output('MEMO = (')
        compressed_output(m)
        output(')')
        output_decompress('MEMO')
    else:
        output('DATA = (')
        output(data)
        output(')')
        output('MEMO = (')
        output(m)
        output(')')

    print('Shift = 0')
    print('Reduce = 1')
    print("def Lark_StandAlone(**kwargs):")
    print(" return Lark._load_from_dict(DATA, MEMO, **kwargs)")
    output('Shift = 0')
    output('Reduce = 1')
    output("def Lark_StandAlone(**kwargs):")
    output(" return Lark._load_from_dict(DATA, MEMO, **kwargs)")
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Lark Stand-alone Generator Tool")
        print("Usage: python -m lark.tools.standalone <grammar-file> [<start>]")
        sys.exit(1)

    if len(sys.argv) == 3:
        fn, start = sys.argv[1:]
    elif len(sys.argv) == 2:
        fn, start = sys.argv[1], 'start'
    else:
        assert False, sys.argv

    with codecs.open(fn, encoding='utf8') as f:
        main(f, start)
def main():
    make_warnings_comments()
    parser = ArgumentParser(prog='python -m lark.tools.standalone', description="Lark Stand-alone Generator Tool",
                            parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options')
    parser.add_argument("old_start", nargs='?', help=SUPPRESS)
    parser.add_argument('-c', '--compress', action='store_true', default=0, help="Enable compression")

    ns = parser.parse_args()
    if ns.old_start is not None:
        warn('The syntax `python -m lark.tools.standalone <grammar-file> <start>` is deprecated. Use the -s option')
        ns.start.append(ns.old_start)

    lark_inst, out = build_lalr(ns)
    gen_standalone(lark_inst, out=out, compress=ns.compress)

if __name__ == '__main__':
    main()
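A hedged sketch of the intended end-to-end workflow after this change (grammar and module names are hypothetical):

# Generated beforehand with, e.g.:
#   python -m lark.tools.standalone -s start -c my_grammar.lark -o my_parser.py
# (-c enables the pickle/zlib/base85 compression added above)
from my_parser import Lark_StandAlone

parser = Lark_StandAlone()
tree = parser.parse('input text matching my_grammar')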
@@ -3,7 +3,7 @@ from __future__ import absolute_import, print_function
import sys
from unittest import TestCase, main
from functools import partial
from lark import Lark
from lark.tree import Tree
from lark.tools import standalone
@@ -19,10 +19,9 @@ class TestStandalone(TestCase):
    def setUp(self):
        pass

    def _create_standalone(self, grammar):
    def _create_standalone(self, grammar, compress=False):
        code_buf = StringIO()
        pr = partial(print, file=code_buf)
        standalone.main(StringIO(grammar), 'start', print=pr)
        standalone.gen_standalone(Lark(grammar, parser='lalr'), out=code_buf, compress=compress)
        code = code_buf.getvalue()

        context = {'__doc__': None}
@@ -53,6 +52,11 @@ class TestStandalone(TestCase):
        self.assertRaises(context['UnexpectedToken'], l.parse, 'twelve')
        self.assertRaises(context['UnexpectedCharacters'], l.parse, '$ talks')

        context = self._create_standalone(grammar, compress=True)
        _Lark = context['Lark_StandAlone']
        l = _Lark()
        x = l.parse('12 elephants')

    def test_contextual(self):
        grammar = """
        start: a b