@@ -96,6 +96,15 @@ class LarkOptions(Serialize): | |||||
if __doc__: | if __doc__: | ||||
__doc__ += OPTIONS_DOC | __doc__ += OPTIONS_DOC | ||||
# Adding a new option needs to be done in multiple places: | |||||
# - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts | |||||
# - In the docstring above. It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs | |||||
# - In `lark-stubs/lark.pyi`: | |||||
# - As attribute to `LarkOptions` | |||||
# - As parameter to `Lark.__init__` | |||||
# - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded | |||||
# - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument | |||||
_defaults = { | _defaults = { | ||||
'debug': False, | 'debug': False, | ||||
'keep_all_tokens': False, | 'keep_all_tokens': False, | ||||
@@ -163,8 +172,9 @@ class LarkOptions(Serialize): | |||||
return cls(data) | return cls(data) | ||||
_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', | |||||
'regex', 'propagate_positions', 'keep_all_tokens', 'tree_class'} | |||||
# Options that can be passed to the Lark parser, even when it was loaded from cache/standalone. | |||||
# These options are only used outside of `load_grammar`. | |||||
_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'} | |||||
class Lark(Serialize): | class Lark(Serialize): | ||||
@@ -0,0 +1,65 @@ | |||||
import sys | |||||
from argparse import ArgumentParser, FileType | |||||
try: | |||||
from textwrap import indent | |||||
except ImportError: | |||||
def indent(text, prefix): | |||||
return ''.join(prefix + line for line in text.splitlines(True)) | |||||
from logging import DEBUG, INFO, WARN, ERROR | |||||
import warnings | |||||
from lark import Lark, logger | |||||
lalr_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options') | |||||
flags = [ | |||||
('d', 'debug'), | |||||
'keep_all_tokens', | |||||
'regex', | |||||
'propagate_positions', | |||||
'maybe_placeholders', | |||||
'use_bytes' | |||||
] | |||||
options = ['start', 'lexer'] | |||||
lalr_argparser.add_argument('-v', '--verbose', action='count', default=0, help="Increase Logger output level, up to three times") | |||||
lalr_argparser.add_argument('-s', '--start', action='append', default=[]) | |||||
lalr_argparser.add_argument('-l', '--lexer', default='contextual', choices=('standard', 'contextual')) | |||||
k = {'encoding': 'utf-8'} if sys.version_info > (3, 4) else {} | |||||
lalr_argparser.add_argument('-o', '--out', type=FileType('w', **k), default=sys.stdout, help='the output file (default=stdout)') | |||||
lalr_argparser.add_argument('grammar_file', type=FileType('r', **k), help='A valid .lark file') | |||||
for f in flags: | |||||
if isinstance(f, tuple): | |||||
options.append(f[1]) | |||||
lalr_argparser.add_argument('-' + f[0], '--' + f[1], action='store_true') | |||||
else: | |||||
options.append(f) | |||||
lalr_argparser.add_argument('--' + f, action='store_true') | |||||
def build_lalr(namespace): | |||||
logger.setLevel((ERROR, WARN, INFO, DEBUG)[min(namespace.verbose, 3)]) | |||||
if len(namespace.start) == 0: | |||||
namespace.start.append('start') | |||||
kwargs = {n: getattr(namespace, n) for n in options} | |||||
return Lark(namespace.grammar_file, parser='lalr', **kwargs), namespace.out | |||||
def showwarning_as_comment(message, category, filename, lineno, file=None, line=None): | |||||
# Based on warnings._showwarnmsg_impl | |||||
text = warnings.formatwarning(message, category, filename, lineno, line) | |||||
text = indent(text, '# ') | |||||
if file is None: | |||||
file = sys.stderr | |||||
if file is None: | |||||
return | |||||
try: | |||||
file.write(text) | |||||
except OSError: | |||||
pass | |||||
def make_warnings_comments(): | |||||
warnings.showwarning = showwarning_as_comment |
@@ -5,20 +5,16 @@ import json | |||||
from lark import Lark | from lark import Lark | ||||
from lark.grammar import RuleOptions, Rule | from lark.grammar import RuleOptions, Rule | ||||
from lark.lexer import TerminalDef | from lark.lexer import TerminalDef | ||||
from lark.tools import lalr_argparser, build_lalr | |||||
import argparse | import argparse | ||||
argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize') #description='''Lark Serialization Tool -- Stores Lark's internal state & LALR analysis as a convenient JSON file''') | |||||
argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[lalr_argparser], | |||||
description="Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file", | |||||
epilog='Look at the Lark documentation for more info on the options') | |||||
argparser.add_argument('grammar_file', type=argparse.FileType('r'), help='A valid .lark file') | |||||
argparser.add_argument('-o', '--out', type=argparse.FileType('w'), default=sys.stdout, help='json file path to create (default=stdout)') | |||||
argparser.add_argument('-s', '--start', default='start', help='start symbol (default="start")', nargs='+') | |||||
argparser.add_argument('-l', '--lexer', default='standard', choices=['standard', 'contextual'], help='lexer type (default="standard")') | |||||
def serialize(infile, outfile, lexer, start): | |||||
lark_inst = Lark(infile, parser="lalr", lexer=lexer, start=start) # TODO contextual | |||||
def serialize(lark_inst, outfile): | |||||
data, memo = lark_inst.memo_serialize([TerminalDef, Rule]) | data, memo = lark_inst.memo_serialize([TerminalDef, Rule]) | ||||
outfile.write('{\n') | outfile.write('{\n') | ||||
outfile.write(' "data": %s,\n' % json.dumps(data)) | outfile.write(' "data": %s,\n' % json.dumps(data)) | ||||
@@ -27,13 +23,9 @@ def serialize(infile, outfile, lexer, start): | |||||
def main(): | def main(): | ||||
if len(sys.argv) == 1 or '-h' in sys.argv or '--help' in sys.argv: | |||||
print("Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file") | |||||
print("") | |||||
argparser.print_help() | |||||
else: | |||||
args = argparser.parse_args() | |||||
serialize(args.grammar_file, args.out, args.lexer, args.start) | |||||
ns = argparser.parse_args() | |||||
serialize(*build_lalr(ns)) | |||||
if __name__ == '__main__': | if __name__ == '__main__': | ||||
main() | |||||
main() |
@@ -26,22 +26,21 @@ from __future__ import print_function | |||||
# | # | ||||
# | # | ||||
import os | |||||
from io import open | from io import open | ||||
###} | ###} | ||||
import codecs | |||||
import sys | import sys | ||||
import token, tokenize | import token, tokenize | ||||
import os | import os | ||||
from pprint import pprint | |||||
from os import path | from os import path | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from functools import partial | from functools import partial | ||||
from argparse import ArgumentParser, SUPPRESS | |||||
from warnings import warn | |||||
import lark | import lark | ||||
from lark import Lark | from lark import Lark | ||||
from lark.parsers.lalr_analysis import Reduce | |||||
from lark.tools import lalr_argparser, build_lalr, make_warnings_comments | |||||
from lark.grammar import RuleOptions, Rule | from lark.grammar import RuleOptions, Rule | ||||
@@ -120,48 +119,75 @@ def strip_docstrings(line_gen): | |||||
def main(fobj, start, print=print): | def main(fobj, start, print=print): | ||||
warn('`lark.tools.standalone.main` is being redesigned. Use `gen_standalone`', DeprecationWarning) | |||||
lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start) | lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start) | ||||
gen_standalone(lark_inst, print) | |||||
def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False): | |||||
if output is None: | |||||
output = partial(print, file=out) | |||||
import pickle, zlib, base64 | |||||
def compressed_output(obj): | |||||
s = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL) | |||||
c = zlib.compress(s) | |||||
output(repr(base64.b85encode(c))) | |||||
print('# The file was automatically generated by Lark v%s' % lark.__version__) | |||||
print('__version__ = "%s"' % lark.__version__) | |||||
print() | |||||
def output_decompress(name): | |||||
output('%(name)s = pickle.loads(zlib.decompress(base64.b85decode(%(name)s)))' % locals()) | |||||
output('# The file was automatically generated by Lark v%s' % lark.__version__) | |||||
output('__version__ = "%s"' % lark.__version__) | |||||
output() | |||||
for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES): | for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES): | ||||
with open(os.path.join(_larkdir, pyfile)) as f: | with open(os.path.join(_larkdir, pyfile)) as f: | ||||
code = extract_sections(f)['standalone'] | code = extract_sections(f)['standalone'] | ||||
if i: # if not this file | if i: # if not this file | ||||
code = strip_docstrings(partial(next, iter(code.splitlines(True)))) | code = strip_docstrings(partial(next, iter(code.splitlines(True)))) | ||||
print(code) | |||||
output(code) | |||||
data, m = lark_inst.memo_serialize([TerminalDef, Rule]) | data, m = lark_inst.memo_serialize([TerminalDef, Rule]) | ||||
print( 'DATA = (' ) | |||||
# pprint(data, width=160) | |||||
print(data) | |||||
print(')') | |||||
print( 'MEMO = (') | |||||
print(m) | |||||
print(')') | |||||
output('import pickle, zlib, base64') | |||||
if compress: | |||||
output('DATA = (') | |||||
compressed_output(data) | |||||
output(')') | |||||
output_decompress('DATA') | |||||
output('MEMO = (') | |||||
compressed_output(m) | |||||
output(')') | |||||
output_decompress('MEMO') | |||||
else: | |||||
output('DATA = (') | |||||
output(data) | |||||
output(')') | |||||
output('MEMO = (') | |||||
output(m) | |||||
output(')') | |||||
print('Shift = 0') | |||||
print('Reduce = 1') | |||||
print("def Lark_StandAlone(**kwargs):") | |||||
print(" return Lark._load_from_dict(DATA, MEMO, **kwargs)") | |||||
output('Shift = 0') | |||||
output('Reduce = 1') | |||||
output("def Lark_StandAlone(**kwargs):") | |||||
output(" return Lark._load_from_dict(DATA, MEMO, **kwargs)") | |||||
if __name__ == '__main__': | |||||
if len(sys.argv) < 2: | |||||
print("Lark Stand-alone Generator Tool") | |||||
print("Usage: python -m lark.tools.standalone <grammar-file> [<start>]") | |||||
sys.exit(1) | |||||
if len(sys.argv) == 3: | |||||
fn, start = sys.argv[1:] | |||||
elif len(sys.argv) == 2: | |||||
fn, start = sys.argv[1], 'start' | |||||
else: | |||||
assert False, sys.argv | |||||
with codecs.open(fn, encoding='utf8') as f: | |||||
main(f, start) | |||||
def main(): | |||||
make_warnings_comments() | |||||
parser = ArgumentParser(prog="prog='python -m lark.tools.standalone'", description="Lark Stand-alone Generator Tool", | |||||
parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options') | |||||
parser.add_argument("old_start", nargs='?', help=SUPPRESS) | |||||
parser.add_argument('-c', '--compress', action='store_true', default=0, help="Enable compression") | |||||
ns = parser.parse_args() | |||||
if ns.old_start is not None: | |||||
warn('The syntax `python -m lark.tools.standalone <grammar-file> <start>` is deprecated. Use the -s option') | |||||
ns.start.append(ns.old_start) | |||||
lark_inst, out = build_lalr(ns) | |||||
gen_standalone(lark_inst, out=out, compress=ns.compress) | |||||
if __name__ == '__main__': | |||||
main() |
@@ -3,7 +3,7 @@ from __future__ import absolute_import, print_function | |||||
import sys | import sys | ||||
from unittest import TestCase, main | from unittest import TestCase, main | ||||
from functools import partial | |||||
from lark import Lark | |||||
from lark.tree import Tree | from lark.tree import Tree | ||||
from lark.tools import standalone | from lark.tools import standalone | ||||
@@ -19,10 +19,9 @@ class TestStandalone(TestCase): | |||||
def setUp(self): | def setUp(self): | ||||
pass | pass | ||||
def _create_standalone(self, grammar): | |||||
def _create_standalone(self, grammar, compress=False): | |||||
code_buf = StringIO() | code_buf = StringIO() | ||||
pr = partial(print, file=code_buf) | |||||
standalone.main(StringIO(grammar), 'start', print=pr) | |||||
standalone.gen_standalone(Lark(grammar, parser='lalr'), out=code_buf, compress=compress) | |||||
code = code_buf.getvalue() | code = code_buf.getvalue() | ||||
context = {'__doc__': None} | context = {'__doc__': None} | ||||
@@ -53,6 +52,11 @@ class TestStandalone(TestCase): | |||||
self.assertRaises(context['UnexpectedToken'], l.parse, 'twelve') | self.assertRaises(context['UnexpectedToken'], l.parse, 'twelve') | ||||
self.assertRaises(context['UnexpectedCharacters'], l.parse, '$ talks') | self.assertRaises(context['UnexpectedCharacters'], l.parse, '$ talks') | ||||
context = self._create_standalone(grammar, compress=True) | |||||
_Lark = context['Lark_StandAlone'] | |||||
l = _Lark() | |||||
x = l.parse('12 elephants') | |||||
def test_contextual(self): | def test_contextual(self): | ||||
grammar = """ | grammar = """ | ||||
start: a b | start: a b | ||||