diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1630c8b..c7b9286 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [2.7, 3.5, 3.6, 3.7, 3.8, 3.9.0-rc - 3.9, pypy2, pypy3] + python-version: [2.7, 3.5, 3.6, 3.7, 3.8, 3.9, 3.10.0-rc - 3.10, pypy2, pypy3] steps: - uses: actions/checkout@v2 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 6448cc8..0000000 --- a/.travis.yml +++ /dev/null @@ -1,15 +0,0 @@ -dist: xenial -language: python -python: - - "2.7" - - "3.4" - - "3.5" - - "3.6" - - "3.7" - - "3.8" - - "3.9-dev" - - "pypy2.7-6.0" - - "pypy3.5-6.0" -install: pip install tox-travis -script: - - tox diff --git a/README.md b/README.md index 8ec22ed..156a671 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Most importantly, Lark will save you time and prevent you from getting parsing h - [Documentation @readthedocs](https://lark-parser.readthedocs.io/) - [Cheatsheet (PDF)](/docs/_static/lark_cheatsheet.pdf) -- [Online IDE (very basic)](https://lark-parser.github.io/lark/ide/app.html) +- [Online IDE](https://lark-parser.github.io/ide) - [Tutorial](/docs/json_tutorial.md) for writing a JSON parser. - Blog post: [How to write a DSL with Lark](http://blog.erezsh.com/how-to-write-a-dsl-in-python-with-lark/) - [Gitter chat](https://gitter.im/lark-parser/Lobby) @@ -37,7 +37,7 @@ Most importantly, Lark will save you time and prevent you from getting parsing h Lark has no dependencies. -[![Build Status](https://travis-ci.org/lark-parser/lark.svg?branch=master)](https://travis-ci.org/lark-parser/lark) +[![Tests](https://github.com/lark-parser/lark/actions/workflows/tests.yml/badge.svg)](https://github.com/lark-parser/lark/actions/workflows/tests.yml) ### Syntax Highlighting @@ -51,7 +51,10 @@ Lark provides syntax highlighting for its grammar files (\*.lark): ### Clones +These are implementations of Lark in other languages. They accept Lark grammars, and provide similar utilities. + - [Lerche (Julia)](https://github.com/jamesrhester/Lerche.jl) - an unofficial clone, written entirely in Julia. +- [Lark.js (Javascript)](https://github.com/lark-parser/lark.js) - a port of the stand-alone LALR(1) parser generator to Javascript. ### Hello World @@ -143,6 +146,7 @@ Check out the [JSON tutorial](/docs/json_tutorial.md#conclusion) for more detail ### Projects using Lark + - [Poetry](https://github.com/python-poetry/poetry-core) - A utility for dependency management and packaging - [tartiflette](https://github.com/dailymotion/tartiflette) - a GraphQL server by Dailymotion - [Hypothesis](https://github.com/HypothesisWorks/hypothesis) - Library for property-based testing - [mappyfile](https://github.com/geographika/mappyfile) - a MapFile parser for working with MapServer configuration diff --git a/docs/classes.rst b/docs/classes.rst index 7b18460..1287896 100644 --- a/docs/classes.rst +++ b/docs/classes.rst @@ -66,6 +66,8 @@ UnexpectedInput .. autoclass:: lark.exceptions.UnexpectedCharacters +.. 
autoclass:: lark.exceptions.UnexpectedEOF + InteractiveParser ----------------- diff --git a/docs/grammar.md b/docs/grammar.md index 0d77420..4ac5c77 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -159,14 +159,15 @@ start : (A | B)+ A : "a" | "ab" B : "b" ``` -We get this behavior: +We get only one possible derivation, instead of two: ```bash +>>> p = Lark(g, ambiguity="explicit") >>> p.parse("ab") -Tree(start, [Token(A, 'a'), Token(B, 'b')]) +Tree('start', [Token('A', 'ab')]) ``` -This is happening because Python's regex engine always returns the first matching option. +This is happening because Python's regex engine always returns the best matching option. There is no way to access the alternatives. If you find yourself in this situation, the recommended solution is to use rules instead. diff --git a/docs/index.rst b/docs/index.rst index 39ecd5a..e8bd6b2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -113,7 +113,7 @@ Resources .. _Examples: https://github.com/lark-parser/lark/tree/master/examples .. _Third-party examples: https://github.com/ligurio/lark-grammars -.. _Online IDE: https://lark-parser.github.io/lark/ide/app.html +.. _Online IDE: https://lark-parser.github.io/ide .. _How to write a DSL: http://blog.erezsh.com/how-to-write-a-dsl-in-python-with-lark/ .. _Program Synthesis is Possible: https://www.cs.cornell.edu/~asampson/blog/minisynth.html .. _Cheatsheet (PDF): _static/lark_cheatsheet.pdf diff --git a/docs/visitors.rst b/docs/visitors.rst index a0e1711..43d0513 100644 --- a/docs/visitors.rst +++ b/docs/visitors.rst @@ -103,7 +103,17 @@ v_args .. autofunction:: lark.visitors.v_args +merge_transformers +------------------ + +.. autofunction:: lark.visitors.merge_transformers + Discard ------- .. autoclass:: lark.visitors.Discard + +VisitError +---------- + +.. autoclass:: lark.exceptions.VisitError \ No newline at end of file diff --git a/examples/advanced/create_ast.py b/examples/advanced/create_ast.py index 537e8a8..95ce520 100644 --- a/examples/advanced/create_ast.py +++ b/examples/advanced/create_ast.py @@ -15,6 +15,7 @@ from typing import List from dataclasses import dataclass from lark import Lark, ast_utils, Transformer, v_args +from lark.tree import Meta this_module = sys.modules[__name__] @@ -31,7 +32,9 @@ class _Statement(_Ast): pass @dataclass -class Value(_Ast): +class Value(_Ast, ast_utils.WithMeta): + "Uses WithMeta to include line-number metadata in the meta attribute" + meta: Meta value: object @dataclass diff --git a/examples/advanced/extend_python.py b/examples/advanced/extend_python.py index bdc7149..ba5fa21 100644 --- a/examples/advanced/extend_python.py +++ b/examples/advanced/extend_python.py @@ -39,7 +39,7 @@ def name(n): """, start='file_input') -# Remove the 'python3__' prefix that was add to the implicitely imported rules. +# Remove the 'python3__' prefix that was added to the implicitly imported rules. for t in tree.iter_subtrees(): t.data = t.data.rsplit('__', 1)[-1] diff --git a/examples/advanced/python3.lark b/examples/advanced/python3.lark index 0fc5949..cb3b077 100644 --- a/examples/advanced/python3.lark +++ b/examples/advanced/python3.lark @@ -1,8 +1,6 @@ // Python 3 grammar for Lark -// NOTE: Work in progress!!! (XXX TODO) -// This grammar should parse all python 3.x code successfully, -// but the resulting parse-tree is still not well-organized. +// This grammar should parse all python 3.x code successfully. 
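+// A minimal way to load this grammar (a sketch based on the sibling examples, which define the PythonIndenter postlexer; python_source stands in for any Python source string): +// +//     from lark import Lark +//     parser = Lark.open('python3.lark', rel_to=__file__, parser='lalr', postlex=PythonIndenter(), start='file_input') +//     tree = parser.parse(python_source + '\n')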
// Adapted from: https://docs.python.org/3/reference/grammar.html // Adapted by: Erez Shinan @@ -21,7 +19,7 @@ decorators: decorator+ decorated: decorators (classdef | funcdef | async_funcdef) async_funcdef: "async" funcdef -funcdef: "def" NAME "(" parameters? ")" ["->" test] ":" suite +funcdef: "def" NAME "(" [parameters] ")" ["->" test] ":" suite parameters: paramvalue ("," paramvalue)* ["," SLASH] ["," [starparams | kwparams]] | starparams @@ -29,25 +27,36 @@ parameters: paramvalue ("," paramvalue)* ["," SLASH] ["," [starparams | kwparams SLASH: "/" // Otherwise it will completely disappear, and be indistinguishable in the result starparams: "*" typedparam? ("," paramvalue)* ["," kwparams] -kwparams: "**" typedparam +kwparams: "**" typedparam ","? -?paramvalue: typedparam ["=" test] -?typedparam: NAME [":" test] +?paramvalue: typedparam ("=" test)? +?typedparam: NAME (":" test)? -varargslist: (vfpdef ["=" test] ("," vfpdef ["=" test])* ["," [ "*" [vfpdef] ("," vfpdef ["=" test])* ["," ["**" vfpdef [","]]] | "**" vfpdef [","]]] - | "*" [vfpdef] ("," vfpdef ["=" test])* ["," ["**" vfpdef [","]]] - | "**" vfpdef [","]) -vfpdef: NAME +lambdef: "lambda" [lambda_params] ":" test +lambdef_nocond: "lambda" [lambda_params] ":" test_nocond +lambda_params: lambda_paramvalue ("," lambda_paramvalue)* ["," [lambda_starparams | lambda_kwparams]] + | lambda_starparams + | lambda_kwparams +?lambda_paramvalue: NAME ("=" test)? +lambda_starparams: "*" [NAME] ("," lambda_paramvalue)* ["," [lambda_kwparams]] +lambda_kwparams: "**" NAME ","? + ?stmt: simple_stmt | compound_stmt ?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE -?small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) -?expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) - | ("=" (yield_expr|testlist_star_expr))*) -annassign: ":" test ["=" test] -?testlist_star_expr: (test|star_expr) ("," (test|star_expr))* [","] -!augassign: ("+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=") +?small_stmt: (expr_stmt | assign_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr +assign_stmt: annassign | augassign | assign + +annassign: testlist_star_expr ":" test ["=" test] +assign: testlist_star_expr ("=" (yield_expr|testlist_star_expr))+ +augassign: testlist_star_expr augassign_op (yield_expr|testlist) +!augassign_op: "+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=" +?testlist_star_expr: test_or_star_expr | test_or_star_expr ("," test_or_star_expr)+ ","? 
-> tuple + | test_or_star_expr "," -> tuple + // For normal and annotated assignments, additional restrictions enforced by the interpreter del_stmt: "del" exprlist pass_stmt: "pass" @@ -71,43 +80,52 @@ global_stmt: "global" NAME ("," NAME)* nonlocal_stmt: "nonlocal" NAME ("," NAME)* assert_stmt: "assert" test ["," test] -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +?compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt async_stmt: "async" (funcdef | with_stmt | for_stmt) -if_stmt: "if" test ":" suite ("elif" test ":" suite)* ["else" ":" suite] +if_stmt: "if" test ":" suite elifs ["else" ":" suite] +elifs: elif_* +elif_: "elif" test ":" suite while_stmt: "while" test ":" suite ["else" ":" suite] for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite] -try_stmt: ("try" ":" suite ((except_clause ":" suite)+ ["else" ":" suite] ["finally" ":" suite] | "finally" ":" suite)) -with_stmt: "with" with_item ("," with_item)* ":" suite +try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally] + | "try" ":" suite finally -> try_finally +finally: "finally" ":" suite +except_clauses: except_clause+ +except_clause: "except" [test ["as" NAME]] ":" suite + +with_stmt: "with" with_items ":" suite +with_items: with_item ("," with_item)* with_item: test ["as" expr] // NB compile.c makes sure that the default except clause is last -except_clause: "except" [test ["as" NAME]] suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT -?test: or_test ("if" or_test "else" test)? | lambdef +?test: or_test ("if" or_test "else" test)? + | lambdef ?test_nocond: or_test | lambdef_nocond -lambdef: "lambda" [varargslist] ":" test -lambdef_nocond: "lambda" [varargslist] ":" test_nocond + ?or_test: and_test ("or" and_test)* -?and_test: not_test ("and" not_test)* -?not_test: "not" not_test -> not +?and_test: not_test_ ("and" not_test_)* +?not_test_: "not" not_test_ -> not_test | comparison -?comparison: expr (_comp_op expr)* +?comparison: expr (comp_op expr)* star_expr: "*" expr -?expr: xor_expr ("|" xor_expr)* + +?expr: or_expr +?or_expr: xor_expr ("|" xor_expr)* ?xor_expr: and_expr ("^" and_expr)* ?and_expr: shift_expr ("&" shift_expr)* ?shift_expr: arith_expr (_shift_op arith_expr)* ?arith_expr: term (_add_op term)* ?term: factor (_mul_op factor)* -?factor: _factor_op factor | power +?factor: _unary_op factor | power -!_factor_op: "+"|"-"|"~" +!_unary_op: "+"|"-"|"~" !_add_op: "+"|"-" !_shift_op: "<<"|">>" !_mul_op: "*"|"@"|"/"|"%"|"//" // <> isn't actually a valid comparison operator in Python. It's here for the // sake of a __future__ import described in PEP 401 (which really works :-) -!_comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not" +!comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not" ?power: await_expr ("**" factor)? ?await_expr: AWAIT? atom_expr @@ -118,61 +136,75 @@ AWAIT: "await" | atom_expr "." NAME -> getattr | atom -?atom: "(" [yield_expr|tuplelist_comp] ")" -> tuple - | "[" [testlist_comp] "]" -> list - | "{" [dict_comp] "}" -> dict - | "{" set_comp "}" -> set +?atom: "(" yield_expr ")" + | "(" _tuple_inner? ")" -> tuple + | "(" comprehension{test_or_star_expr} ")" -> tuple_comprehension + | "[" _testlist_comp? "]" -> list + | "[" comprehension{test_or_star_expr} "]" -> list_comprehension + | "{" _dict_exprlist? 
"}" -> dict + | "{" comprehension{key_value} "}" -> dict_comprehension + | "{" _set_exprlist "}" -> set + | "{" comprehension{test} "}" -> set_comprehension | NAME -> var - | number | string+ + | number + | string_concat | "(" test ")" | "..." -> ellipsis | "None" -> const_none | "True" -> const_true | "False" -> const_false -?testlist_comp: test | tuplelist_comp -tuplelist_comp: (test|star_expr) (comp_for | ("," (test|star_expr))+ [","] | ",") + +?string_concat: string+ + +_testlist_comp: test | _tuple_inner +_tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",") + + +?test_or_star_expr: test + | star_expr + ?subscriptlist: subscript | subscript (("," subscript)+ [","] | ",") -> subscript_tuple -subscript: test | ([test] ":" [test] [sliceop]) -> slice +?subscript: test | ([test] ":" [test] [sliceop]) -> slice sliceop: ":" [test] -exprlist: (expr|star_expr) - | (expr|star_expr) (("," (expr|star_expr))+ [","]|",") -> exprlist_tuple -testlist: test | testlist_tuple +?exprlist: (expr|star_expr) + | (expr|star_expr) (("," (expr|star_expr))+ [","]|",") +?testlist: test | testlist_tuple testlist_tuple: test (("," test)+ [","] | ",") -dict_comp: key_value comp_for - | (key_value | "**" expr) ("," (key_value | "**" expr))* [","] +_dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","] key_value: test ":" test -set_comp: test comp_for - | (test|star_expr) ("," (test | star_expr))* [","] +_set_exprlist: test_or_star_expr ("," test_or_star_expr)* [","] classdef: "class" NAME ["(" [arguments] ")"] ":" suite + + arguments: argvalue ("," argvalue)* ("," [ starargs | kwargs])? | starargs | kwargs - | test comp_for + | comprehension{test} -starargs: "*" test ("," "*" test)* ("," argvalue)* ["," kwargs] +starargs: stararg ("," stararg)* ("," argvalue)* ["," kwargs] +stararg: "*" test kwargs: "**" test ?argvalue: test ("=" test)? - -comp_iter: comp_for | comp_if | async_for -async_for: "async" "for" exprlist "in" or_test [comp_iter] -comp_for: "for" exprlist "in" or_test [comp_iter] -comp_if: "if" test_nocond [comp_iter] +comprehension{comp_result}: comp_result comp_fors [comp_if] +comp_fors: comp_for+ +comp_for: [ASYNC] "for" exprlist "in" or_test +ASYNC: "async" +?comp_if: "if" test_nocond // not used in grammar, but may appear in "node" passed from Parser to Compiler encoding_decl: NAME -yield_expr: "yield" [yield_arg] -yield_arg: "from" test | testlist - +yield_expr: "yield" [testlist] + | "yield" "from" test -> yield_from number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER string: STRING | LONG_STRING @@ -181,6 +213,7 @@ string: STRING | LONG_STRING %import python (NAME, COMMENT, STRING, LONG_STRING) %import python (DEC_NUMBER, HEX_NUMBER, OCT_NUMBER, BIN_NUMBER, FLOAT_NUMBER, IMAG_NUMBER) + // Other terminals _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ diff --git a/examples/advanced/python_bytecode.py b/examples/advanced/python_bytecode.py deleted file mode 100644 index 6165e82..0000000 --- a/examples/advanced/python_bytecode.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -Compile Python to Bytecode -========================== - -A toy example that compiles Python directly to bytecode, without generating an AST. -It currently only works for very very simple Python code. - -It requires the 'bytecode' library. 
You can get it using -:: - - $ pip install bytecode - -""" -from lark import Lark, Transformer, v_args -from lark.indenter import Indenter - -from bytecode import Instr, Bytecode - -class PythonIndenter(Indenter): - NL_type = '_NEWLINE' - OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE'] - CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE'] - INDENT_type = '_INDENT' - DEDENT_type = '_DEDENT' - tab_len = 8 - - -@v_args(inline=True) -class Compile(Transformer): - def number(self, n): - return [Instr('LOAD_CONST', int(n))] - def string(self, s): - return [Instr('LOAD_CONST', s[1:-1])] - def var(self, n): - return [Instr('LOAD_NAME', n)] - - def arith_expr(self, a, op, b): - # TODO support chain arithmetic - assert op == '+' - return a + b + [Instr('BINARY_ADD')] - - def arguments(self, args): - return args - - def funccall(self, name, args): - return name + args + [Instr('CALL_FUNCTION', 1)] - - @v_args(inline=False) - def file_input(self, stmts): - return sum(stmts, []) + [Instr("RETURN_VALUE")] - - def expr_stmt(self, lval, rval): - # TODO more complicated than that - name ,= lval - assert name.name == 'LOAD_NAME' # XXX avoid with another layer of abstraction - return rval + [Instr("STORE_NAME", name.arg)] - - def __default__(self, *args): - assert False, args - - -python_parser3 = Lark.open('python3.lark', rel_to=__file__, start='file_input', - parser='lalr', postlex=PythonIndenter(), - transformer=Compile(), propagate_positions=False) - -def compile_python(s): - insts = python_parser3.parse(s+"\n") - return Bytecode(insts).to_code() - -code = compile_python(""" -a = 3 -b = 5 -print("Hello World!") -print(a+(b+2)) -print((a+b)+2) -""") -exec(code) -# -- Output -- -# Hello World! -# 10 -# 10 diff --git a/examples/composition/README.md b/examples/composition/README.md new file mode 100644 index 0000000..259a66a --- /dev/null +++ b/examples/composition/README.md @@ -0,0 +1,10 @@ +Grammar Composition +=================== + +This example shows how to do grammar composition in Lark, by creating a new +file format that allows both CSV and JSON to co-exist. + +We show how, by using namespaces, Lark grammars and their transformers can be fully reused - +they don't need to care if their grammar is used directly, or being imported, or who is doing the importing. + +See [``main.py``](main.py) for more details. \ No newline at end of file diff --git a/examples/composition/combined_csv_and_json.txt b/examples/composition/combined_csv_and_json.txt new file mode 100644 index 0000000..5b8df82 --- /dev/null +++ b/examples/composition/combined_csv_and_json.txt @@ -0,0 +1,6 @@ +{"header": ["this", "is", "json", 1111]} +# file lines author +data.json 12 Robin +data.csv 30 erezsh +compiler.py 123123 Megalng +{"footer": "done"} diff --git a/examples/composition/csv.lark b/examples/composition/csv.lark new file mode 100644 index 0000000..cc2b675 --- /dev/null +++ b/examples/composition/csv.lark @@ -0,0 +1,14 @@ +start: header _NL row+ +header: "#" " "? 
(WORD _SEPARATOR?)+ +row: (_anything _SEPARATOR?)+ _NL +_anything: INT | WORD | NON_SEPARATOR_STRING | FLOAT | SIGNED_FLOAT +NON_SEPARATOR_STRING: /[a-zA-Z.;\\\/]+/ +_SEPARATOR: /[ ]+/ + | "\t" + | "," + +%import common.NEWLINE -> _NL +%import common.WORD +%import common.INT +%import common.FLOAT +%import common.SIGNED_FLOAT diff --git a/examples/composition/eval_csv.py b/examples/composition/eval_csv.py new file mode 100644 index 0000000..8b83f08 --- /dev/null +++ b/examples/composition/eval_csv.py @@ -0,0 +1,26 @@ +"Transformer for evaluating csv.lark" + +from lark import Transformer + +class CsvTreeToPandasDict(Transformer): + INT = int + FLOAT = float + SIGNED_FLOAT = float + WORD = str + NON_SEPARATOR_STRING = str + + def row(self, children): + return children + + def start(self, children): + data = {} + + header = children[0].children + for heading in header: + data[heading] = [] + + for row in children[1:]: + for i, element in enumerate(row): + data[header[i]].append(element) + + return data diff --git a/examples/composition/eval_json.py b/examples/composition/eval_json.py new file mode 100644 index 0000000..c665a19 --- /dev/null +++ b/examples/composition/eval_json.py @@ -0,0 +1,17 @@ +"Transformer for evaluating json.lark" + +from lark import Transformer, v_args + +class JsonTreeToJson(Transformer): + @v_args(inline=True) + def string(self, s): + return s[1:-1].replace('\\"', '"') + + array = list + pair = tuple + object = dict + number = v_args(inline=True)(float) + + null = lambda self, _: None + true = lambda self, _: True + false = lambda self, _: False diff --git a/examples/composition/json.lark b/examples/composition/json.lark new file mode 100644 index 0000000..bb77c35 --- /dev/null +++ b/examples/composition/json.lark @@ -0,0 +1,19 @@ +?start: value + +?value: object + | array + | string + | SIGNED_NUMBER -> number + | "true" -> true + | "false" -> false + | "null" -> null + +array : "[" _WS? [value ("," _WS? value)*] "]" +object : "{" _WS? [pair ("," _WS? pair)*] "}" +pair : string ":" _WS value + +string : ESCAPED_STRING + +%import common.ESCAPED_STRING +%import common.SIGNED_NUMBER +%import common.WS -> _WS diff --git a/examples/composition/main.py b/examples/composition/main.py new file mode 100644 index 0000000..c6f150f --- /dev/null +++ b/examples/composition/main.py @@ -0,0 +1,51 @@ +""" +Grammar Composition +=================== + +This example shows how to do grammar composition in Lark, by creating a new +file format that allows both CSV and JSON to co-exist. + +1) We define ``storage.lark``, which imports both ``csv.lark`` and ``json.lark``, + and allows them to be used one after the other. + + In the generated tree, each imported rule/terminal is automatically prefixed (with ``json__`` or ``csv__``), + which creates an implicit namespace and allows them to coexist without collisions. + +2) We merge their respective transformers (unaware of each other) into a new base transformer. + The resulting transformer can evaluate both JSON and CSV in the parse tree. + + The methods of each transformer are renamed into their appropriate namespace, using the given prefix. + This approach allows full re-use: the transformers don't need to care if their grammar is used directly, + or being imported, or who is doing the importing. 
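+ + For example, the ``pair`` rule imported from ``json.lark`` appears in the combined parse tree as ``json__pair``, and ``merge_transformers`` (used below) renames the JSON transformer's ``pair`` method to ``json__pair`` to match.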
+ +""" +from pathlib import Path +from lark import Lark +from json import dumps +from lark.visitors import Transformer, merge_transformers + +from eval_csv import CsvTreeToPandasDict +from eval_json import JsonTreeToJson + +__dir__ = Path(__file__).parent + +class Storage(Transformer): + def start(self, children): + return children + +storage_transformer = merge_transformers(Storage(), csv=CsvTreeToPandasDict(), json=JsonTreeToJson()) + +parser = Lark.open("storage.lark", rel_to=__file__) + +def main(): + json_tree = parser.parse(dumps({"test": "a", "dict": { "list": [1, 1.2] }})) + res = storage_transformer.transform(json_tree) + print("Just JSON: ", res) + + csv_json_tree = parser.parse(open(__dir__ / 'combined_csv_and_json.txt').read()) + res = storage_transformer.transform(csv_json_tree) + print("JSON + CSV: ", dumps(res, indent=2)) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/composition/storage.lark b/examples/composition/storage.lark new file mode 100644 index 0000000..09bb0ae --- /dev/null +++ b/examples/composition/storage.lark @@ -0,0 +1,9 @@ +start: (csv__start | json__start _NL?)+ + +// Renaming of the import variables is required, as they receive the namespace of this file. +// See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565 +%import .csv.start -> csv__start +%import .json.start -> json__start + +%import .csv._NL -> _NL + diff --git a/examples/standalone/json_parser_main.py b/examples/standalone/json_parser_main.py index 503b249..3d9b5a6 100644 --- a/examples/standalone/json_parser_main.py +++ b/examples/standalone/json_parser_main.py @@ -10,7 +10,9 @@ Standalone Parser import sys -from json_parser import Lark_StandAlone, Transformer, inline_args +from json_parser import Lark_StandAlone, Transformer, v_args + +inline_args = v_args(inline=True) class TreeToJson(Transformer): @inline_args diff --git a/lark-stubs/tree.pyi b/lark-stubs/tree.pyi index ea99ff6..0c12819 100644 --- a/lark-stubs/tree.pyi +++ b/lark-stubs/tree.pyi @@ -40,6 +40,9 @@ class Tree: def expand_kids_by_index(self, *indices: int) -> None: ... + def expand_kids_by_data(self, *data_values: str) -> bool: + ... + def scan_values(self, pred: Callable[[Union[str, Tree]], bool]) -> Iterator[str]: ... diff --git a/lark/__init__.py b/lark/__init__.py index f056182..909d410 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -7,4 +7,4 @@ from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken, from .lexer import Token from .lark import Lark -__version__ = "0.11.4" +__version__ = "0.12.0" diff --git a/lark/ast_utils.py b/lark/ast_utils.py index 0f2e498..0c03d45 100644 --- a/lark/ast_utils.py +++ b/lark/ast_utils.py @@ -19,15 +19,17 @@ class AsList(object): Subclasses will be instantiated with the parse results as a single list, instead of as arguments. """ -def camel_to_snake(name): - return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower() diff --git a/lark/load_grammar.py b/lark/load_grammar.py --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -113,9 +114,10 @@ RULES = { ''], '_template_params': ['RULE', '_template_params _COMMA RULE'], - 'expansions': ['alias', - 'expansions _OR alias', - 'expansions _NL _OR alias'], + 'expansions': ['_expansions'], + '_expansions': ['alias', + '_expansions _OR alias', + '_expansions _NL _OR alias'], '?alias': ['expansion _TO RULE', 'expansion'], 'expansion': ['_expansion'], @@ -175,27 +177,136 @@ RULES = { } +# Value 5 keeps the number of states in the lalr parser somewhat minimal +# It isn't optimal, but close to it. 
See PR #949 +SMALL_FACTOR_THRESHOLD = 5 +# The threshold for deciding whether a repeat via ~ is split up into different rules +# 50 is chosen since it keeps the number of states low and therefore lalr analysis time low, +# while not being too overaggressive and unnecessarily creating rules that might create shift/reduce conflicts. +# (See PR #949) +REPEAT_BREAK_THRESHOLD = 50 + + @inline_args class EBNF_to_BNF(Transformer_InPlace): def __init__(self): self.new_rules = [] - self.rules_by_expr = {} + self.rules_cache = {} self.prefix = 'anon' self.i = 0 self.rule_options = None - def _add_recurse_rule(self, type_, expr): - if expr in self.rules_by_expr: - return self.rules_by_expr[expr] - - new_name = '__%s_%s_%d' % (self.prefix, type_, self.i) + def _name_rule(self, inner): + new_name = '__%s_%s_%d' % (self.prefix, inner, self.i) self.i += 1 - t = NonTerminal(new_name) - tree = ST('expansions', [ST('expansion', [expr]), ST('expansion', [t, expr])]) - self.new_rules.append((new_name, tree, self.rule_options)) - self.rules_by_expr[expr] = t + return new_name + + def _add_rule(self, key, name, expansions): + t = NonTerminal(name) + self.new_rules.append((name, expansions, self.rule_options)) + self.rules_cache[key] = t return t + def _add_recurse_rule(self, type_, expr): + try: + return self.rules_cache[expr] + except KeyError: + new_name = self._name_rule(type_) + t = NonTerminal(new_name) + tree = ST('expansions', [ + ST('expansion', [expr]), + ST('expansion', [t, expr]) + ]) + return self._add_rule(expr, new_name, tree) + + def _add_repeat_rule(self, a, b, target, atom): + """Generate a rule that repeats target ``a`` times, and repeats atom ``b`` times. + + When called recursively (into target), it repeats atom for x(n) times, where: + x(0) = 1 + x(n) = a(n) * x(n-1) + b + + Example rule when a=3, b=4: + + new_rule: target target target atom atom atom atom + + """ + key = (a, b, target, atom) + try: + return self.rules_cache[key] + except KeyError: + new_name = self._name_rule('repeat_a%d_b%d' % (a, b)) + tree = ST('expansions', [ST('expansion', [target] * a + [atom] * b)]) + return self._add_rule(key, new_name, tree) + + def _add_repeat_opt_rule(self, a, b, target, target_opt, atom): + """Creates a rule that matches atom 0 to (a*n+b)-1 times. + + Here, target matches atom exactly n times, and target_opt matches atom 0 to n-1 times. + + First we generate target * i followed by target_opt, for i from 0 to a-1 + These match 0 to n*a - 1 times atom + + Then we generate target * a followed by atom * i, for i from 0 to b-1 + These match n*a to n*a + b-1 times atom + + The created rule will not have any shift/reduce conflicts so that it can be used with lalr + + Example rule when a=3, b=4: + + new_rule: target_opt + | target target_opt + | target target target_opt + + | target target target + | target target target atom + | target target target atom atom + | target target target atom atom atom + + """ + key = (a, b, target, atom, "opt") + try: + return self.rules_cache[key] + except KeyError: + new_name = self._name_rule('repeat_a%d_b%d_opt' % (a, b)) + tree = ST('expansions', [ + ST('expansion', [target]*i + [target_opt]) for i in range(a) + ] + [ + ST('expansion', [target]*a + [atom]*i) for i in range(b) + ]) + return self._add_rule(key, new_name, tree) + + def _generate_repeats(self, rule, mn, mx): + """Generates a rule tree that repeats ``rule`` exactly between ``mn`` and ``mx`` times. 
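+ + For example, ``small_factors(60, SMALL_FACTOR_THRESHOLD)`` returns ``[(3, 0), (4, 0), (5, 0)]``, so ``rule~60`` is built from three nested helper rules of at most 5 symbols each, instead of a single 60-symbol expansion.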
+ """ + # For a small number of repeats, we can take the naive approach + if mx < REPEAT_BREAK_THRESHOLD: + return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx + 1)]) + + # For large repeat values, we break the repetition into sub-rules. + # We treat ``rule~mn..mx`` as ``rule~mn rule~0..(diff=mx-mn)``. + # We then use small_factors to split mn and diff up into values [(a, b), ...] + # These values are used with the help of _add_repeat_rule and _add_repeat_opt_rule + # to generate a complete rule/expression that matches the corresponding number of repeats + mn_target = rule + for a, b in small_factors(mn, SMALL_FACTOR_THRESHOLD): + mn_target = self._add_repeat_rule(a, b, mn_target, rule) + if mx == mn: + return mn_target + + diff = mx - mn + 1 # We add one because _add_repeat_opt_rule generates rules that match one less + diff_factors = small_factors(diff, SMALL_FACTOR_THRESHOLD) + diff_target = rule # Match rule 1 times + diff_opt_target = ST('expansion', []) # match rule 0 times (i.e. up to 1-1 times) + for a, b in diff_factors[:-1]: + diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule) + diff_target = self._add_repeat_rule(a, b, diff_target, rule) + + a, b = diff_factors[-1] + diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule) + + return ST('expansions', [ST('expansion', [mn_target] + [diff_opt_target])]) + def expr(self, rule, op, *args): if op.value == '?': empty = ST('expansion', []) @@ -220,7 +331,9 @@ class EBNF_to_BNF(Transformer_InPlace): mn, mx = map(int, args) if mx < mn or mn < 0: raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx)) - return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx+1)]) + + return self._generate_repeats(rule, mn, mx) + assert False, op def maybe(self, rule): @@ -244,12 +357,8 @@ class SimplifyRule_Visitor(Visitor): @staticmethod def _flatten(tree): - while True: - to_expand = [i for i, child in enumerate(tree.children) - if isinstance(child, Tree) and child.data == tree.data] - if not to_expand: - break - tree.expand_kids_by_index(*to_expand) + while tree.expand_kids_by_data(tree.data): + pass def expansion(self, tree): # rules_list unpacking @@ -487,8 +596,7 @@ def _make_joined_pattern(regexp, flags_set): return PatternRE(regexp, flags) - -class TerminalTreeToPattern(Transformer): +class TerminalTreeToPattern(Transformer_NonRecursive): def pattern(self, ps): p ,= ps return p @@ -505,6 +613,10 @@ class TerminalTreeToPattern(Transformer): if len(exps) == 1: return exps[0] + # Do a bit of sorting to make sure that the longest option is returned + # (Python's re module otherwise prefers just 'l' when given (l|ll) and both could match) + exps.sort(key=lambda x: (-x.max_width, -x.min_width, -len(x.value))) + pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps)) return _make_joined_pattern(pattern, {i.flags for i in exps}) @@ -558,8 +670,8 @@ class Grammar: def compile(self, start, terminals_to_keep): # We change the trees in-place (to support huge grammars) # So deepcopy allows calling compile more than once. 
- term_defs = deepcopy(list(self.term_defs)) - rule_defs = [(n,p,nr_deepcopy_tree(t),o) for n,p,t,o in self.rule_defs] + term_defs = [(n, (nr_deepcopy_tree(t), p)) for n, (t, p) in self.term_defs] + rule_defs = [(n, p, nr_deepcopy_tree(t), o) for n, p, t, o in self.rule_defs] # =================== # Compile Terminals @@ -632,7 +744,10 @@ class Grammar: else: exp_options = options - assert all(isinstance(x, Symbol) for x in expansion), expansion + for sym in expansion: + assert isinstance(sym, Symbol) + if sym.is_term and exp_options and exp_options.keep_all_tokens: + sym.filter_out = False rule = Rule(NonTerminal(name), expansion, i, alias, exp_options) compiled_rules.append(rule) @@ -666,12 +781,13 @@ class Grammar: break # Filter out unused terminals - used_terms = {t.name for r in compiled_rules - for t in r.expansion - if isinstance(t, Terminal)} - terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore or t.name in terminals_to_keep) - if unused: - logger.debug("Unused terminals: %s", [t.name for t in unused]) + if terminals_to_keep != '*': + used_terms = {t.name for r in compiled_rules + for t in r.expansion + if isinstance(t, Terminal)} + terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore or t.name in terminals_to_keep) + if unused: + logger.debug("Unused terminals: %s", [t.name for t in unused]) return terminals, compiled_rules, self.ignore @@ -804,7 +920,7 @@ def _get_parser(): parser_conf = ParserConf(rules, callback, ['start']) lexer_conf.lexer_type = 'standard' parser_conf.parser_type = 'lalr' - _get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, {}) + _get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, None) return _get_parser.cache GRAMMAR_ERRORS = [ @@ -981,9 +1097,7 @@ class GrammarBuilder: # TODO: think about what to do with 'options' base = self._definitions[name][1] - while len(base.children) == 2: - assert isinstance(base.children[0], Tree) and base.children[0].data == 'expansions', base - base = base.children[0] + assert isinstance(base, Tree) and base.data == 'expansions' base.children.insert(0, exp) def _ignore(self, exp_or_name): @@ -1226,6 +1340,12 @@ def verify_used_files(file_hashes): return False return True +def list_grammar_imports(grammar, import_paths=[]): + "Returns a list of paths to the lark grammars imported by the given grammar (recursively)" + builder = GrammarBuilder(False, import_paths) + builder.load_grammar(grammar, '') + return list(builder.used_files.keys()) + def load_grammar(grammar, source, import_paths, global_keep_all_tokens): builder = GrammarBuilder(global_keep_all_tokens, import_paths) builder.load_grammar(grammar, source) diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index 286038e..fa526b0 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -204,8 +204,7 @@ class AmbiguousExpander: if i in self.to_expand: ambiguous.append(i) - to_expand = [j for j, grandchild in enumerate(child.children) if _is_ambig_tree(grandchild)] - child.expand_kids_by_index(*to_expand) + child.expand_kids_by_data('_ambig') if not ambiguous: return self.node_builder(children) diff --git a/lark/parsers/lalr_interactive_parser.py b/lark/parsers/lalr_interactive_parser.py index ce596b5..d6780cb 100644 --- a/lark/parsers/lalr_interactive_parser.py +++ b/lark/parsers/lalr_interactive_parser.py @@ -65,7 +65,7 @@ class InteractiveParser(object): """Print the output of ``choices()`` in a way that's easier to read.""" 
out = ["Parser choices:"] for k, v in self.choices().items(): - out.append('\t- %s -> %s' % (k, v)) + out.append('\t- %s -> %r' % (k, v)) out.append('stack size: %s' % len(self.parser_state.state_stack)) return '\n'.join(out) diff --git a/lark/tree.py b/lark/tree.py index bee53cf..0937b85 100644 --- a/lark/tree.py +++ b/lark/tree.py @@ -107,6 +107,17 @@ class Tree(object): kid = self.children[i] self.children[i:i+1] = kid.children + def expand_kids_by_data(self, *data_values): + """Expand (inline) children with any of the given data values. Returns True if anything changed""" + changed = False + for i in range(len(self.children)-1, -1, -1): + child = self.children[i] + if isinstance(child, Tree) and child.data in data_values: + self.children[i:i+1] = child.children + changed = True + return changed + + def scan_values(self, pred): """Return all values in the tree that evaluate pred(value) as true. diff --git a/lark/utils.py b/lark/utils.py index ea78801..051adfa 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -187,7 +187,7 @@ def get_regexp_width(expr): return 1, sre_constants.MAXREPEAT else: return 0, sre_constants.MAXREPEAT - + ###} @@ -287,8 +287,8 @@ except ImportError: atomicwrites = None class FS: - exists = os.path.exists - + exists = staticmethod(os.path.exists) + @staticmethod def open(name, mode="r", **kwargs): if atomicwrites and "w" in mode: @@ -359,3 +359,29 @@ def _serialize(value, memo): return {key:_serialize(elem, memo) for key, elem in value.items()} # assert value is None or isinstance(value, (int, float, str, tuple)), value return value + + + + +def small_factors(n, max_factor): + """ + Splits n up into smaller factors and summands <= max_factor. + Returns a list of [(a, b), ...] + so that the following code returns n: + + n = 1 + for a, b in values: + n = n * a + b + + Currently, we also keep a + b <= max_factor, but that might change + """ + assert n >= 0 + assert max_factor > 2 + if n <= max_factor: + return [(n, 0)] + + for a in range(max_factor, 1, -1): + r, b = divmod(n, a) + if a + b <= max_factor: + return small_factors(r, max_factor) + [(a, b)] + assert False, "Failed to factorize %s" % n diff --git a/lark/visitors.py b/lark/visitors.py index 23ef64a..d45bb19 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -149,6 +149,59 @@ class Transformer(_Decoratable): return token +def merge_transformers(base_transformer=None, **transformers_to_merge): + """Merge a collection of transformers into the base_transformer, each into its own 'namespace'. + + When called, it will collect the methods from each transformer, and assign them to base_transformer, + with their name prefixed with the given keyword, as ``prefix__methodname``. + + This function is especially useful for processing grammars that import other grammars, + thereby creating some of their rules in a 'namespace'. (i.e with a consistent name prefix). + In this case, the key for the transformer should match the name of the imported grammar. + + Parameters: + base_transformer (Transformer, optional): The transformer that all other transformers will be added to. + **transformers_to_merge: Keyword arguments, in the form of ``name_prefix = transformer``. 
+ + Raises: + AttributeError: In case of a name collision in the merged methods + + Example: + :: + + class TBase(Transformer): + def start(self, children): + return children[0] + 'bar' + + class TImportedGrammar(Transformer): + def foo(self, children): + return "foo" + + composed_transformer = merge_transformers(TBase(), imported=TImportedGrammar()) + + t = Tree('start', [ Tree('imported__foo', []) ]) + + assert composed_transformer.transform(t) == 'foobar' + + """ + if base_transformer is None: + base_transformer = Transformer() + for prefix, transformer in transformers_to_merge.items(): + for method_name in dir(transformer): + method = getattr(transformer, method_name) + if not callable(method): + continue + if method_name.startswith("_") or method_name == "transform": + continue + prefixed_method = prefix + "__" + method_name + if hasattr(base_transformer, prefixed_method): + raise AttributeError("Cannot merge: method '%s' appears more than once" % prefixed_method) + + setattr(base_transformer, prefixed_method, method) + + return base_transformer + + class InlineTransformer(Transformer): # XXX Deprecated def _call_userfunc(self, tree, new_children=None): # Assumes tree is already transformed diff --git a/setup.cfg b/setup.cfg index 6ddead9..6d71f28 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,6 +5,4 @@ zip_safe= universal = 1 [metadata] -description-file = README.md license_file = LICENSE - diff --git a/tests/test_grammar.py b/tests/test_grammar.py index a643117..c771f2b 100644 --- a/tests/test_grammar.py +++ b/tests/test_grammar.py @@ -1,10 +1,10 @@ from __future__ import absolute_import -import sys +import os from unittest import TestCase, main -from lark import Lark, Token, Tree -from lark.load_grammar import GrammarError, GRAMMAR_ERRORS, find_grammar_errors +from lark import Lark, Token, Tree, ParseError, UnexpectedInput +from lark.load_grammar import GrammarError, GRAMMAR_ERRORS, find_grammar_errors, list_grammar_imports from lark.load_grammar import FromPackageLoader @@ -198,6 +198,77 @@ class TestGrammar(TestCase): x = find_grammar_errors(text) assert [e.line for e, _s in find_grammar_errors(text)] == [2, 6] + def test_ranged_repeat_terms(self): + g = u"""!start: AAA + AAA: "A"~3 + """ + l = Lark(g, parser='lalr') + self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"])) + self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA') + self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA') + + g = u"""!start: AABB CC + AABB: "A"~0..2 "B"~2 + CC: "C"~1..2 + """ + l = Lark(g, parser='lalr') + self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC'])) + self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C'])) + self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC'])) + self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB') + self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB') + self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB') + self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB') + + def test_ranged_repeat_large(self): + g = u"""!start: "A"~60 + """ + l = Lark(g, parser='lalr') + self.assertGreater(len(l.rules), 1, "Expected that more than one rule will be generated") + self.assertEqual(l.parse(u'A' * 60), Tree('start', ["A"] * 60)) + self.assertRaises(ParseError, l.parse, u'A' * 59) + self.assertRaises((ParseError, UnexpectedInput), l.parse, u'A' * 61) + + g = u"""!start: "A"~15..100 + """ + l = Lark(g, parser='lalr') + for i in range(0, 110): + if 15 <= i <= 100: + 
self.assertEqual(l.parse(u'A' * i), Tree('start', ['A']*i)) + else: + self.assertRaises(UnexpectedInput, l.parse, u'A' * i) + + # 8191 is a Mersenne prime + g = u"""start: "A"~8191 + """ + l = Lark(g, parser='lalr') + self.assertEqual(l.parse(u'A' * 8191), Tree('start', [])) + self.assertRaises(UnexpectedInput, l.parse, u'A' * 8190) + self.assertRaises(UnexpectedInput, l.parse, u'A' * 8192) + + def test_large_terminal(self): + g = "start: NUMBERS\n" + g += "NUMBERS: " + '|'.join('"%s"' % i for i in range(0, 1000)) + + l = Lark(g, parser='lalr') + for i in (0, 9, 99, 999): + self.assertEqual(l.parse(str(i)), Tree('start', [str(i)])) + for i in (-1, 1000): + self.assertRaises(UnexpectedInput, l.parse, str(i)) + + def test_list_grammar_imports(self): + grammar = """ + %import .test_templates_import (start, sep) + + %override sep{item, delim}: item (delim item)* delim? + %ignore " " + """ + + imports = list_grammar_imports(grammar, [os.path.dirname(__file__)]) + self.assertEqual({os.path.split(i)[-1] for i in imports}, {'test_templates_import.lark', 'templates.lark'}) + + imports = list_grammar_imports('%import common.WS', []) + assert len(imports) == 1 and imports[0].pkg_name == 'lark' if __name__ == '__main__': diff --git a/tests/test_nearley/test_nearley.py b/tests/test_nearley/test_nearley.py index 1ad6449..a205446 100644 --- a/tests/test_nearley/test_nearley.py +++ b/tests/test_nearley/test_nearley.py @@ -15,8 +15,8 @@ TEST_PATH = os.path.abspath(os.path.dirname(__file__)) NEARLEY_PATH = os.path.join(TEST_PATH, 'nearley') BUILTIN_PATH = os.path.join(NEARLEY_PATH, 'builtin') -if not os.path.exists(NEARLEY_PATH): - logger.warn("Nearley not installed. Skipping Nearley tests!") +if not os.path.exists(BUILTIN_PATH): + logger.warn("Nearley not included. Skipping Nearley tests! 
(use git submodule to add)") raise ImportError("Skipping Nearley tests!") import js2py # Ensures that js2py exists, to avoid failing tests diff --git a/tests/test_parser.py b/tests/test_parser.py index 1c3581c..35f1c14 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -2204,27 +2204,7 @@ def _make_parser_test(LEXER, PARSER): self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB') - def test_ranged_repeat_terms(self): - g = u"""!start: AAA - AAA: "A"~3 - """ - l = _Lark(g) - self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"])) - self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA') - self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA') - g = u"""!start: AABB CC - AABB: "A"~0..2 "B"~2 - CC: "C"~1..2 - """ - l = _Lark(g) - self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC'])) - self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C'])) - self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC'])) - self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB') - self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB') - self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB') - self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB') @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now") # TODO XXX def test_priority_vs_embedded(self): diff --git a/tests/test_reconstructor.py b/tests/test_reconstructor.py index f132312..e2f2dbe 100644 --- a/tests/test_reconstructor.py +++ b/tests/test_reconstructor.py @@ -3,6 +3,7 @@ import json import sys import unittest +from itertools import product from unittest import TestCase from lark import Lark @@ -20,8 +21,8 @@ def _remove_ws(s): class TestReconstructor(TestCase): - def assert_reconstruct(self, grammar, code): - parser = Lark(grammar, parser='lalr', maybe_placeholders=False) + def assert_reconstruct(self, grammar, code, **options): + parser = Lark(grammar, parser='lalr', maybe_placeholders=False, **options) tree = parser.parse(code) new = Reconstructor(parser).reconstruct(tree) self.assertEqual(_remove_ws(code), _remove_ws(new)) @@ -142,6 +143,17 @@ class TestReconstructor(TestCase): new_json = Reconstructor(json_parser).reconstruct(tree) self.assertEqual(json.loads(new_json), json.loads(test_json)) + def test_keep_all_tokens(self): + g = """ + start: "a"? _B? c? _d? 
+ _B: "b" + c: "c" + _d: "d" + """ + examples = list(map(''.join, product(('', 'a'), ('', 'b'), ('', 'c'), ('', 'd'), ))) + for code in examples: + self.assert_reconstruct(g, code, keep_all_tokens=True) + @unittest.skipIf(sys.version_info < (3, 0), "Python 2 does not play well with Unicode.") def test_switch_grammar_unicode_terminal(self): """ diff --git a/tests/test_trees.py b/tests/test_trees.py index c7f9787..82bf6c9 100644 --- a/tests/test_trees.py +++ b/tests/test_trees.py @@ -9,7 +9,7 @@ import functools from lark.tree import Tree from lark.lexer import Token from lark.visitors import Visitor, Visitor_Recursive, Transformer, Interpreter, visit_children_decor, v_args, Discard, Transformer_InPlace, \ - Transformer_InPlaceRecursive, Transformer_NonRecursive + Transformer_InPlaceRecursive, Transformer_NonRecursive, merge_transformers class TestTrees(TestCase): @@ -233,21 +233,62 @@ class TestTrees(TestCase): x = MyTransformer().transform( t ) self.assertEqual(x, t2) - + def test_transformer_variants(self): tree = Tree('start', [Tree('add', [Token('N', '1'), Token('N', '2')]), Tree('add', [Token('N', '3'), Token('N', '4')])]) for base in (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive): class T(base): def add(self, children): return sum(children) - + def N(self, token): return int(token) - + copied = copy.deepcopy(tree) result = T().transform(copied) self.assertEqual(result, Tree('start', [3, 7])) + def test_merge_transformers(self): + tree = Tree('start', [ + Tree('main', [ + Token("A", '1'), Token("B", '2') + ]), + Tree("module__main", [ + Token("A", "2"), Token("B", "3") + ]) + ]) + + class T1(Transformer): + A = int + B = int + main = sum + start = list + def module__main(self, children): + return sum(children) + + class T2(Transformer): + A = int + B = int + main = sum + start = list + + class T3(Transformer): + def main(self, children): + return sum(children) + + class T4(Transformer): + main = sum + + + t1_res = T1().transform(tree) + composed_res = merge_transformers(T2(), module=T3()).transform(tree) + self.assertEqual(t1_res, composed_res) + + composed_res2 = merge_transformers(T2(), module=T4()).transform(tree) + self.assertEqual(t1_res, composed_res2) + + with self.assertRaises(AttributeError): + merge_transformers(T1(), module=T3()) if __name__ == '__main__': unittest.main() diff --git a/tox.ini b/tox.ini index ef19e2c..1f60e48 100644 --- a/tox.ini +++ b/tox.ini @@ -1,18 +1,7 @@ [tox] -envlist = py27, py34, py35, py36, py37, py38, py39, pypy, pypy3 +envlist = py27, py34, py35, py36, py37, py38, py39, py310, pypy, pypy3 skip_missing_interpreters=true -[travis] -2.7 = py27 -3.4 = py34 -3.5 = py35 -3.6 = py36 -3.7 = py37 -3.8 = py38 -3.9 = py39 -pypy = pypy -pypy3 = pypy3 - [testenv] whitelist_externals = git deps =