| @@ -103,12 +103,17 @@ v_args | |||||
| .. autofunction:: lark.visitors.v_args | .. autofunction:: lark.visitors.v_args | ||||
| merge_transformers | |||||
| ------------------ | |||||
| .. autofunction:: lark.visitors.merge_transformers | |||||
| Discard | Discard | ||||
| ------- | ------- | ||||
| .. autoclass:: lark.visitors.Discard | .. autoclass:: lark.visitors.Discard | ||||
| VisitError | VisitError | ||||
| ------- | |||||
| ---------- | |||||
| .. autoclass:: lark.exceptions.VisitError | .. autoclass:: lark.exceptions.VisitError | ||||
| @@ -0,0 +1,10 @@ | |||||
| Grammar Composition | |||||
| =================== | |||||
| This example shows how to do grammar composition in Lark, by creating a new | |||||
| file format that allows both CSV and JSON to co-exist. | |||||
| We show how, by using namespaces, Lark grammars and their transformers can be fully reused - | |||||
| they don't need to care whether their grammar is used directly or imported, or who is doing the importing. | |||||
| See [``main.py``](main.py) for more details. | |||||
| @@ -0,0 +1,6 @@ | |||||
| {"header": ["this", "is", "json", 1111]} | |||||
| # file lines author | |||||
| data.json 12 Robin | |||||
| data.csv 30 erezsh | |||||
| compiler.py 123123 Megalng | |||||
| {"footer": "done"} | |||||
| @@ -0,0 +1,14 @@ | |||||
// CSV-like table: a "#"-prefixed header line, a newline, then one or more rows.
start: header _NL row+

// Header: "#", an optional single space, then column names,
// each optionally followed by a separator.
header: "#" " "? (WORD _SEPARATOR?)+

// Row: one or more cells, each optionally followed by a separator,
// terminated by a newline.
row: (_anything _SEPARATOR?)+ _NL

_anything: INT | WORD | NON_SEPARATOR_STRING | FLOAT | SIGNED_FLOAT

// Letters plus ".", ";", "\" and "/".
// The upper-case range must be "A-Z": writing "A-z" would also match the ASCII
// characters that sit between "Z" and "a" ("[", "]", "^", "_" and "`").
NON_SEPARATOR_STRING: /[a-zA-Z.;\\\/]+/

// A run of spaces, a single tab, or a single comma.
_SEPARATOR: /[ ]+/
          | "\t"
          | ","

%import common.NEWLINE -> _NL
%import common.WORD
%import common.INT
%import common.FLOAT
%import common.SIGNED_FLOAT
| @@ -0,0 +1,26 @@ | |||||
| "Transformer for evaluating csv.lark" | |||||
| from lark import Transformer | |||||
class CsvTreeToPandasDict(Transformer):
    """Evaluate a ``csv.lark`` parse tree into a column-oriented dict.

    The result maps every header entry to the list of values found in that
    column, in row order.
    """

    # Terminal callbacks: convert each token to a plain Python value.
    INT = int
    FLOAT = float
    SIGNED_FLOAT = float
    WORD = str
    NON_SEPARATOR_STRING = str

    def row(self, children):
        """Represent a row simply as the list of its (already converted) cells."""
        return children

    def start(self, children):
        """Build ``{column_name: [values...]}`` from the header and the rows.

        ``children[0]`` is the header tree (its children are the column names);
        every following child is one row, as returned by :meth:`row`.
        """
        columns = children[0].children
        table = {name: [] for name in columns}

        for cells in children[1:]:
            # Cells are matched to columns by position.
            for position, cell in enumerate(cells):
                table[columns[position]].append(cell)

        return table
| @@ -0,0 +1,17 @@ | |||||
| "Transformer for evaluating json.lark" | |||||
| from lark import Transformer, v_args | |||||
class JsonTreeToJson(Transformer):
    """Evaluate a ``json.lark`` parse tree into plain Python objects."""

    @v_args(inline=True)
    def string(self, s):
        """Drop the surrounding quotes and unescape embedded ``\\"`` sequences."""
        unquoted = s[1:-1]
        return unquoted.replace('\\"', '"')

    # Container rules map directly onto the matching built-in constructors.
    array = list
    pair = tuple
    object = dict
    number = v_args(inline=True)(float)

    def null(self, _):
        """The ``null`` literal evaluates to ``None``."""
        return None

    def true(self, _):
        """The ``true`` literal evaluates to ``True``."""
        return True

    def false(self, _):
        """The ``false`` literal evaluates to ``False``."""
        return False
| @@ -0,0 +1,19 @@ | |||||
// A document is a single JSON value.
?start: value

// A value is a container, a string, a number, or one of the three literals.
// The "->" aliases name the resulting tree nodes (number / true / false / null).
?value: object
      | array
      | string
      | SIGNED_NUMBER -> number
      | "true" -> true
      | "false" -> false
      | "null" -> null

// Whitespace is matched explicitly here: it is optional after "[", "{" and ",",
// but required (no "?") between ":" and the value of a pair.
array : "[" _WS? [value ("," _WS? value)*] "]"
object : "{" _WS? [pair ("," _WS? pair)*] "}"
pair : string ":" _WS value

string : ESCAPED_STRING

%import common.ESCAPED_STRING
%import common.SIGNED_NUMBER
%import common.WS -> _WS
| @@ -0,0 +1,51 @@ | |||||
| """ | |||||
| Grammar Composition | |||||
| =================== | |||||
| This example shows how to do grammar composition in Lark, by creating a new | |||||
| file format that allows both CSV and JSON to co-exist. | |||||
| 1) We define ``storage.lark``, which imports both ``csv.lark`` and ``json.lark``, | |||||
| and allows them to be used one after the other. | |||||
| In the generated tree, each imported rule/terminal is automatically prefixed (with ``json__`` or ``csv__``), | |||||
| which creates an implicit namespace and allows them to coexist without collisions. | |||||
| 2) We merge their respective transformers (unaware of each other) into a new base transformer. | |||||
| The resulting transformer can evaluate both JSON and CSV in the parse tree. | |||||
| The methods of each transformer are renamed into their appropriate namespace, using the given prefix. | |||||
| This approach allows full re-use: the transformers don't need to care whether their grammar is used directly | |||||
| or imported, or who is doing the importing. | |||||
| """ | |||||
| from pathlib import Path | |||||
| from lark import Lark | |||||
| from json import dumps | |||||
| from lark.visitors import Transformer, merge_transformers | |||||
| from eval_csv import CsvTreeToPandasDict | |||||
| from eval_json import JsonTreeToJson | |||||
| __dir__ = Path(__file__).parent | |||||
class Storage(Transformer):
    """Base transformer for ``storage.lark``; the CSV and JSON transformers are merged into an instance of it."""

    def start(self, children):
        # The top-level rule just returns its (already transformed) children.
        return children


# Attach the methods of the CSV and JSON transformers to a Storage instance,
# under the ``csv`` and ``json`` prefixes respectively.
storage_transformer = merge_transformers(Storage(), csv=CsvTreeToPandasDict(), json=JsonTreeToJson())
# The grammar path is given together with rel_to=__file__
# (presumably resolved relative to this script's location).
parser = Lark.open("storage.lark", rel_to=__file__)
def main():
    """Parse and evaluate a JSON-only input, then the combined CSV + JSON sample file.

    Both results are printed to standard output.
    """
    json_tree = parser.parse(dumps({"test": "a", "dict": { "list": [1, 1.2] }}))
    res = storage_transformer.transform(json_tree)
    print("Just JSON: ", res)

    # Read the sample through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(__dir__ / 'combined_csv_and_json.txt') as f:
        text = f.read()

    csv_json_tree = parser.parse(text)
    res = storage_transformer.transform(csv_json_tree)
    print("JSON + CSV: ", dumps(res, indent=2))
| if __name__ == "__main__": | |||||
| main() | |||||
| @@ -0,0 +1,8 @@ | |||||
// One or more sections, each being either a CSV table
// or a JSON value optionally followed by a newline.
start: (csv__start | json__start _NL?)+

// Renaming of the import variables is required, as they
// receive the namespace of this file.
// See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565
%import .csv.start -> csv__start
%import .csv._NL -> _NL
%import .json.start -> json__start
| @@ -149,6 +149,59 @@ class Transformer(_Decoratable): | |||||
| return token | return token | ||||
def merge_transformers(base_transformer=None, **transformers_to_merge):
    """Combine several transformers into ``base_transformer``, each under its own 'namespace'.

    Every callable attribute of each given transformer is attached to
    ``base_transformer`` under the name ``prefix__attribute``, where ``prefix``
    is the keyword the transformer was passed with. Attributes whose name starts
    with an underscore, and ``transform`` itself, are not copied.

    This is especially useful for processing grammars that import other grammars,
    since the imported rules end up in a 'namespace' (i.e. with a consistent name prefix).
    In this case, the keyword for each transformer should match the name of the imported grammar.

    Parameters:
        base_transformer (Transformer, optional): The transformer that receives the merged methods.
            It is modified in place; a new ``Transformer`` is created when omitted.
        **transformers_to_merge: Keyword arguments, in the form of ``name_prefix = transformer``.

    Returns:
        The ``base_transformer`` instance, with the prefixed methods set on it.

    Raises:
        AttributeError: In case of a name collision in the merged methods

    Example:
        ::

            class TBase(Transformer):
                def start(self, children):
                    return children[0] + 'bar'

            class TImportedGrammar(Transformer):
                def foo(self, children):
                    return "foo"

            composed_transformer = merge_transformers(TBase(), imported=TImportedGrammar())

            t = Tree('start', [ Tree('imported__foo', []) ])

            assert composed_transformer.transform(t) == 'foobar'

    """
    if base_transformer is None:
        base_transformer = Transformer()

    for namespace, source in transformers_to_merge.items():
        for attr_name in dir(source):
            candidate = getattr(source, attr_name)

            # Only public callables are merged; ``transform`` is the entry
            # point of the base transformer and is never namespaced.
            skip = (
                not callable(candidate)
                or attr_name.startswith("_")
                or attr_name == "transform"
            )
            if skip:
                continue

            target_name = "__".join((namespace, attr_name))
            if hasattr(base_transformer, target_name):
                raise AttributeError("Cannot merge: method '%s' appears more than once" % target_name)

            setattr(base_transformer, target_name, candidate)

    return base_transformer
| class InlineTransformer(Transformer): # XXX Deprecated | class InlineTransformer(Transformer): # XXX Deprecated | ||||
| def _call_userfunc(self, tree, new_children=None): | def _call_userfunc(self, tree, new_children=None): | ||||
| # Assumes tree is already transformed | # Assumes tree is already transformed | ||||
| @@ -9,7 +9,7 @@ import functools | |||||
| from lark.tree import Tree | from lark.tree import Tree | ||||
| from lark.lexer import Token | from lark.lexer import Token | ||||
| from lark.visitors import Visitor, Visitor_Recursive, Transformer, Interpreter, visit_children_decor, v_args, Discard, Transformer_InPlace, \ | from lark.visitors import Visitor, Visitor_Recursive, Transformer, Interpreter, visit_children_decor, v_args, Discard, Transformer_InPlace, \ | ||||
| Transformer_InPlaceRecursive, Transformer_NonRecursive | |||||
| Transformer_InPlaceRecursive, Transformer_NonRecursive, merge_transformers | |||||
| class TestTrees(TestCase): | class TestTrees(TestCase): | ||||
| @@ -233,21 +233,62 @@ class TestTrees(TestCase): | |||||
| x = MyTransformer().transform( t ) | x = MyTransformer().transform( t ) | ||||
| self.assertEqual(x, t2) | self.assertEqual(x, t2) | ||||
def test_transformer_variants(self):
    """Each transformer base class must reduce the same tree to the same result."""
    tree = Tree('start', [
        Tree('add', [Token('N', '1'), Token('N', '2')]),
        Tree('add', [Token('N', '3'), Token('N', '4')]),
    ])
    variants = (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive)

    for base in variants:
        class T(base):
            def add(self, children):
                return sum(children)

            def N(self, token):
                return int(token)

        # Work on a deep copy so every variant starts from an identical tree.
        fresh_tree = copy.deepcopy(tree)
        transformed = T().transform(fresh_tree)
        self.assertEqual(transformed, Tree('start', [3, 7]))
def test_merge_transformers(self):
    """A merged transformer must behave like a hand-written one with prefixed methods."""
    tree = Tree('start', [
        Tree('main', [Token("A", '1'), Token("B", '2')]),
        Tree("module__main", [Token("A", "2"), Token("B", "3")]),
    ])

    # Reference: handles both the plain and the prefixed rule by hand.
    class T1(Transformer):
        A = int
        B = int
        main = sum
        start = list

        def module__main(self, children):
            return sum(children)

    # Base transformer without any handler for the prefixed rule.
    class T2(Transformer):
        A = int
        B = int
        main = sum
        start = list

    # Imported transformer whose ``main`` is a regular method.
    class T3(Transformer):
        def main(self, children):
            return sum(children)

    # Imported transformer whose ``main`` is a plain callable attribute.
    class T4(Transformer):
        main = sum

    expected = T1().transform(tree)

    for imported in (T3(), T4()):
        merged = merge_transformers(T2(), module=imported)
        self.assertEqual(expected, merged.transform(tree))

    # T1 already defines ``module__main``, so merging into it must collide.
    with self.assertRaises(AttributeError):
        merge_transformers(T1(), module=T3())
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| unittest.main() | unittest.main() | ||||