| @@ -103,12 +103,17 @@ v_args | |||
| .. autofunction:: lark.visitors.v_args | |||
| merge_transformers | |||
| ------------------ | |||
| .. autofunction:: lark.visitors.merge_transformers | |||
| Discard | |||
| ------- | |||
| .. autoclass:: lark.visitors.Discard | |||
| VisitError | |||
| ------- | |||
| ---------- | |||
| .. autoclass:: lark.exceptions.VisitError | |||
| @@ -0,0 +1,10 @@ | |||
| Grammar Composition | |||
| =================== | |||
| This example shows how to do grammar composition in Lark, by creating a new | |||
| file format that allows both CSV and JSON to co-exist. | |||
| We show how, by using namespaces, Lark grammars and their transformers can be fully reused - | |||
| they don't need to care if their grammar is used directly, or being imported, or who is doing the importing. | |||
| See [``main.py``](main.py) for more details. | |||
| @@ -0,0 +1,6 @@ | |||
| {"header": ["this", "is", "json", 1111]} | |||
| # file lines author | |||
| data.json 12 Robin | |||
| data.csv 30 erezsh | |||
| compiler.py 123123 Megalng | |||
| {"footer": "done"} | |||
| @@ -0,0 +1,14 @@ | |||
// A table: one "#"-prefixed header line, a newline, then one or more rows.
start: header _NL row+

// Header: "#", an optional single space, then column names, each optionally
// followed by a separator.
header: "#" " "? (WORD _SEPARATOR?)+

// Row: one or more cell values, each optionally followed by a separator,
// terminated by a newline.
row: (_anything _SEPARATOR?)+ _NL

_anything: INT | WORD | NON_SEPARATOR_STRING | FLOAT | SIGNED_FLOAT

// ASCII letters plus ".", ";", "\" and "/".
// The upper-case range was previously written "A-z", which spans ASCII 65-122
// and therefore also matched "[", "]", "^", "_" and "`".
NON_SEPARATOR_STRING: /[a-zA-Z.;\\\/]+/

_SEPARATOR: /[ ]+/
          | "\t"
          | ","

%import common.NEWLINE -> _NL
%import common.WORD
%import common.INT
%import common.FLOAT
%import common.SIGNED_FLOAT
| @@ -0,0 +1,26 @@ | |||
| "Transformer for evaluating csv.lark" | |||
| from lark import Transformer | |||
class CsvTreeToPandasDict(Transformer):
    """Turn a ``csv.lark`` parse tree into a ``{column name: [values]}`` dict."""

    # Terminal callbacks: convert each matched token to a plain Python value.
    INT = int
    FLOAT = float
    SIGNED_FLOAT = float
    WORD = str
    NON_SEPARATOR_STRING = str

    def row(self, children):
        """Return the (already converted) cell values of one row as-is."""
        return children

    def start(self, children):
        """Build the column dict from the header node and the row lists.

        The first child is the header node; its children are the column names.
        Every remaining child is one row, whose cells are appended to the
        columns by position.
        """
        header_node, rows = children[0], children[1:]
        column_names = header_node.children
        table = {name: [] for name in column_names}
        for cells in rows:
            for position, cell in enumerate(cells):
                table[column_names[position]].append(cell)
        return table
| @@ -0,0 +1,17 @@ | |||
| "Transformer for evaluating json.lark" | |||
| from lark import Transformer, v_args | |||
class JsonTreeToJson(Transformer):
    """Turn a ``json.lark`` parse tree into plain Python values."""

    @v_args(inline=True)
    def string(self, s):
        """Drop the enclosing quote characters and replace every
        backslash-escaped double quote with a plain double quote."""
        unquoted = s[1:-1]
        return unquoted.replace('\\"', '"')

    # Container and number rules map straight onto built-in constructors.
    array = list
    pair = tuple
    object = dict
    number = v_args(inline=True)(float)

    # Literal rules ignore their children and yield the matching constant.
    def null(self, _):
        return None

    def true(self, _):
        return True

    def false(self, _):
        return False
| @@ -0,0 +1,19 @@ | |||
// Entry point: a JSON document is a single value.
?start: value

// A value is a container, a string, a number or one of three literals.
// The number and literal alternatives get explicit names via "->".
?value: object
      | array
      | string
      | SIGNED_NUMBER -> number
      | "true" -> true
      | "false" -> false
      | "null" -> null

// Optional whitespace is accepted after the opening bracket/brace and after each ",".
array : "[" _WS? [value ("," _WS? value)*] "]"
object : "{" _WS? [pair ("," _WS? pair)*] "}"

// Note: whitespace between ":" and the value is mandatory here (_WS has no "?").
pair : string ":" _WS value

string : ESCAPED_STRING

%import common.ESCAPED_STRING
%import common.SIGNED_NUMBER
// Whitespace is imported under the "_"-prefixed name _WS.
%import common.WS -> _WS
| @@ -0,0 +1,51 @@ | |||
| """ | |||
| Grammar Composition | |||
| =================== | |||
| This example shows how to do grammar composition in Lark, by creating a new | |||
| file format that allows both CSV and JSON to co-exist. | |||
| 1) We define ``storage.lark``, which imports both ``csv.lark`` and ``json.lark``, | |||
| and allows them to be used one after the other. | |||
| In the generated tree, each imported rule/terminal is automatically prefixed (with ``json__`` or ``csv__``), | |||
| which creates an implicit namespace and allows them to coexist without collisions. | |||
| 2) We merge their respective transformers (unaware of each other) into a new base transformer. | |||
| The resulting transformer can evaluate both JSON and CSV in the parse tree. | |||
| The methods of each transformer are renamed into their appropriate namespace, using the given prefix. | |||
| This approach allows full re-use: the transformers don't need to care if their grammar is used directly, | |||
| or being imported, or who is doing the importing. | |||
| """ | |||
| from pathlib import Path | |||
| from lark import Lark | |||
| from json import dumps | |||
| from lark.visitors import Transformer, merge_transformers | |||
| from eval_csv import CsvTreeToPandasDict | |||
| from eval_json import JsonTreeToJson | |||
# Directory containing this script; main() reads the sample input file from it.
__dir__ = Path(__file__).parent


class Storage(Transformer):
    """Base transformer whose ``start`` callback returns its children unchanged."""

    def start(self, children):
        return children


# Attach the callbacks of the CSV and JSON transformers to a Storage instance,
# under the ``csv`` and ``json`` prefixes respectively.
storage_transformer = merge_transformers(Storage(), csv=CsvTreeToPandasDict(), json=JsonTreeToJson())

# Load the combined grammar; the path is given relative to this file.
parser = Lark.open("storage.lark", rel_to=__file__)
def main():
    """Run the demo: evaluate a JSON-only input, then the combined CSV + JSON file.

    Both inputs are parsed with the module-level ``parser`` and evaluated with
    the module-level ``storage_transformer``; the results are printed.
    """
    json_tree = parser.parse(dumps({"test": "a", "dict": {"list": [1, 1.2]}}))
    res = storage_transformer.transform(json_tree)
    print("Just JSON: ", res)

    # Read through a context manager so the file handle is closed right away
    # instead of being left for the garbage collector.
    with open(__dir__ / 'combined_csv_and_json.txt') as f:
        text = f.read()

    csv_json_tree = parser.parse(text)
    res = storage_transformer.transform(csv_json_tree)
    print("JSON + CSV: ", dumps(res, indent=2))
| if __name__ == "__main__": | |||
| main() | |||
| @@ -0,0 +1,8 @@ | |||
// Top level: one or more sections, each either a CSV table or a JSON value;
// a JSON value may be followed by a newline.
start: (csv__start | json__start _NL?)+

// Renaming of the import variables is required, as they
// receive the namespace of this file.
// See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565
%import .csv.start -> csv__start
%import .csv._NL -> _NL
%import .json.start -> json__start
| @@ -149,6 +149,59 @@ class Transformer(_Decoratable): | |||
| return token | |||
def merge_transformers(base_transformer=None, **transformers_to_merge):
    """Merge a collection of transformers into the base_transformer, each into its own 'namespace'.

    Every public callable attribute of each given transformer (``transform`` excluded)
    is assigned to ``base_transformer`` under the name ``prefix__methodname``, where
    ``prefix`` is the keyword the transformer was passed with.

    This function is especially useful for processing grammars that import other grammars,
    thereby creating some of their rules in a 'namespace' (i.e. with a consistent name prefix).
    In this case, the key for the transformer should match the name of the imported grammar.

    Parameters:
        base_transformer (Transformer, optional): The transformer that all other transformers
            will be added to. It is modified in place; when omitted, a new ``Transformer()``
            is created.
        **transformers_to_merge: Keyword arguments, in the form of ``name_prefix = transformer``.

    Returns:
        The base transformer, with the prefixed methods attached.

    Raises:
        AttributeError: In case of a name collision in the merged methods

    Example:
        ::

            class TBase(Transformer):
                def start(self, children):
                    return children[0] + 'bar'

            class TImportedGrammar(Transformer):
                def foo(self, children):
                    return "foo"

            composed_transformer = merge_transformers(TBase(), imported=TImportedGrammar())

            t = Tree('start', [ Tree('imported__foo', []) ])

            assert composed_transformer.transform(t) == 'foobar'

    """
    target = Transformer() if base_transformer is None else base_transformer

    for prefix, transformer in transformers_to_merge.items():
        for name in dir(transformer):
            member = getattr(transformer, name)
            # Only public callables are merged; ``transform`` itself is never copied.
            if not callable(member) or name.startswith("_") or name == "transform":
                continue

            merged_name = "__".join((prefix, name))
            if hasattr(target, merged_name):
                raise AttributeError("Cannot merge: method '%s' appears more than once" % merged_name)
            setattr(target, merged_name, member)

    return target
| class InlineTransformer(Transformer): # XXX Deprecated | |||
| def _call_userfunc(self, tree, new_children=None): | |||
| # Assumes tree is already transformed | |||
| @@ -9,7 +9,7 @@ import functools | |||
| from lark.tree import Tree | |||
| from lark.lexer import Token | |||
| from lark.visitors import Visitor, Visitor_Recursive, Transformer, Interpreter, visit_children_decor, v_args, Discard, Transformer_InPlace, \ | |||
| Transformer_InPlaceRecursive, Transformer_NonRecursive | |||
| Transformer_InPlaceRecursive, Transformer_NonRecursive, merge_transformers | |||
| class TestTrees(TestCase): | |||
| @@ -233,21 +233,62 @@ class TestTrees(TestCase): | |||
| x = MyTransformer().transform( t ) | |||
| self.assertEqual(x, t2) | |||
def test_transformer_variants(self):
    """Each transformer base class must evaluate the same tree to the same result."""
    variants = (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive)
    tree = Tree('start', [
        Tree('add', [Token('N', '1'), Token('N', '2')]),
        Tree('add', [Token('N', '3'), Token('N', '4')]),
    ])
    expected = Tree('start', [3, 7])

    for base in variants:
        class T(base):
            def add(self, children):
                return sum(children)

            def N(self, token):
                return int(token)

        # Every variant works on its own deep copy of the input tree.
        copied = copy.deepcopy(tree)
        self.assertEqual(T().transform(copied), expected)
def test_merge_transformers(self):
    """A merged transformer must give the same result as a hand-written one,
    and merging must fail when a prefixed name already exists."""
    tree = Tree('start', [
        Tree('main', [Token("A", '1'), Token("B", '2')]),
        Tree("module__main", [Token("A", "2"), Token("B", "3")]),
    ])

    class T1(Transformer):
        # Reference: handles both ``main`` and ``module__main`` by hand.
        A = int
        B = int
        main = sum
        start = list

        def module__main(self, children):
            return sum(children)

    class T2(Transformer):
        # Base for merging: same as T1 but without ``module__main``.
        A = int
        B = int
        main = sum
        start = list

    class T3(Transformer):
        # Supplies ``main`` as a regular method.
        def main(self, children):
            return sum(children)

    class T4(Transformer):
        # Supplies ``main`` as a plain callable attribute.
        main = sum

    expected = T1().transform(tree)

    for module_transformer in (T3(), T4()):
        composed = merge_transformers(T2(), module=module_transformer)
        self.assertEqual(expected, composed.transform(tree))

    # T1 already defines ``module__main``, so merging under ``module`` must collide.
    with self.assertRaises(AttributeError):
        merge_transformers(T1(), module=T3())
| if __name__ == '__main__': | |||
| unittest.main() | |||