diff --git a/docs/visitors.rst b/docs/visitors.rst index f263712..43d0513 100644 --- a/docs/visitors.rst +++ b/docs/visitors.rst @@ -103,12 +103,17 @@ v_args .. autofunction:: lark.visitors.v_args +merge_transformers +------------------ + +.. autofunction:: lark.visitors.merge_transformers + Discard ------- .. autoclass:: lark.visitors.Discard VisitError -------- +---------- .. autoclass:: lark.exceptions.VisitError \ No newline at end of file diff --git a/examples/composition/README.md b/examples/composition/README.md new file mode 100644 index 0000000..259a66a --- /dev/null +++ b/examples/composition/README.md @@ -0,0 +1,10 @@ +Grammar Composition +=================== + +This example shows how to do grammar composition in Lark, by creating a new +file format that allows both CSV and JSON to co-exist. + +We show how, by using namespaces, Lark grammars and their transformers can be fully reused - +they don't need to care if their grammar is used directly, or being imported, or who is doing the importing. + +See [``main.py``](main.py) for more details. \ No newline at end of file diff --git a/examples/composition/combined_csv_and_json.txt b/examples/composition/combined_csv_and_json.txt new file mode 100644 index 0000000..5b8df82 --- /dev/null +++ b/examples/composition/combined_csv_and_json.txt @@ -0,0 +1,6 @@ +{"header": ["this", "is", "json", 1111]} +# file lines author +data.json 12 Robin +data.csv 30 erezsh +compiler.py 123123 Megalng +{"footer": "done"} diff --git a/examples/composition/csv.lark b/examples/composition/csv.lark new file mode 100644 index 0000000..cc2b675 --- /dev/null +++ b/examples/composition/csv.lark @@ -0,0 +1,14 @@ +start: header _NL row+ +header: "#" " "? 
class CsvTreeToPandasDict(Transformer):
    """Evaluate a ``csv.lark`` parse tree into a dict of columns.

    The value produced for ``start`` maps every heading to the list of
    values found under it, in row order.
    """

    # Callbacks named after the terminals of csv.lark: each one converts
    # the matched text into the corresponding Python value.
    INT = int
    FLOAT = SIGNED_FLOAT = float
    WORD = NON_SEPARATOR_STRING = str

    def row(self, children):
        # A row evaluates to the plain list of its (already converted) cells.
        return children

    def start(self, children):
        # First child is the header tree; every following child is a row list.
        header_tree, records = children[0], children[1:]
        headings = header_tree.children

        columns = {heading: [] for heading in headings}
        for record in records:
            for position, cell in enumerate(record):
                # Index lookup on purpose: a row with more cells than there
                # are headings raises IndexError instead of being truncated.
                columns[headings[position]].append(cell)

        return columns
[pair ("," _WS? pair)*] "}" +pair : string ":" _WS value + +string : ESCAPED_STRING + +%import common.ESCAPED_STRING +%import common.SIGNED_NUMBER +%import common.WS -> _WS diff --git a/examples/composition/main.py b/examples/composition/main.py new file mode 100644 index 0000000..a549abe --- /dev/null +++ b/examples/composition/main.py @@ -0,0 +1,51 @@ +""" +Grammar Composition +=================== + +This example shows how to do grammar composition in Lark, by creating a new +file format that allows both CSV and JSON to co-exist. + +1) We define ``storage.lark``, which imports both ``csv.lark`` and ``json.lark``, + and allows them to be used one after the other. + + In the generated tree, each imported rule/terminal is automatically prefixed (with ``json__`` or ``csv__``), + which creates an implicit namespace and allows them to coexist without collisions. + +2) We merge their respective transformers (unaware of each other) into a new base transformer. + The resulting transformer can evaluate both JSON and CSV in the parse tree. + + The methods of each transformer are renamed into their appropriate namespace, using the given prefix. + This approach allows full re-use: the transformers don't need to care if their grammar is used directly, + or being imported, or who is doing the importing. 
def merge_transformers(base_transformer=None, **transformers_to_merge):
    """Merge a collection of transformers into the base_transformer, each into its own 'namespace'.

    When called, it will collect the methods from each transformer, and assign them to base_transformer,
    with their name prefixed with the given keyword, as ``prefix__methodname``.

    This function is especially useful for processing grammars that import other grammars,
    thereby creating some of their rules in a 'namespace'. (i.e. with a consistent name prefix)
    In this case, the key for the transformer should match the name of the imported grammar.

    Only public callables are merged: names starting with an underscore, and ``transform`` itself,
    are skipped. The merge is all-or-nothing: every prefixed name is checked for collisions before
    anything is assigned, so a failed merge leaves base_transformer unmodified.

    Parameters:
        base_transformer (Transformer, optional): The transformer that all other transformers will be added to.
            It is modified in place. If omitted, a new ``Transformer`` instance is created.
        **transformers_to_merge: Keyword arguments, in the form of ``name_prefix = transformer``.

    Returns:
        The base transformer, with the prefixed methods attached to it.

    Raises:
        AttributeError: In case of a name collision in the merged methods

    Example:
        ::

            class TBase(Transformer):
                def start(self, children):
                    return children[0] + 'bar'

            class TImportedGrammar(Transformer):
                def foo(self, children):
                    return "foo"

            composed_transformer = merge_transformers(TBase(), imported=TImportedGrammar())

            t = Tree('start', [ Tree('imported__foo', []) ])

            assert composed_transformer.transform(t) == 'foobar'

    """
    if base_transformer is None:
        base_transformer = Transformer()

    # Stage everything first: assigning eagerly would leave base_transformer
    # half-merged if a collision is only detected part-way through.
    pending = {}
    for prefix, transformer in transformers_to_merge.items():
        for method_name in dir(transformer):
            # Filter by name before touching the attribute, so that
            # underscore-prefixed attributes are never evaluated.
            if method_name.startswith("_") or method_name == "transform":
                continue
            method = getattr(transformer, method_name)
            if not callable(method):
                continue
            prefixed_method = prefix + "__" + method_name
            # A clash may come from the base itself, or from another merged
            # transformer (e.g. prefix ``a`` + ``b__x`` vs. prefix ``a__b`` + ``x``).
            if prefixed_method in pending or hasattr(base_transformer, prefixed_method):
                raise AttributeError("Cannot merge: method '%s' appears more than once" % prefixed_method)
            pending[prefixed_method] = method

    for prefixed_method, method in pending.items():
        setattr(base_transformer, prefixed_method, method)

    return base_transformer
Interpreter, visit_children_decor, v_args, Discard, Transformer_InPlace, \ - Transformer_InPlaceRecursive, Transformer_NonRecursive + Transformer_InPlaceRecursive, Transformer_NonRecursive, merge_transformers class TestTrees(TestCase): @@ -233,21 +233,62 @@ class TestTrees(TestCase): x = MyTransformer().transform( t ) self.assertEqual(x, t2) - + def test_transformer_variants(self): tree = Tree('start', [Tree('add', [Token('N', '1'), Token('N', '2')]), Tree('add', [Token('N', '3'), Token('N', '4')])]) for base in (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive): class T(base): def add(self, children): return sum(children) - + def N(self, token): return int(token) - + copied = copy.deepcopy(tree) result = T().transform(copied) self.assertEqual(result, Tree('start', [3, 7])) + def test_merge_transformers(self): + tree = Tree('start', [ + Tree('main', [ + Token("A", '1'), Token("B", '2') + ]), + Tree("module__main", [ + Token("A", "2"), Token("B", "3") + ]) + ]) + + class T1(Transformer): + A = int + B = int + main = sum + start = list + def module__main(self, children): + return sum(children) + + class T2(Transformer): + A = int + B = int + main = sum + start = list + + class T3(Transformer): + def main(self, children): + return sum(children) + + class T4(Transformer): + main = sum + + + t1_res = T1().transform(tree) + composed_res = merge_transformers(T2(), module=T3()).transform(tree) + self.assertEqual(t1_res, composed_res) + + composed_res2 = merge_transformers(T2(), module=T4()).transform(tree) + self.assertEqual(t1_res, composed_res2) + + with self.assertRaises(AttributeError): + merge_transformers(T1(), module=T3()) if __name__ == '__main__': unittest.main()