Merge pull request #976 from lark-parser/merge_transformers

4 years ago · 94dcec4c36
--- a/docs/visitors.rst
+++ b/docs/visitors.rst
@@ -103,12 +103,17 @@ v_args

 .. autofunction:: lark.visitors.v_args

 merge_transformers
 ------------------

 .. autofunction:: lark.visitors.merge_transformers

 Discard
 -------

 .. autoclass:: lark.visitors.Discard

 VisitError
 -------
 ----------

 .. autoclass:: lark.exceptions.VisitError
--- a/examples/composition/README.md
+++ b/examples/composition/README.md
@@ -0,0 +1,10 @@
 Grammar Composition
 ===================

 This example shows how to do grammar composition in Lark, by creating a new
 file format that allows both CSV and JSON to co-exist.

 We show how, by using namespaces, Lark grammars and their transformers can be fully reused -
 they don't need to care if their grammar is used directly, or being imported, or who is doing the importing.

 See [``main.py``](main.py) for more details.
--- a/examples/composition/combined_csv_and_json.txt
+++ b/examples/composition/combined_csv_and_json.txt
@@ -0,0 +1,6 @@
 {"header": ["this", "is", "json", 1111]}
 # file lines author
 data.json 12 Robin
 data.csv  30 erezsh
 compiler.py 123123 Megalng
 {"footer": "done"}
--- a/examples/composition/csv.lark
+++ b/examples/composition/csv.lark
@@ -0,0 +1,14 @@
 start: header _NL row+
 header: "#" " "? (WORD _SEPARATOR?)+
 row: (_anything _SEPARATOR?)+ _NL
 _anything: INT | WORD | NON_SEPARATOR_STRING | FLOAT | SIGNED_FLOAT
 // Letters plus '.', ';', '\' and '/'.
 // The upper-case range must end at 'Z': a range ending at 'z' would also match
 // the ASCII punctuation that sits between 'Z' and 'a' ('[', ']', '^', '_', '`').
 NON_SEPARATOR_STRING: /[a-zA-Z.;\\\/]+/
 _SEPARATOR: /[  ]+/
          | "\t"
          | ","

 %import common.NEWLINE -> _NL
 %import common.WORD
 %import common.INT
 %import common.FLOAT
 %import common.SIGNED_FLOAT
--- a/examples/composition/eval_csv.py
+++ b/examples/composition/eval_csv.py
@@ -0,0 +1,26 @@
 "Transformer for evaluating csv.lark"

 from lark import Transformer

 class CsvTreeToPandasDict(Transformer):
    """Evaluate a ``csv.lark`` parse tree into a dict of columns.

    The result maps every heading to the list of values found under it,
    in row order.
    """

    # Terminal callbacks: turn matched tokens into plain Python values.
    NON_SEPARATOR_STRING = str
    WORD = str
    INT = int
    FLOAT = float
    SIGNED_FLOAT = float

    def row(self, children):
        """Represent a row simply as the list of its cell values."""
        return children

    def start(self, children):
        """Build the ``{heading: [values...]}`` mapping.

        The first child is the header node (its children are the headings);
        every following child is a row, as produced by :meth:`row`.
        """
        headings = children[0].children
        columns = {name: [] for name in headings}

        for cells in children[1:]:
            # Cells are matched to headings by position.
            for position, cell in enumerate(cells):
                columns[headings[position]].append(cell)

        return columns
--- a/examples/composition/eval_json.py
+++ b/examples/composition/eval_json.py
@@ -0,0 +1,17 @@
 "Transformer for evaluating json.lark"

 from lark import Transformer, v_args

 class JsonTreeToJson(Transformer):
    """Evaluate a ``json.lark`` parse tree into plain Python values."""

    # Container rules: the list of children is handed straight to the constructor.
    array = list
    pair = tuple
    object = dict

    # ``number`` gets its child as a positional argument and converts it with float().
    number = v_args(inline=True)(float)

    @v_args(inline=True)
    def string(self, s):
        """Drop the surrounding quotes and unescape backslash-escaped double quotes."""
        unquoted = s[1:-1]
        return unquoted.replace('\\"', '"')

    # Literal rules: the children are ignored and a constant is returned.
    def null(self, _):
        return None

    def true(self, _):
        return True

    def false(self, _):
        return False
--- a/examples/composition/json.lark
+++ b/examples/composition/json.lark
@@ -0,0 +1,19 @@
 ?start: value

 ?value: object
      | array
      | string
      | SIGNED_NUMBER      -> number
      | "true"             -> true
      | "false"            -> false
      | "null"             -> null

 array  : "[" _WS? [value ("," _WS? value)*] "]"
 object : "{" _WS? [pair ("," _WS? pair)*] "}"
 pair   : string ":" _WS value

 string : ESCAPED_STRING

 %import common.ESCAPED_STRING
 %import common.SIGNED_NUMBER
 %import common.WS -> _WS
--- a/examples/composition/main.py
+++ b/examples/composition/main.py
@@ -0,0 +1,51 @@
 """
 Grammar Composition
 ===================

 This example shows how to do grammar composition in Lark, by creating a new
 file format that allows both CSV and JSON to co-exist.

 1) We define ``storage.lark``, which imports both ``csv.lark`` and ``json.lark``,
  and allows them to be used one after the other.

  In the generated tree, each imported rule/terminal is automatically prefixed (with ``json__`` or ``csv__``),
  which creates an implicit namespace and allows them to coexist without collisions.

 2) We merge their respective transformers (unaware of each other) into a new base transformer.
   The resulting transformer can evaluate both JSON and CSV in the parse tree.

  The methods of each transformer are renamed into their appropriate namespace, using the given prefix.
  This approach allows full re-use: the transformers don't need to care if their grammar is used directly,
  or being imported, or who is doing the importing.

 """
 from pathlib import Path
 from lark import Lark
 from json import dumps
 from lark.visitors import Transformer, merge_transformers

 from eval_csv import CsvTreeToPandasDict
 from eval_json import JsonTreeToJson

 __dir__ = Path(__file__).parent

 class Storage(Transformer):
    """Base transformer for ``storage.lark``; the CSV and JSON transformers are merged into an instance of it."""
    def start(self, children):
        # Return the children of the ``start`` rule unchanged.
        return children

 # Merge the CSV and JSON transformers into a Storage instance. Their callbacks
 # become available under the ``csv__`` and ``json__`` name prefixes.
 storage_transformer = merge_transformers(Storage(), csv=CsvTreeToPandasDict(), json=JsonTreeToJson())

 # Load the combined grammar; ``rel_to=__file__`` points Lark at this file's location.
 parser = Lark.open("storage.lark", rel_to=__file__)

 def main():
    """Run the demo: evaluate a JSON-only input, then the combined CSV + JSON sample file.

    Both results are printed to stdout.
    """
    json_tree = parser.parse(dumps({"test": "a", "dict": { "list": [1, 1.2] }}))
    res = storage_transformer.transform(json_tree)
    print("Just JSON: ", res)

    # Read the sample through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(__dir__ / 'combined_csv_and_json.txt') as sample_file:
        csv_json_tree = parser.parse(sample_file.read())
    res = storage_transformer.transform(csv_json_tree)
    print("JSON + CSV: ", dumps(res, indent=2))


 if __name__ == "__main__":
    main()
--- a/examples/composition/storage.lark
+++ b/examples/composition/storage.lark
@@ -0,0 +1,8 @@
 start: (csv__start | json__start _NL?)+

 // Renaming of the import variables is required, as they
 // receive the namespace of this file.
 // See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565
 %import .csv.start -> csv__start
 %import .csv._NL -> _NL
 %import .json.start -> json__start
--- a/lark/visitors.py
+++ b/lark/visitors.py
@@ -149,6 +149,59 @@ class Transformer(_Decoratable):
        return token


 def merge_transformers(base_transformer=None, **transformers_to_merge):
    """Merge a collection of transformers into the base_transformer, each into its own 'namespace'.

    When called, it will collect the methods from each transformer, and assign them to base_transformer,
    with their name prefixed with the given keyword, as ``prefix__methodname``.

    This function is especially useful for processing grammars that import other grammars,
    thereby creating some of their rules in a 'namespace'. (i.e. with a consistent name prefix)
    In this case, the key for the transformer should match the name of the imported grammar.

    Only public callables are merged: names starting with an underscore, the ``transform`` method,
    and non-callable attributes are skipped.

    Parameters:
        base_transformer (Transformer, optional): The transformer that all other transformers will be added to.
            It is modified in place. If omitted, a new ``Transformer`` instance is created.
        **transformers_to_merge: Keyword arguments, in the form of ``name_prefix = transformer``.

    Returns:
        The base transformer, with the prefixed callbacks set on it as attributes.

    Raises:
        AttributeError: In case of a name collision in the merged methods

    Example:
        ::

            class TBase(Transformer):
                def start(self, children):
                    return children[0] + 'bar'

            class TImportedGrammar(Transformer):
                def foo(self, children):
                    return "foo"

            composed_transformer = merge_transformers(TBase(), imported=TImportedGrammar())

            t = Tree('start', [ Tree('imported__foo', []) ])

            assert composed_transformer.transform(t) == 'foobar'

    """
    if base_transformer is None:
        base_transformer = Transformer()
    for prefix, transformer in transformers_to_merge.items():
        for method_name in dir(transformer):
            # Filter by name *before* touching the attribute, so that private
            # properties/descriptors are never evaluated as a side effect of merging.
            if method_name.startswith("_") or method_name == "transform":
                continue
            method = getattr(transformer, method_name)
            if not callable(method):
                continue
            prefixed_method = prefix + "__" + method_name
            if hasattr(base_transformer, prefixed_method):
                raise AttributeError("Cannot merge: method '%s' appears more than once" % prefixed_method)

            setattr(base_transformer, prefixed_method, method)

    return base_transformer


 class InlineTransformer(Transformer):   # XXX Deprecated
    def _call_userfunc(self, tree, new_children=None):
        # Assumes tree is already transformed
--- a/tests/test_trees.py
+++ b/tests/test_trees.py
@@ -9,7 +9,7 @@ import functools
 from lark.tree import Tree
 from lark.lexer import Token
 from lark.visitors import Visitor, Visitor_Recursive, Transformer, Interpreter, visit_children_decor, v_args, Discard, Transformer_InPlace, \
    Transformer_InPlaceRecursive, Transformer_NonRecursive
    Transformer_InPlaceRecursive, Transformer_NonRecursive, merge_transformers


 class TestTrees(TestCase):
@@ -233,21 +233,62 @@ class TestTrees(TestCase):

        x = MyTransformer().transform( t )
        self.assertEqual(x, t2)
    

    def test_transformer_variants(self):
        """Every Transformer flavour must give the same result for the same callbacks."""
        first_sum = Tree('add', [Token('N', '1'), Token('N', '2')])
        second_sum = Tree('add', [Token('N', '3'), Token('N', '4')])
        tree = Tree('start', [first_sum, second_sum])
        expected = Tree('start', [3, 7])
        variants = (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive)

        for variant in variants:
            class Adder(variant):
                def N(self, token):
                    return int(token)

                def add(self, children):
                    return sum(children)

            # Each variant gets its own deep copy, since the in-place flavours
            # may modify the tree they are given.
            outcome = Adder().transform(copy.deepcopy(tree))
            self.assertEqual(outcome, expected)

    def test_merge_transformers(self):
        """A merged transformer must behave like one hand-written with prefixed callbacks."""
        plain_branch = Tree('main', [Token("A", '1'), Token("B", '2')])
        namespaced_branch = Tree("module__main", [Token("A", "2"), Token("B", "3")])
        tree = Tree('start', [plain_branch, namespaced_branch])

        # Reference: handles the namespaced rule explicitly.
        class Reference(Transformer):
            A = int
            B = int
            main = sum
            start = list

            def module__main(self, children):
                return sum(children)

        # Same as Reference, minus the namespaced callback (to be supplied by merging).
        class Base(Transformer):
            A = int
            B = int
            main = sum
            start = list

        # Two equivalent "module" transformers: a regular method and a plain builtin.
        class ModuleWithMethod(Transformer):
            def main(self, children):
                return sum(children)

        class ModuleWithBuiltin(Transformer):
            main = sum

        expected = Reference().transform(tree)

        merged_with_method = merge_transformers(Base(), module=ModuleWithMethod())
        self.assertEqual(expected, merged_with_method.transform(tree))

        merged_with_builtin = merge_transformers(Base(), module=ModuleWithBuiltin())
        self.assertEqual(expected, merged_with_builtin.transform(tree))

        # Reference already defines ``module__main``, so merging must report the collision.
        with self.assertRaises(AttributeError):
            merge_transformers(Reference(), module=ModuleWithMethod())

 if __name__ == '__main__':
    unittest.main()