| @@ -1,67 +0,0 @@ | |||||
| """ | |||||
| Transformer merging | |||||
| ================== | |||||
| This example is intended to show how transformers can be merged in order to | |||||
| keep the individual steps clean and simple. | |||||
| .. note:: | |||||
| The imported rules will have to be aliased according to the file they are in. | |||||
| (See `storage.lark` for an implementation of this idea.) | |||||
| """ | |||||
| from lark import Lark, Tree | |||||
| from json import dumps | |||||
| from lark.visitors import Transformer, merge_transformers, v_args | |||||
class JsonTreeToJson(Transformer):
    """Transformer callbacks that turn a parsed JSON tree into plain Python values.

    Container rules are mapped straight onto the builtin constructors, while
    ``string`` and ``number`` receive their single child inline.
    """

    # Container rules: the children list is handed to the builtin constructor.
    object = dict
    pair = tuple
    array = list

    # ``number`` has a single child, which is converted with ``float``.
    number = v_args(inline=True)(float)

    @v_args(inline=True)
    def string(self, token):
        """Drop the first and last character of *token* and unescape ``\\"``."""
        inner = token[1:-1]
        return inner.replace('\\"', '"')

    def null(self, _):
        """Evaluate the ``null`` rule to ``None``."""
        return None

    def true(self, _):
        """Evaluate the ``true`` rule to ``True``."""
        return True

    def false(self, _):
        """Evaluate the ``false`` rule to ``False``."""
        return False
class CsvTreeToPandasDict(Transformer):
    """Transformer callbacks that turn a parsed CSV tree into a column-oriented dict.

    The result maps every header entry to the list of values found in that
    column (a layout the class name suggests is meant for pandas).
    """

    # Terminal callbacks: convert matched tokens with the builtin types.
    INT = int
    FLOAT = float
    SIGNED_FLOAT = float
    WORD = str
    NON_SEPARATOR_STRING = str

    def row(self, children):
        """Represent a row simply as the list of its (already converted) cells."""
        return children

    def start(self, children):
        """Build ``{heading: [values...]}`` from the header and the rows after it."""
        # The first child is the header tree; its children are the headings.
        columns = children[0].children
        table = {name: [] for name in columns}
        for record in children[1:]:
            for position, value in enumerate(record):
                table[columns[position]].append(value)
        return table
class Base(Transformer):
    """Base transformer for the combined grammar's own ``start`` rule."""

    def start(self, children):
        """Unwrap the top-level ``start`` rule by returning its first child."""
        first_child = children[0]
        return first_child
if __name__ == "__main__":
    # Merge the two independent transformers into ``Base``; the keyword names
    # are the prefixes under which each transformer's callbacks are attached.
    merged = merge_transformers(Base(), csv=CsvTreeToPandasDict(), json=JsonTreeToJson())
    # Resolve the grammar relative to this file rather than the current
    # working directory, so the example runs from any location.
    parser = Lark.open("storage.lark", rel_to=__file__)
    csv_tree = parser.parse("""# file lines author
data.json 12 Robin
data.csv 30 erezsh
compiler.py 123123 Megalng
""")
    print("CSV data in pandas form:", merged.transform(csv_tree))
    json_tree = parser.parse(dumps({"test": "a", "dict": { "list": [1, 1.2] }}))
    print("JSON data transformed: ", merged.transform(json_tree))
| @@ -0,0 +1,10 @@ | |||||
| Grammar Composition | |||||
| =================== | |||||
| This example shows how to do grammar composition in Lark, by creating a new | |||||
| file format that allows both CSV and JSON to co-exist. | |||||
| We show how, by using namespaces, Lark grammars and their transformers can be fully reused - | |||||
| they don't need to care if their grammar is used directly, or being imported, or who is doing the importing. | |||||
| See [``main.py``](main.py) for more details. | |||||
| @@ -0,0 +1,6 @@ | |||||
| {"header": ["this", "is", "json", 1111]} | |||||
| # file lines author | |||||
| data.json 12 Robin | |||||
| data.csv 30 erezsh | |||||
| compiler.py 123123 Megalng | |||||
| {"footer": "done"} | |||||
| @@ -0,0 +1,24 @@ | |||||
| from lark import Transformer | |||||
class CsvTreeToPandasDict(Transformer):
    """Evaluate a parsed CSV tree into a dict of columns.

    Each header entry becomes a key whose value is the list of cells found in
    that column, in row order.
    """

    # Terminal callbacks: tokens are converted with the matching builtin type.
    INT = int
    FLOAT = float
    SIGNED_FLOAT = float
    WORD = str
    NON_SEPARATOR_STRING = str

    def row(self, children):
        """Return the row's cells unchanged, as a list."""
        return children

    def start(self, children):
        """Collect the rows following the header into ``{heading: [cells...]}``."""
        # children[0] is the header tree, everything after it is a row list.
        headings = children[0].children
        columns = {heading: [] for heading in headings}
        for cells in children[1:]:
            for index, cell in enumerate(cells):
                columns[headings[index]].append(cell)
        return columns
| @@ -0,0 +1,15 @@ | |||||
| from lark import Transformer, v_args | |||||
class JsonTreeToJson(Transformer):
    """Evaluate a parsed JSON tree into the equivalent Python objects."""

    # Rules whose children can be passed directly to a builtin constructor.
    array = list
    object = dict
    pair = tuple

    # Single-child rule: the child is passed inline and converted to float.
    number = v_args(inline=True)(float)

    @v_args(inline=True)
    def string(self, raw):
        """Strip the first and last character of *raw* and unescape ``\\"``."""
        body = raw[1:-1]
        return body.replace('\\"', '"')

    def null(self, _):
        """Return ``None`` for the ``null`` rule."""
        return None

    def true(self, _):
        """Return ``True`` for the ``true`` rule."""
        return True

    def false(self, _):
        """Return ``False`` for the ``false`` rule."""
        return False
| @@ -0,0 +1,51 @@ | |||||
| """ | |||||
| Grammar Composition | |||||
| =================== | |||||
| This example shows how to do grammar composition in Lark, by creating a new | |||||
| file format that allows both CSV and JSON to co-exist. | |||||
| 1) We define ``storage.lark``, which imports both ``csv.lark`` and ``json.lark``, | |||||
| and allows them to be used one after the other. | |||||
| In the generated tree, each imported rule/terminal is automatically prefixed (with ``json__`` or ``csv__``), | |||||
| which creates an implicit namespace and allows them to coexist without collisions. | |||||
| 2) We merge their respective transformers (unaware of each other) into a new base transformer. | |||||
| The resulting transformer can evaluate both JSON and CSV in the parse tree. | |||||
| The methods of each transformer are renamed into their appropriate namespace, using the given prefix. | |||||
| This approach allows full re-use: the transformers don't need to care if their grammar is used directly, | |||||
| or being imported, or who is doing the importing. | |||||
| """ | |||||
| from pathlib import Path | |||||
| from lark import Lark | |||||
| from json import dumps | |||||
| from lark.visitors import Transformer, merge_transformers | |||||
| from eval_csv import CsvTreeToPandasDict | |||||
| from eval_json import JsonTreeToJson | |||||
# Directory containing this script; used below to locate the sample input file.
# NOTE(review): a module-level ``__dir__`` is also the PEP 562 hook consulted by
# ``dir(module)`` - consider renaming (e.g. ``_HERE``) together with its use in main().
__dir__ = Path(__file__).parent
class Storage(Transformer):
    """Base transformer handling the ``start`` rule of the combined storage grammar."""

    def start(self, children):
        """Return the list of children of ``start`` unchanged."""
        return children
# Merge the CSV and JSON transformers into ``Storage``; each keyword name is the
# namespace prefix under which that transformer's methods are attached.
storage_transformer = merge_transformers(Storage(), csv=CsvTreeToPandasDict(), json=JsonTreeToJson())
# Load the combined grammar, resolved relative to this script's location.
parser = Lark.open("storage.lark", rel_to=__file__)
def main():
    """Run the demo: evaluate a JSON-only input, then the combined CSV + JSON file.

    Both inputs are parsed with the module-level ``parser`` and evaluated with
    ``storage_transformer``; the results are printed to stdout.
    """
    json_tree = parser.parse(dumps({"test": "a", "dict": { "list": [1, 1.2] }}))
    res = storage_transformer.transform(json_tree)
    print("Just JSON: ", res)

    # Read the sample file inside a context manager so the handle is closed
    # promptly instead of being left to the garbage collector.
    with open(__dir__ / 'combined_csv_and_json.txt') as f:
        csv_json_tree = parser.parse(f.read())
    res = storage_transformer.transform(csv_json_tree)
    print("JSON + CSV: ", dumps(res, indent=2))
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
| @@ -1,8 +1,8 @@ | |||||
| start: csv__start | |||||
| | json__start | |||||
| start: (csv__start | json__start _NL?)+ | |||||
| // Renaming of the import variables is required, as they | // Renaming of the import variables is required, as they | ||||
| // receive the namespace of this file. | // receive the namespace of this file. | ||||
| // See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565 | // See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565 | ||||
| %import .csv.start -> csv__start | %import .csv.start -> csv__start | ||||
| %import .csv._NL -> _NL | |||||
| %import .json.start -> json__start | %import .json.start -> json__start | ||||