From ca1131a3a19ce2817dc95131333e81d4d20b168c Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 28 Aug 2021 11:13:01 +0100 Subject: [PATCH] Examples: Moved example into 'composition' folder, and improved it --- examples/advanced/advanced_transformers.py | 67 ------------------- examples/composition/README.md | 10 +++ .../composition/combined_csv_and_json.txt | 6 ++ examples/{advanced => composition}/csv.lark | 0 examples/composition/eval_csv.py | 24 +++++++ examples/composition/eval_json.py | 15 +++++ examples/{advanced => composition}/json.lark | 0 examples/composition/main.py | 51 ++++++++++++++ .../{advanced => composition}/storage.lark | 4 +- 9 files changed, 108 insertions(+), 69 deletions(-) delete mode 100644 examples/advanced/advanced_transformers.py create mode 100644 examples/composition/README.md create mode 100644 examples/composition/combined_csv_and_json.txt rename examples/{advanced => composition}/csv.lark (100%) create mode 100644 examples/composition/eval_csv.py create mode 100644 examples/composition/eval_json.py rename examples/{advanced => composition}/json.lark (100%) create mode 100644 examples/composition/main.py rename examples/{advanced => composition}/storage.lark (79%) diff --git a/examples/advanced/advanced_transformers.py b/examples/advanced/advanced_transformers.py deleted file mode 100644 index 9810f44..0000000 --- a/examples/advanced/advanced_transformers.py +++ /dev/null @@ -1,67 +0,0 @@ -""" -Transformer merging -================== - -This example is intended to show how transformers can be merged in order to -keep the individual steps clean and simple. - -.. note:: - The imported rules will have to be aliased according to the file it is in. - (See `storage.lark` for an implementation of this idea.) 
-""" -from lark import Lark, Tree -from json import dumps -from lark.visitors import Transformer, merge_transformers, v_args - -class JsonTreeToJson(Transformer): - @v_args(inline=True) - def string(self, s): - return s[1:-1].replace('\\"', '"') - - array = list - pair = tuple - object = dict - number = v_args(inline=True)(float) - - null = lambda self, _: None - true = lambda self, _: True - false = lambda self, _: False - -class CsvTreeToPandasDict(Transformer): - INT = int - FLOAT = float - SIGNED_FLOAT = float - WORD = str - NON_SEPARATOR_STRING = str - - def row(self, children): - return children - - def start(self, children): - data = {} - - header = children[0].children - for heading in header: - data[heading] = [] - - for row in children[1:]: - for i, element in enumerate(row): - data[header[i]].append(element) - - return data - -class Base(Transformer): - def start(self, children): - return children[0] - -if __name__ == "__main__": - merged = merge_transformers(Base(), csv=CsvTreeToPandasDict(), json=JsonTreeToJson()) - parser = Lark.open("storage.lark") - csv_tree = parser.parse("""# file lines author -data.json 12 Robin -data.csv 30 erezsh -compiler.py 123123 Megalng -""") - print("CSV data in pandas form:", merged.transform(csv_tree)) - json_tree = parser.parse(dumps({"test": "a", "dict": { "list": [1, 1.2] }})) - print("JSON data transformed: ", merged.transform(json_tree)) diff --git a/examples/composition/README.md b/examples/composition/README.md new file mode 100644 index 0000000..259a66a --- /dev/null +++ b/examples/composition/README.md @@ -0,0 +1,10 @@ +Grammar Composition +=================== + +This example shows how to do grammar composition in Lark, by creating a new +file format that allows both CSV and JSON to co-exist. + +We show how, by using namespaces, Lark grammars and their transformers can be fully reused - +they don't need to care if their grammar is used directly, or being imported, or who is doing the importing. 
+ +See [``main.py``](main.py) for more details. \ No newline at end of file diff --git a/examples/composition/combined_csv_and_json.txt b/examples/composition/combined_csv_and_json.txt new file mode 100644 index 0000000..5b8df82 --- /dev/null +++ b/examples/composition/combined_csv_and_json.txt @@ -0,0 +1,6 @@ +{"header": ["this", "is", "json", 1111]} +# file lines author +data.json 12 Robin +data.csv 30 erezsh +compiler.py 123123 Megalng +{"footer": "done"} diff --git a/examples/advanced/csv.lark b/examples/composition/csv.lark similarity index 100% rename from examples/advanced/csv.lark rename to examples/composition/csv.lark diff --git a/examples/composition/eval_csv.py b/examples/composition/eval_csv.py new file mode 100644 index 0000000..3323936 --- /dev/null +++ b/examples/composition/eval_csv.py @@ -0,0 +1,24 @@ +from lark import Transformer + +class CsvTreeToPandasDict(Transformer): + INT = int + FLOAT = float + SIGNED_FLOAT = float + WORD = str + NON_SEPARATOR_STRING = str + + def row(self, children): + return children + + def start(self, children): + data = {} + + header = children[0].children + for heading in header: + data[heading] = [] + + for row in children[1:]: + for i, element in enumerate(row): + data[header[i]].append(element) + + return data diff --git a/examples/composition/eval_json.py b/examples/composition/eval_json.py new file mode 100644 index 0000000..26bf501 --- /dev/null +++ b/examples/composition/eval_json.py @@ -0,0 +1,15 @@ +from lark import Transformer, v_args + +class JsonTreeToJson(Transformer): + @v_args(inline=True) + def string(self, s): + return s[1:-1].replace('\\"', '"') + + array = list + pair = tuple + object = dict + number = v_args(inline=True)(float) + + null = lambda self, _: None + true = lambda self, _: True + false = lambda self, _: False diff --git a/examples/advanced/json.lark b/examples/composition/json.lark similarity index 100% rename from examples/advanced/json.lark rename to examples/composition/json.lark 
diff --git a/examples/composition/main.py b/examples/composition/main.py new file mode 100644 index 0000000..a549abe --- /dev/null +++ b/examples/composition/main.py @@ -0,0 +1,51 @@ +""" +Grammar Composition +=================== + +This example shows how to do grammar composition in Lark, by creating a new +file format that allows both CSV and JSON to co-exist. + +1) We define ``storage.lark``, which imports both ``csv.lark`` and ``json.lark``, + and allows them to be used one after the other. + + In the generated tree, each imported rule/terminal is automatically prefixed (with ``json__`` or ``csv__``), + which creates an implicit namespace and allows them to coexist without collisions. + +2) We merge their respective transformers (unaware of each other) into a new base transformer. + The resulting transformer can evaluate both JSON and CSV in the parse tree. + + The methods of each transformer are renamed into their appropriate namespace, using the given prefix. + This approach allows full re-use: the transformers don't need to care if their grammar is used directly, + or being imported, or who is doing the importing. 
+ +""" +from pathlib import Path +from lark import Lark +from json import dumps +from lark.visitors import Transformer, merge_transformers + +from eval_csv import CsvTreeToPandasDict +from eval_json import JsonTreeToJson + +__dir__ = Path(__file__).parent + +class Storage(Transformer): + def start(self, children): + return children + +storage_transformer = merge_transformers(Storage(), csv=CsvTreeToPandasDict(), json=JsonTreeToJson()) + +parser = Lark.open("storage.lark", rel_to=__file__) + +def main(): + json_tree = parser.parse(dumps({"test": "a", "dict": { "list": [1, 1.2] }})) + res = storage_transformer.transform(json_tree) + print("Just JSON: ", res) + + csv_json_tree = parser.parse(open(__dir__ / 'combined_csv_and_json.txt').read()) + res = storage_transformer.transform(csv_json_tree) + print("JSON + CSV: ", dumps(res, indent=2)) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/advanced/storage.lark b/examples/composition/storage.lark similarity index 79% rename from examples/advanced/storage.lark rename to examples/composition/storage.lark index 64718ed..8e2bacc 100644 --- a/examples/advanced/storage.lark +++ b/examples/composition/storage.lark @@ -1,8 +1,8 @@ -start: csv__start - | json__start +start: (csv__start | json__start _NL?)+ // Renaming of the import variables is required, as they // receive the namespace of this file. // See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565 %import .csv.start -> csv__start +%import .csv._NL -> _NL %import .json.start -> json__start