@@ -1,67 +0,0 @@ | |||||
""" | |||||
Transformer merging | |||||
================== | |||||
This example is intended to show how transformers can be merged in order to | |||||
keep the individual steps clean and simple. | |||||
.. note:: | |||||
The imported rules will have to be aliased according to the file they are in. | |||||
(See `storage.lark` for an implementation of this idea.) | |||||
""" | |||||
from lark import Lark, Tree | |||||
from json import dumps | |||||
from lark.visitors import Transformer, merge_transformers, v_args | |||||
class JsonTreeToJson(Transformer):
    """Transformer whose callbacks turn a JSON parse tree into Python values."""

    # Container rules are handled directly by the builtin constructors.
    object = dict
    array = list
    pair = tuple

    # The rule's children are passed inline (as positional args) to float().
    number = v_args(inline=True)(float)

    @v_args(inline=True)
    def string(self, s):
        """Strip the first and last character, then unescape escaped quotes."""
        unquoted = s[1:-1]
        return unquoted.replace('\\"', '"')

    def null(self, _):
        """Return ``None`` regardless of the children."""
        return None

    def true(self, _):
        """Return ``True`` regardless of the children."""
        return True

    def false(self, _):
        """Return ``False`` regardless of the children."""
        return False
class CsvTreeToPandasDict(Transformer):
    """Transformer that collects CSV rows into a ``{heading: [values]}`` dict."""

    # Upper-case callbacks map straight onto builtin constructors.
    INT = int
    FLOAT = SIGNED_FLOAT = float
    WORD = NON_SEPARATOR_STRING = str

    def row(self, children):
        """Return the row's children unchanged."""
        return children

    def start(self, children):
        """Build one list per heading and fill it column by column.

        The first child supplies the headings through its ``.children``;
        every following child is iterated as a row of cells.
        """
        headings = children[0].children
        columns = {heading: [] for heading in headings}
        for cells in children[1:]:
            for position, cell in enumerate(cells):
                # Index lookup (not zip) so a row longer than the header
                # still raises instead of being silently truncated.
                columns[headings[position]].append(cell)
        return columns
class Base(Transformer):
    """Base transformer that the namespaced transformers are merged into."""

    def start(self, children):
        """Return the first child of ``start``."""
        first_child = children[0]
        return first_child
if __name__ == "__main__":
    # Merge the CSV and JSON transformers into Base; the keyword names
    # (``csv`` / ``json``) are the prefixes handed to merge_transformers.
    merged = merge_transformers(
        Base(),
        csv=CsvTreeToPandasDict(),
        json=JsonTreeToJson(),
    )

    # Resolve the grammar relative to this script rather than the current
    # working directory, so the example also runs when started elsewhere.
    parser = Lark.open("storage.lark", rel_to=__file__)

    csv_tree = parser.parse("""# file lines author
data.json 12 Robin
data.csv 30 erezsh
compiler.py 123123 Megalng
""")
    print("CSV data in pandas form:", merged.transform(csv_tree))

    json_tree = parser.parse(dumps({"test": "a", "dict": {"list": [1, 1.2]}}))
    print("JSON data transformed: ", merged.transform(json_tree))
@@ -0,0 +1,10 @@ | |||||
Grammar Composition | |||||
=================== | |||||
This example shows how to do grammar composition in Lark, by creating a new | |||||
file format that allows both CSV and JSON to co-exist. | |||||
We show how, by using namespaces, Lark grammars and their transformers can be fully reused - | |||||
they don't need to care whether their grammar is used directly or imported, or who is doing the importing. | |||||
See [``main.py``](main.py) for more details. |
@@ -0,0 +1,6 @@ | |||||
{"header": ["this", "is", "json", 1111]} | |||||
# file lines author | |||||
data.json 12 Robin | |||||
data.csv 30 erezsh | |||||
compiler.py 123123 Megalng | |||||
{"footer": "done"} |
@@ -0,0 +1,24 @@ | |||||
from lark import Transformer | |||||
class CsvTreeToPandasDict(Transformer):
    """Transformer that gathers CSV rows into a ``{heading: [values]}`` dict."""

    # Upper-case callbacks are plain builtin constructors.
    INT = int
    FLOAT = SIGNED_FLOAT = float
    WORD = NON_SEPARATOR_STRING = str

    def row(self, children):
        """Hand the row's children back untouched."""
        return children

    def start(self, children):
        """Return a dict with one list of cell values per heading.

        Headings come from ``children[0].children``; each remaining child is
        iterated as a row, and cell ``i`` is appended under heading ``i``.
        """
        column_names = children[0].children
        table = {name: [] for name in column_names}
        for record in children[1:]:
            for index, value in enumerate(record):
                # Deliberately indexed: a row wider than the header raises
                # IndexError, exactly as before.
                table[column_names[index]].append(value)
        return table
@@ -0,0 +1,15 @@ | |||||
from lark import Transformer, v_args | |||||
class JsonTreeToJson(Transformer):
    """Transformer whose callbacks map a JSON parse tree onto Python values."""

    # Container rules go straight to the builtin constructors.
    object = dict
    array = list
    pair = tuple

    # Children are passed inline (as positional arguments) to float().
    number = v_args(inline=True)(float)

    @v_args(inline=True)
    def string(self, s):
        """Drop the first and last character, then unescape escaped quotes."""
        inner = s[1:-1]
        return inner.replace('\\"', '"')

    def null(self, _):
        """Ignore the children and return ``None``."""
        return None

    def true(self, _):
        """Ignore the children and return ``True``."""
        return True

    def false(self, _):
        """Ignore the children and return ``False``."""
        return False
@@ -0,0 +1,51 @@ | |||||
""" | |||||
Grammar Composition | |||||
=================== | |||||
This example shows how to do grammar composition in Lark, by creating a new | |||||
file format that allows both CSV and JSON to co-exist. | |||||
1) We define ``storage.lark``, which imports both ``csv.lark`` and ``json.lark``, | |||||
and allows them to be used one after the other. | |||||
In the generated tree, each imported rule/terminal is automatically prefixed (with ``json__`` or ``csv__``), | |||||
which creates an implicit namespace and allows them to coexist without collisions. | |||||
2) We merge their respective transformers (unaware of each other) into a new base transformer. | |||||
The resulting transformer can evaluate both JSON and CSV in the parse tree. | |||||
The methods of each transformer are renamed into their appropriate namespace, using the given prefix. | |||||
This approach allows full re-use: the transformers don't need to care whether their grammar is used directly | |||||
or imported, or who is doing the importing. | |||||
""" | |||||
from pathlib import Path | |||||
from lark import Lark | |||||
from json import dumps | |||||
from lark.visitors import Transformer, merge_transformers | |||||
from eval_csv import CsvTreeToPandasDict | |||||
from eval_json import JsonTreeToJson | |||||
# Directory containing this script; used to locate the sample data file.
# NOTE(review): a module-level ``__dir__`` is also the PEP 562 hook consulted by
# ``dir(module)``; binding it to a Path may break ``dir()`` on this module -
# consider renaming (e.g. ``_HERE``) together with its use in main().
__dir__ = Path(__file__).parent | |||||
class Storage(Transformer):
    """Base transformer that the CSV and JSON transformers are merged into."""

    def start(self, children):
        """Return the children of ``start`` unchanged."""
        return children
# Merge the format-specific transformers into Storage; the keyword names
# (``csv`` / ``json``) are the prefixes handed to merge_transformers.
storage_transformer = merge_transformers(
    Storage(),
    csv=CsvTreeToPandasDict(),
    json=JsonTreeToJson(),
)

# Load the composed grammar from the file next to this script.
parser = Lark.open(
    "storage.lark",
    rel_to=__file__,
)
def main():
    """Parse and transform a JSON-only input, then the combined CSV+JSON file.

    Both results are printed; the second one is pretty-printed as JSON.
    """
    json_tree = parser.parse(dumps({"test": "a", "dict": {"list": [1, 1.2]}}))
    res = storage_transformer.transform(json_tree)
    print("Just JSON: ", res)

    # Read the sample through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(__dir__ / 'combined_csv_and_json.txt') as f:
        csv_json_tree = parser.parse(f.read())
    res = storage_transformer.transform(csv_json_tree)
    print("JSON + CSV: ", dumps(res, indent=2))


if __name__ == "__main__":
    main()
@@ -1,8 +1,8 @@ | |||||
start: csv__start | |||||
| json__start | |||||
// One or more sections; each is either csv__start, or json__start optionally followed by _NL.
start: (csv__start | json__start _NL?)+ | |||||
// Renaming of the import variables is required, as they | // Renaming of the import variables is required, as they | ||||
// receive the namespace of this file. | // receive the namespace of this file. | ||||
// See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565 | // See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565 | ||||
%import .csv.start -> csv__start | %import .csv.start -> csv__start | ||||
%import .csv._NL -> _NL | |||||
%import .json.start -> json__start | %import .json.start -> json__start |