@@ -103,12 +103,17 @@ v_args | |||
.. autofunction:: lark.visitors.v_args | |||
merge_transformers | |||
------------------ | |||
.. autofunction:: lark.visitors.merge_transformers | |||
Discard | |||
------- | |||
.. autoclass:: lark.visitors.Discard | |||
VisitError | |||
----------
.. autoclass:: lark.exceptions.VisitError |
@@ -0,0 +1,10 @@ | |||
Grammar Composition | |||
=================== | |||
This example shows how to do grammar composition in Lark, by creating a new | |||
file format that allows both CSV and JSON to co-exist. | |||
We show how, by using namespaces, Lark grammars and their transformers can be fully reused - | |||
they don't need to care if their grammar is used directly, or being imported, or who is doing the importing. | |||
See [``main.py``](main.py) for more details. |
@@ -0,0 +1,6 @@ | |||
{"header": ["this", "is", "json", 1111]} | |||
# file lines author | |||
data.json 12 Robin | |||
data.csv 30 erezsh | |||
compiler.py 123123 Megalng | |||
{"footer": "done"} |
@@ -0,0 +1,14 @@ | |||
// Grammar for a loose CSV/TSV-like table: one "#"-prefixed header line
// followed by one or more data rows.
start: header _NL row+

// Header line: "#", an optional single space, then the column names.
header: "#" " "? (WORD _SEPARATOR?)+

// Data row: one or more cells, each optionally followed by a separator.
row: (_anything _SEPARATOR?)+ _NL

// Any single cell value.
_anything: INT | WORD | NON_SEPARATOR_STRING | FLOAT | SIGNED_FLOAT

// Letters plus ".", ";", "\" and "/".
// The upper-case range must be A-Z: the range A-z would additionally match
// the ASCII characters that sit between "Z" and "a" ("[", "]", "^", "_", "`").
NON_SEPARATOR_STRING: /[a-zA-Z.;\\\/]+/

// Cells are separated by a run of spaces, a tab, or a comma.
_SEPARATOR: /[ ]+/
          | "\t"
          | ","

%import common.NEWLINE -> _NL
%import common.WORD
%import common.INT
%import common.FLOAT
%import common.SIGNED_FLOAT
@@ -0,0 +1,26 @@ | |||
"Transformer for evaluating csv.lark" | |||
from lark import Transformer | |||
class CsvTreeToPandasDict(Transformer):
    """Turn a tree parsed with ``csv.lark`` into a column-oriented dict.

    The result maps each header name to the list of values found in that
    column, in row order.
    """

    # Terminal callbacks: convert each matched token to a plain Python value.
    INT = int
    FLOAT = float
    SIGNED_FLOAT = float
    WORD = str
    NON_SEPARATOR_STRING = str

    def row(self, children):
        """Return the row's (already converted) cell values unchanged."""
        return children

    def start(self, children):
        """Build ``{column name: [values, ...]}`` from the header and the rows."""
        # No callback is defined for ``header``, so the first child is still a
        # tree whose children are the column names.
        header_tree, rows = children[0], children[1:]
        columns = header_tree.children

        table = {name: [] for name in columns}
        for cells in rows:
            # Cells are matched to columns by position.
            for position, value in enumerate(cells):
                table[columns[position]].append(value)
        return table
@@ -0,0 +1,17 @@ | |||
"Transformer for evaluating json.lark" | |||
from lark import Transformer, v_args | |||
class JsonTreeToJson(Transformer):
    """Evaluate a tree parsed with ``json.lark`` into plain Python values."""

    @v_args(inline=True)
    def string(self, s):
        """Drop the enclosing quote characters and unescape escaped double quotes."""
        inner = s[1:-1]
        return inner.replace('\\"', '"')

    # Containers map directly onto the builtin constructors, which are called
    # with the list of already-transformed children.
    array = list
    pair = tuple
    object = dict

    # The single child is passed inline, so this is ``float(token)``.
    number = v_args(inline=True)(float)

    def null(self, _):
        """Evaluate the ``null`` literal."""
        return None

    def true(self, _):
        """Evaluate the ``true`` literal."""
        return True

    def false(self, _):
        """Evaluate the ``false`` literal."""
        return False
@@ -0,0 +1,19 @@ | |||
// JSON grammar: a document consists of exactly one value.
?start: value

?value: object
      | array
      | string
      | SIGNED_NUMBER -> number
      | "true"        -> true
      | "false"       -> false
      | "null"        -> null

// Whitespace is optional after the opening bracket/brace and after each
// comma; the item list itself may be empty.
array:  "[" _WS? [value ("," _WS? value)*] "]"
object: "{" _WS? [pair ("," _WS? pair)*] "}"

// Whitespace after the colon is mandatory here (there is no "?" on _WS).
pair:   string ":" _WS value

string: ESCAPED_STRING

%import common.ESCAPED_STRING
%import common.SIGNED_NUMBER
%import common.WS -> _WS
@@ -0,0 +1,51 @@ | |||
""" | |||
Grammar Composition | |||
=================== | |||
This example shows how to do grammar composition in Lark, by creating a new | |||
file format that allows both CSV and JSON to co-exist. | |||
1) We define ``storage.lark``, which imports both ``csv.lark`` and ``json.lark``, | |||
and allows them to be used one after the other. | |||
In the generated tree, each imported rule/terminal is automatically prefixed (with ``json__`` or ``csv__``),
which creates an implicit namespace and allows them to coexist without collisions. | |||
2) We merge their respective transformers (unaware of each other) into a new base transformer. | |||
The resulting transformer can evaluate both JSON and CSV in the parse tree. | |||
The methods of each transformer are renamed into their appropriate namespace, using the given prefix. | |||
This approach allows full re-use: the transformers don't need to care if their grammar is used directly,
or being imported, or who is doing the importing. | |||
""" | |||
from pathlib import Path | |||
from lark import Lark | |||
from json import dumps | |||
from lark.visitors import Transformer, merge_transformers | |||
from eval_csv import CsvTreeToPandasDict | |||
from eval_json import JsonTreeToJson | |||
# Directory containing this script; used to locate the sample input file.
__dir__ = Path(__file__).parent


class Storage(Transformer):
    """Base transformer for the top-level ``storage.lark`` grammar."""

    def start(self, children):
        """Return the list of already-evaluated sections unchanged."""
        return children


# Merge the CSV and JSON transformers into the base transformer. Each one is
# attached under the prefix given by its keyword (``csv__...`` / ``json__...``).
storage_transformer = merge_transformers(Storage(), csv=CsvTreeToPandasDict(), json=JsonTreeToJson())
parser = Lark.open("storage.lark", rel_to=__file__)


def main():
    """Parse and evaluate a JSON-only input, then the combined JSON + CSV file."""
    json_tree = parser.parse(dumps({"test": "a", "dict": { "list": [1, 1.2] }}))
    res = storage_transformer.transform(json_tree)
    print("Just JSON: ", res)

    # Read the sample through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(__dir__ / 'combined_csv_and_json.txt') as sample_file:
        sample_text = sample_file.read()
    csv_json_tree = parser.parse(sample_text)
    res = storage_transformer.transform(csv_json_tree)
    print("JSON + CSV: ", dumps(res, indent=2))


if __name__ == "__main__":
    main()
@@ -0,0 +1,8 @@ | |||
// Imported names receive the namespace of this file, so every import is
// renamed explicitly to the name used in the rule below.
// See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565
%import .csv.start  -> csv__start
%import .csv._NL    -> _NL
%import .json.start -> json__start

// A storage file is one or more sections, each either a CSV table or a JSON
// value; a JSON value may be followed by an optional newline.
start: (csv__start | json__start _NL?)+
@@ -149,6 +149,59 @@ class Transformer(_Decoratable): | |||
return token | |||
def merge_transformers(base_transformer=None, **transformers_to_merge):
    """Merge several transformers into ``base_transformer``, one 'namespace' each.

    Every public callable attribute of each given transformer is attached to
    ``base_transformer`` under the name ``prefix__methodname``, where ``prefix``
    is the keyword that transformer was passed with. Attributes whose name
    starts with an underscore, and ``transform`` itself, are not copied.

    This function is especially useful for processing grammars that import
    other grammars, thereby creating some of their rules in a 'namespace'
    (i.e. with a consistent name prefix). In this case, the key for the
    transformer should match the name of the imported grammar.

    Parameters:
        base_transformer (Transformer, optional): The transformer that all other
            transformers will be added to. It is modified in place; a new
            ``Transformer`` is created when it is omitted.
        **transformers_to_merge: Keyword arguments, in the form of
            ``name_prefix = transformer``.

    Returns:
        The ``base_transformer``, with the prefixed methods attached.

    Raises:
        AttributeError: In case of a name collision in the merged methods.

    Example:
        ::

            class TBase(Transformer):
                def start(self, children):
                    return children[0] + 'bar'

            class TImportedGrammar(Transformer):
                def foo(self, children):
                    return "foo"

            composed_transformer = merge_transformers(TBase(), imported=TImportedGrammar())

            t = Tree('start', [ Tree('imported__foo', []) ])

            assert composed_transformer.transform(t) == 'foobar'
    """
    if base_transformer is None:
        base_transformer = Transformer()

    for prefix, transformer in transformers_to_merge.items():
        for name in dir(transformer):
            member = getattr(transformer, name)
            is_excluded_name = name.startswith("_") or name == "transform"
            if is_excluded_name or not callable(member):
                continue

            namespaced = "%s__%s" % (prefix, name)
            if hasattr(base_transformer, namespaced):
                raise AttributeError("Cannot merge: method '%s' appears more than once" % namespaced)

            # ``member`` was fetched from the instance, so methods stay bound
            # to the transformer they came from.
            setattr(base_transformer, namespaced, member)

    return base_transformer
class InlineTransformer(Transformer): # XXX Deprecated | |||
def _call_userfunc(self, tree, new_children=None): | |||
# Assumes tree is already transformed | |||
@@ -9,7 +9,7 @@ import functools | |||
from lark.tree import Tree | |||
from lark.lexer import Token | |||
from lark.visitors import Visitor, Visitor_Recursive, Transformer, Interpreter, visit_children_decor, v_args, Discard, Transformer_InPlace, \ | |||
Transformer_InPlaceRecursive, Transformer_NonRecursive | |||
Transformer_InPlaceRecursive, Transformer_NonRecursive, merge_transformers | |||
class TestTrees(TestCase): | |||
@@ -233,21 +233,62 @@ class TestTrees(TestCase): | |||
x = MyTransformer().transform( t ) | |||
self.assertEqual(x, t2) | |||
def test_transformer_variants(self):
    """Every Transformer variant must evaluate the same tree to the same result."""
    first_add = Tree('add', [Token('N', '1'), Token('N', '2')])
    second_add = Tree('add', [Token('N', '3'), Token('N', '4')])
    tree = Tree('start', [first_add, second_add])
    expected = Tree('start', [3, 7])

    variants = (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive)
    for base in variants:
        class T(base):
            def add(self, children):
                return sum(children)

            def N(self, token):
                return int(token)

        # Each variant gets its own deep copy, so no run can be affected by
        # changes an earlier (possibly in-place) variant made to the tree.
        result = T().transform(copy.deepcopy(tree))
        self.assertEqual(result, expected)
def test_merge_transformers(self):
    """merge_transformers must namespace the merged callbacks and reject clashes."""
    main_branch = Tree('main', [Token("A", '1'), Token("B", '2')])
    module_branch = Tree("module__main", [Token("A", "2"), Token("B", "3")])
    tree = Tree('start', [main_branch, module_branch])

    class T1(Transformer):
        # Reference transformer: handles the namespaced rule by itself.
        A = int
        B = int
        main = sum
        start = list

        def module__main(self, children):
            return sum(children)

    class T2(Transformer):
        # Same as T1 but without ``module__main``; that comes from the merge.
        A = int
        B = int
        main = sum
        start = list

    class T3(Transformer):
        # Provides ``main`` as a regular method.
        def main(self, children):
            return sum(children)

    class T4(Transformer):
        # Provides ``main`` as a plain callable class attribute.
        main = sum

    t1_res = T1().transform(tree)
    for module_transformer in (T3(), T4()):
        composed_res = merge_transformers(T2(), module=module_transformer).transform(tree)
        self.assertEqual(t1_res, composed_res)

    # T1 already defines ``module__main``, so merging ``main`` under the
    # ``module`` prefix collides.
    with self.assertRaises(AttributeError):
        merge_transformers(T1(), module=T3())
if __name__ == '__main__': | |||
unittest.main() |