@@ -103,12 +103,17 @@ v_args | |||||
.. autofunction:: lark.visitors.v_args | .. autofunction:: lark.visitors.v_args | ||||
merge_transformers | |||||
------------------ | |||||
.. autofunction:: lark.visitors.merge_transformers | |||||
Discard | Discard | ||||
------- | ------- | ||||
.. autoclass:: lark.visitors.Discard | .. autoclass:: lark.visitors.Discard | ||||
VisitError | VisitError | ||||
------- | |||||
---------- | |||||
.. autoclass:: lark.exceptions.VisitError | .. autoclass:: lark.exceptions.VisitError |
@@ -0,0 +1,10 @@ | |||||
Grammar Composition | |||||
=================== | |||||
This example shows how to do grammar composition in Lark, by creating a new | |||||
file format that allows both CSV and JSON to co-exist. | |||||
We show how, by using namespaces, Lark grammars and their transformers can be fully reused - | |||||
they don't need to care if their grammar is used directly, or being imported, or who is doing the importing. | |||||
See [``main.py``](main.py) for more details. |
@@ -0,0 +1,6 @@ | |||||
{"header": ["this", "is", "json", 1111]} | |||||
# file lines author | |||||
data.json 12 Robin | |||||
data.csv 30 erezsh | |||||
compiler.py 123123 Megalng | |||||
{"footer": "done"} |
@@ -0,0 +1,14 @@ | |||||
// CSV-like grammar: one "#"-prefixed header line followed by one or more rows.
start: header _NL row+

// Header: "#", an optional single space, then words with optional separators.
header: "#" " "? (WORD _SEPARATOR?)+

// Each row is a sequence of values and is terminated by a newline.
row: (_anything _SEPARATOR?)+ _NL
_anything: INT | WORD | NON_SEPARATOR_STRING | FLOAT | SIGNED_FLOAT

// NOTE(review): the range `A-z` spans ASCII 0x41-0x7A, so besides letters it
// also matches `[`, `\`, `]`, `^`, `_` and the backtick. This looks like a
// typo for `A-Z` (backslash is already listed explicitly) - confirm intent.
NON_SEPARATOR_STRING: /[a-zA-z.;\\\/]+/

// A separator is a run of spaces, a single tab, or a single comma.
_SEPARATOR: /[ ]+/
          | "\t"
          | ","

%import common.NEWLINE -> _NL
%import common.WORD
%import common.INT
%import common.FLOAT
%import common.SIGNED_FLOAT
@@ -0,0 +1,26 @@ | |||||
"Transformer for evaluating csv.lark" | |||||
from lark import Transformer | |||||
class CsvTreeToPandasDict(Transformer):
    """Turn a parsed csv.lark tree into a ``{column name: [values]}`` dict."""

    # Terminal callbacks: convert each token to the matching Python type.
    INT = int
    FLOAT = float
    SIGNED_FLOAT = float
    WORD = str
    NON_SEPARATOR_STRING = str

    def row(self, children):
        """Return the row's already-converted cells unchanged."""
        return children

    def start(self, children):
        """Build the column dict from the header tree and the row lists.

        ``children[0]`` is the (untransformed) header tree, whose children
        are the column names; the remaining children are the rows.
        """
        columns = children[0].children
        table = {name: [] for name in columns}

        for cells in children[1:]:
            # Look columns up by position, so a row longer than the header
            # raises IndexError rather than being silently truncated.
            for position, cell in enumerate(cells):
                table[columns[position]].append(cell)

        return table
@@ -0,0 +1,17 @@ | |||||
"Transformer for evaluating json.lark" | |||||
from lark import Transformer, v_args | |||||
class JsonTreeToJson(Transformer):
    """Evaluate a json.lark parse tree into plain Python objects."""

    @v_args(inline=True)
    def string(self, s):
        """Strip the surrounding quotes and unescape ``\\"`` sequences."""
        unquoted = s[1:-1]
        return unquoted.replace('\\"', '"')

    # Container rules map directly onto the built-in constructors.
    array = list
    pair = tuple
    object = dict

    # The single child of ``number`` is passed inline and converted to float.
    number = v_args(inline=True)(float)

    def null(self, _):
        return None

    def true(self, _):
        return True

    def false(self, _):
        return False
@@ -0,0 +1,19 @@ | |||||
// JSON grammar. Whitespace is matched explicitly through _WS; the lines shown
// contain no %ignore directive.
?start: value

?value: object
      | array
      | string
      | SIGNED_NUMBER      -> number
      | "true"             -> true
      | "false"            -> false
      | "null"             -> null

// Whitespace is optional after the opening bracket/brace and after each ",".
array  : "[" _WS? [value ("," _WS? value)*] "]"
object : "{" _WS? [pair ("," _WS? pair)*] "}"

// NOTE(review): whitespace after ":" is mandatory here (`_WS`, not `_WS?`),
// so compact JSON such as {"a":1} is not accepted - confirm this is intended.
pair   : string ":" _WS value

string : ESCAPED_STRING

%import common.ESCAPED_STRING
%import common.SIGNED_NUMBER
%import common.WS -> _WS
@@ -0,0 +1,51 @@ | |||||
""" | |||||
Grammar Composition | |||||
=================== | |||||
This example shows how to do grammar composition in Lark, by creating a new | |||||
file format that allows both CSV and JSON to co-exist. | |||||
1) We define ``storage.lark``, which imports both ``csv.lark`` and ``json.lark``, | |||||
and allows them to be used one after the other. | |||||
In the generated tree, each imported rule/terminal is automatically prefixed (with ``json__`` or ``csv__``),
which creates an implicit namespace and allows them to coexist without collisions. | |||||
2) We merge their respective transformers (unaware of each other) into a new base transformer. | |||||
The resulting transformer can evaluate both JSON and CSV in the parse tree. | |||||
The methods of each transformer are renamed into their appropriate namespace, using the given prefix. | |||||
This approach allows full re-use: the transformers don't need to care if their grammar is used directly,
or being imported, or who is doing the importing. | |||||
""" | |||||
from pathlib import Path | |||||
from lark import Lark | |||||
from json import dumps | |||||
from lark.visitors import Transformer, merge_transformers | |||||
from eval_csv import CsvTreeToPandasDict | |||||
from eval_json import JsonTreeToJson | |||||
# Directory containing this script; used to locate the sample input file.
# NOTE(review): a module-level ``__dir__`` is the PEP 562 hook consulted by
# ``dir(module)``, so binding it to a Path presumably breaks ``dir()`` on this
# module - consider renaming it together with its use in main().
__dir__ = Path(__file__).parent
class Storage(Transformer):
    """Base transformer that handles the top-level ``start`` rule."""

    def start(self, children):
        """Return the already-transformed children unchanged."""
        documents = children
        return documents
# Attach the CSV and JSON transformers to a Storage instance; their methods
# are exposed under the ``csv__`` and ``json__`` prefixes respectively.
storage_transformer = merge_transformers(
    Storage(),
    csv=CsvTreeToPandasDict(),
    json=JsonTreeToJson(),
)

# Load the combined grammar, resolving its path relative to this file.
parser = Lark.open("storage.lark", rel_to=__file__)
def main():
    """Parse and evaluate a JSON-only input, then the combined CSV + JSON sample file.

    Both results are printed to stdout.
    """
    json_tree = parser.parse(dumps({"test": "a", "dict": {"list": [1, 1.2]}}))
    res = storage_transformer.transform(json_tree)
    print("Just JSON: ", res)

    # Read the sample through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(__dir__ / 'combined_csv_and_json.txt') as sample_file:
        csv_json_tree = parser.parse(sample_file.read())
    res = storage_transformer.transform(csv_json_tree)
    print("JSON + CSV: ", dumps(res, indent=2))
if __name__ == "__main__": | |||||
main() |
@@ -0,0 +1,8 @@ | |||||
// A storage file is one or more documents, each either a CSV block or a JSON
// value; a JSON value may be followed by a newline.
start: (csv__start | json__start _NL?)+

// Renaming of the import variables is required, as they
// receive the namespace of this file.
// See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565
%import .csv.start -> csv__start
%import .csv._NL -> _NL
%import .json.start -> json__start
@@ -149,6 +149,59 @@ class Transformer(_Decoratable): | |||||
return token | return token | ||||
def merge_transformers(base_transformer=None, **transformers_to_merge):
    """Merge a collection of transformers into the base_transformer, each into its own 'namespace'.

    The public callable attributes of every given transformer are collected and
    attached to ``base_transformer`` under the name ``prefix__methodname``, where
    ``prefix`` is the keyword the transformer was passed with. Names starting
    with an underscore, and ``transform`` itself, are never copied.

    This function is especially useful for processing grammars that import other grammars,
    thereby creating some of their rules in a 'namespace' (i.e. with a consistent name prefix).
    In this case, the key for the transformer should match the name of the imported grammar.

    Parameters:
        base_transformer (Transformer, optional): The transformer that all other transformers will be added to.
            It is modified in place; a new ``Transformer`` is created when omitted.
        **transformers_to_merge: Keyword arguments, in the form of ``name_prefix = transformer``.

    Returns:
        The base transformer, with the prefixed methods attached.

    Raises:
        AttributeError: In case of a name collision in the merged methods

    Example:
        ::

            class TBase(Transformer):
                def start(self, children):
                    return children[0] + 'bar'

            class TImportedGrammar(Transformer):
                def foo(self, children):
                    return "foo"

            composed_transformer = merge_transformers(TBase(), imported=TImportedGrammar())

            t = Tree('start', [ Tree('imported__foo', []) ])

            assert composed_transformer.transform(t) == 'foobar'

    """
    target = Transformer() if base_transformer is None else base_transformer

    for namespace, source in transformers_to_merge.items():
        for attr_name in dir(source):
            member = getattr(source, attr_name)
            # Skip data attributes, private/dunder names and the ``transform``
            # entry point; only rule/terminal callbacks are merged.
            if not callable(member) or attr_name.startswith("_") or attr_name == "transform":
                continue

            merged_name = "__".join((namespace, attr_name))
            if hasattr(target, merged_name):
                raise AttributeError("Cannot merge: method '%s' appears more than once" % merged_name)
            setattr(target, merged_name, member)

    return target
class InlineTransformer(Transformer): # XXX Deprecated | class InlineTransformer(Transformer): # XXX Deprecated | ||||
def _call_userfunc(self, tree, new_children=None): | def _call_userfunc(self, tree, new_children=None): | ||||
# Assumes tree is already transformed | # Assumes tree is already transformed | ||||
@@ -9,7 +9,7 @@ import functools | |||||
from lark.tree import Tree | from lark.tree import Tree | ||||
from lark.lexer import Token | from lark.lexer import Token | ||||
from lark.visitors import Visitor, Visitor_Recursive, Transformer, Interpreter, visit_children_decor, v_args, Discard, Transformer_InPlace, \ | from lark.visitors import Visitor, Visitor_Recursive, Transformer, Interpreter, visit_children_decor, v_args, Discard, Transformer_InPlace, \ | ||||
Transformer_InPlaceRecursive, Transformer_NonRecursive | |||||
Transformer_InPlaceRecursive, Transformer_NonRecursive, merge_transformers | |||||
class TestTrees(TestCase): | class TestTrees(TestCase): | ||||
@@ -233,21 +233,62 @@ class TestTrees(TestCase): | |||||
x = MyTransformer().transform( t ) | x = MyTransformer().transform( t ) | ||||
self.assertEqual(x, t2) | self.assertEqual(x, t2) | ||||
def test_transformer_variants(self):
    """Every Transformer flavour must yield the same result for the same tree."""
    def make_add(left, right):
        return Tree('add', [Token('N', left), Token('N', right)])

    tree = Tree('start', [make_add('1', '2'), make_add('3', '4')])
    expected = Tree('start', [3, 7])
    variants = (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive)

    for base in variants:
        class T(base):
            def N(self, token):
                return int(token)

            def add(self, children):
                return sum(children)

        # Transform a deep copy each time, so every variant starts from an
        # untouched tree.
        result = T().transform(copy.deepcopy(tree))
        self.assertEqual(result, expected)
def test_merge_transformers(self):
    """merge_transformers() must namespace the merged methods and refuse name collisions."""
    own_main = Tree('main', [Token("A", '1'), Token("B", '2')])
    imported_main = Tree("module__main", [Token("A", "2"), Token("B", "3")])
    tree = Tree('start', [own_main, imported_main])

    class T1(Transformer):
        # Reference transformer: handles the namespaced rule by hand.
        A = int
        B = int
        main = sum
        start = list

        def module__main(self, children):
            return sum(children)

    class T2(Transformer):
        # Same as T1 but without the namespaced rule, which must come from a merge.
        A = int
        B = int
        main = sum
        start = list

    class T3(Transformer):
        def main(self, children):
            return sum(children)

    class T4(Transformer):
        main = sum

    expected = T1().transform(tree)

    # A method and a plain callable attribute must both be merged as ``module__main``.
    for imported_cls in (T3, T4):
        composed = merge_transformers(T2(), module=imported_cls())
        self.assertEqual(expected, composed.transform(tree))

    # T1 already defines ``module__main``, so merging T3 under ``module`` collides.
    with self.assertRaises(AttributeError):
        merge_transformers(T1(), module=T3())
if __name__ == '__main__': | if __name__ == '__main__': | ||||
unittest.main() | unittest.main() |