Browse Source

Merge pull request #976 from lark-parser/merge_transformers

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.12.0
Erez Shinan 3 years ago
committed by GitHub
parent
commit
94dcec4c36
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 255 additions and 5 deletions
  1. +6
    -1
      docs/visitors.rst
  2. +10
    -0
      examples/composition/README.md
  3. +6
    -0
      examples/composition/combined_csv_and_json.txt
  4. +14
    -0
      examples/composition/csv.lark
  5. +26
    -0
      examples/composition/eval_csv.py
  6. +17
    -0
      examples/composition/eval_json.py
  7. +19
    -0
      examples/composition/json.lark
  8. +51
    -0
      examples/composition/main.py
  9. +8
    -0
      examples/composition/storage.lark
  10. +53
    -0
      lark/visitors.py
  11. +45
    -4
      tests/test_trees.py

+ 6
- 1
docs/visitors.rst View File

@@ -103,12 +103,17 @@ v_args

.. autofunction:: lark.visitors.v_args

merge_transformers
------------------

.. autofunction:: lark.visitors.merge_transformers

Discard
-------

.. autoclass:: lark.visitors.Discard

VisitError
-------
----------

.. autoclass:: lark.exceptions.VisitError

+ 10
- 0
examples/composition/README.md View File

@@ -0,0 +1,10 @@
Grammar Composition
===================
This example shows how to do grammar composition in Lark, by creating a new
file format that allows both CSV and JSON to co-exist.
We show how, by using namespaces, Lark grammars and their transformers can be fully reused -
they don't need to care if their grammar is used directly, or being imported, or who is doing the importing.
See [``main.py``](main.py) for more details.

+ 6
- 0
examples/composition/combined_csv_and_json.txt View File

@@ -0,0 +1,6 @@
{"header": ["this", "is", "json", 1111]}
# file lines author
data.json 12 Robin
data.csv 30 erezsh
compiler.py 123123 Megalng
{"footer": "done"}

+ 14
- 0
examples/composition/csv.lark View File

@@ -0,0 +1,14 @@
// A CSV document: one header line, a newline, then one or more data rows.
start: header _NL row+
// Header line: "#", an optional single space, then one or more column names,
// each optionally followed by a separator.
header: "#" " "? (WORD _SEPARATOR?)+
// Data row: one or more values, each optionally followed by a separator,
// terminated by a newline.
row: (_anything _SEPARATOR?)+ _NL
// Any single cell value (inlined into its parent because of the leading "_").
_anything: INT | WORD | NON_SEPARATOR_STRING | FLOAT | SIGNED_FLOAT
// NOTE(review): the range `A-z` (lowercase z) also spans the ASCII characters
// between `Z` and `a`, i.e. [ \ ] ^ _ and the backtick. Confirm whether `A-Z`
// was intended before changing it, since inputs containing "_" match today.
NON_SEPARATOR_STRING: /[a-zA-z.;\\\/]+/
// Cell separator: a run of spaces, a single tab, or a single comma.
_SEPARATOR: /[ ]+/
| "\t"
| ","

%import common.NEWLINE -> _NL
%import common.WORD
%import common.INT
%import common.FLOAT
%import common.SIGNED_FLOAT

+ 26
- 0
examples/composition/eval_csv.py View File

@@ -0,0 +1,26 @@
"Transformer for evaluating csv.lark"
from lark import Transformer
class CsvTreeToPandasDict(Transformer):
    """Evaluate a ``csv.lark`` parse tree into a ``{column name: [values]}`` dict."""

    # Terminal callbacks: convert each token to the matching Python type.
    INT = int
    FLOAT = float
    SIGNED_FLOAT = float
    WORD = str
    NON_SEPARATOR_STRING = str

    def row(self, children):
        """Represent a row as the plain list of its (already converted) cells."""
        return children

    def start(self, children):
        """Build the column-oriented dict from the header and the rows.

        ``children[0]`` is the header tree, whose children are the column
        names; every following child is one row, as returned by ``row``.
        """
        columns = children[0].children
        table = {name: [] for name in columns}
        for cells in children[1:]:
            for position, cell in enumerate(cells):
                # Indexed lookup on purpose: a row with more cells than the
                # header has columns raises IndexError.
                table[columns[position]].append(cell)
        return table

+ 17
- 0
examples/composition/eval_json.py View File

@@ -0,0 +1,17 @@
"Transformer for evaluating json.lark"
from lark import Transformer, v_args
class JsonTreeToJson(Transformer):
    """Evaluate a ``json.lark`` parse tree into plain Python objects."""

    @v_args(inline=True)
    def string(self, s):
        """Strip the surrounding quotes and unescape embedded double quotes."""
        unquoted = s[1:-1]
        return unquoted.replace('\\"', '"')

    # Container rules map directly onto the builtin constructors.
    array = list
    pair = tuple
    object = dict
    number = v_args(inline=True)(float)

    # Literal rules ignore their children and return the matching constant.
    def null(self, _):
        return None

    def true(self, _):
        return True

    def false(self, _):
        return False

+ 19
- 0
examples/composition/json.lark View File

@@ -0,0 +1,19 @@
// Entry point: a document is a single JSON value.
// The leading "?" inlines the rule when it has exactly one child.
?start: value

// Any JSON value. The "->" aliases name the resulting tree node, which is the
// name a transformer callback must use.
?value: object
| array
| string
| SIGNED_NUMBER -> number
| "true" -> true
| "false" -> false
| "null" -> null

// Comma-separated values/pairs; whitespace is optional after "[", "{" and ",".
array : "[" _WS? [value ("," _WS? value)*] "]"
object : "{" _WS? [pair ("," _WS? pair)*] "}"
// NOTE: unlike the rules above, the whitespace after ":" is mandatory here
// (`_WS`, not `_WS?`), so `"a":1` without a space does not match.
pair : string ":" _WS value

// A double-quoted string, quotes included.
string : ESCAPED_STRING

%import common.ESCAPED_STRING
%import common.SIGNED_NUMBER
%import common.WS -> _WS

+ 51
- 0
examples/composition/main.py View File

@@ -0,0 +1,51 @@
"""
Grammar Composition
===================

This example shows how to do grammar composition in Lark, by creating a new
file format that allows both CSV and JSON to co-exist.

1) We define ``storage.lark``, which imports both ``csv.lark`` and ``json.lark``,
and allows them to be used one after the other.

In the generated tree, each imported rule/terminal is automatically prefixed (with ``json__`` or ``csv__``),
which creates an implicit namespace and allows them to coexist without collisions.

2) We merge their respective transformers (unaware of each other) into a new base transformer.
The resulting transformer can evaluate both JSON and CSV in the parse tree.

The methods of each transformer are renamed into their appropriate namespace, using the given prefix.
This approach allows full re-use: the transformers don't need to care if their grammar is used directly,
or being imported, or who is doing the importing.

"""
from pathlib import Path
from lark import Lark
from json import dumps
from lark.visitors import Transformer, merge_transformers

from eval_csv import CsvTreeToPandasDict
from eval_json import JsonTreeToJson

# Directory containing this example; used to locate the sample input file.
__dir__ = Path(__file__).parent


class Storage(Transformer):
    """Transformer for the top-level rules of ``storage.lark``."""

    def start(self, children):
        # Return the evaluated sections as a plain list, in document order.
        return children


# Merge the two independent transformers into the base one. The keyword names
# (``csv``, ``json``) are the prefixes used by the imported rules in
# ``storage.lark`` (``csv__start``, ``json__start``).
storage_transformer = merge_transformers(Storage(), csv=CsvTreeToPandasDict(), json=JsonTreeToJson())

parser = Lark.open("storage.lark", rel_to=__file__)


def main():
    """Parse and evaluate a JSON-only input, then a combined CSV + JSON file."""
    json_tree = parser.parse(dumps({"test": "a", "dict": { "list": [1, 1.2] }}))
    res = storage_transformer.transform(json_tree)
    print("Just JSON: ", res)

    # Path.read_text() opens, reads and closes the file; the previous
    # ``open(...).read()`` left the file handle to be closed by the GC.
    combined_text = (__dir__ / 'combined_csv_and_json.txt').read_text()
    csv_json_tree = parser.parse(combined_text)
    res = storage_transformer.transform(csv_json_tree)
    print("JSON + CSV: ", dumps(res, indent=2))


if __name__ == "__main__":
    main()

+ 8
- 0
examples/composition/storage.lark View File

@@ -0,0 +1,8 @@
// A storage file is one or more sections, each either a CSV document or a
// JSON value; a JSON value may be followed by an optional newline.
start: (csv__start | json__start _NL?)+

// Renaming of the import variables is required, as they
// receive the namespace of this file.
// See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565
%import .csv.start -> csv__start
%import .csv._NL -> _NL
%import .json.start -> json__start

+ 53
- 0
lark/visitors.py View File

@@ -149,6 +149,59 @@ class Transformer(_Decoratable):
return token


def merge_transformers(base_transformer=None, **transformers_to_merge):
    """Merge a collection of transformers into the base_transformer, each into its own 'namespace'.

    When called, it will collect the methods from each transformer, and assign them to base_transformer,
    with their name prefixed with the given keyword, as ``prefix__methodname``.

    This function is especially useful for processing grammars that import other grammars,
    thereby creating some of their rules in a 'namespace'. (i.e. with a consistent name prefix)
    In this case, the key for the transformer should match the name of the imported grammar.

    Only public callable attributes are merged: names starting with ``_`` and the ``transform``
    method itself are skipped, as are non-callable attributes.

    Parameters:
        base_transformer (Transformer, optional): The transformer that all other transformers will be added to.
            It is modified in place. If omitted, a new ``Transformer`` instance is created.
        **transformers_to_merge: Keyword arguments, in the form of ``name_prefix = transformer``.

    Returns:
        The ``base_transformer`` instance, with the prefixed methods set on it.

    Raises:
        AttributeError: In case of a name collision in the merged methods

    Example:
        ::

            class TBase(Transformer):
                def start(self, children):
                    return children[0] + 'bar'

            class TImportedGrammar(Transformer):
                def foo(self, children):
                    return "foo"

            composed_transformer = merge_transformers(TBase(), imported=TImportedGrammar())

            t = Tree('start', [ Tree('imported__foo', []) ])

            assert composed_transformer.transform(t) == 'foobar'

    """
    if base_transformer is None:
        base_transformer = Transformer()
    for prefix, transformer in transformers_to_merge.items():
        for method_name in dir(transformer):
            # Filter on the name *before* touching the attribute, so that private
            # properties/descriptors (which may be expensive or may raise) are
            # never evaluated just to be discarded.
            if method_name.startswith("_") or method_name == "transform":
                continue
            method = getattr(transformer, method_name)
            if not callable(method):
                continue
            prefixed_method = prefix + "__" + method_name
            if hasattr(base_transformer, prefixed_method):
                raise AttributeError("Cannot merge: method '%s' appears more than once" % prefixed_method)

            # Stores the method as bound to the *source* transformer, so it keeps
            # using that transformer's own state.
            setattr(base_transformer, prefixed_method, method)

    return base_transformer


class InlineTransformer(Transformer): # XXX Deprecated
def _call_userfunc(self, tree, new_children=None):
# Assumes tree is already transformed


+ 45
- 4
tests/test_trees.py View File

@@ -9,7 +9,7 @@ import functools
from lark.tree import Tree
from lark.lexer import Token
from lark.visitors import Visitor, Visitor_Recursive, Transformer, Interpreter, visit_children_decor, v_args, Discard, Transformer_InPlace, \
Transformer_InPlaceRecursive, Transformer_NonRecursive
Transformer_InPlaceRecursive, Transformer_NonRecursive, merge_transformers


class TestTrees(TestCase):
@@ -233,21 +233,62 @@ class TestTrees(TestCase):

x = MyTransformer().transform( t )
self.assertEqual(x, t2)
def test_transformer_variants(self):
    """Each Transformer variant must yield the same result for the same callbacks."""
    tree = Tree('start', [
        Tree('add', [Token('N', '1'), Token('N', '2')]),
        Tree('add', [Token('N', '3'), Token('N', '4')]),
    ])
    expected = Tree('start', [3, 7])
    variants = (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive)

    for base in variants:
        class T(base):
            def add(self, children):
                return sum(children)

            def N(self, token):
                return int(token)

        # Work on a deep copy so no variant can affect the tree seen by the next one.
        working_copy = copy.deepcopy(tree)
        self.assertEqual(T().transform(working_copy), expected)

def test_merge_transformers(self):
    """merge_transformers() must namespace merged callbacks and reject name collisions."""
    sample = Tree('start', [
        Tree('main', [Token("A", '1'), Token("B", '2')]),
        Tree("module__main", [Token("A", "2"), Token("B", "3")]),
    ])

    class T1(Transformer):
        # Reference transformer: handles the namespaced rule by hand.
        A = int
        B = int
        main = sum
        start = list

        def module__main(self, children):
            return sum(children)

    class T2(Transformer):
        # Base transformer: knows nothing about the ``module__`` namespace.
        A = int
        B = int
        main = sum
        start = list

    class T3(Transformer):
        # Callback given as a regular method.
        def main(self, children):
            return sum(children)

    class T4(Transformer):
        # Callback given as a plain callable attribute.
        main = sum

    expected = T1().transform(sample)

    merged_with_method = merge_transformers(T2(), module=T3())
    self.assertEqual(expected, merged_with_method.transform(sample))

    merged_with_callable = merge_transformers(T2(), module=T4())
    self.assertEqual(expected, merged_with_callable.transform(sample))

    # T1 already defines ``module__main``, so merging T3 under ``module`` collides.
    self.assertRaises(AttributeError, merge_transformers, T1(), module=T3())

if __name__ == '__main__':
unittest.main()

Loading…
Cancel
Save