diff --git a/examples/advanced/extend_python.py b/examples/advanced/extend_python.py index ba5fa21..708eb81 100644 --- a/examples/advanced/extend_python.py +++ b/examples/advanced/extend_python.py @@ -11,7 +11,7 @@ from lark.lark import Lark from python_parser import PythonIndenter GRAMMAR = r""" -%import .python3 (compound_stmt, single_input, file_input, eval_input, test, suite, _NEWLINE, _INDENT, _DEDENT, COMMENT) +%import python (compound_stmt, single_input, file_input, eval_input, test, suite, _NEWLINE, _INDENT, _DEDENT, COMMENT) %extend compound_stmt: match_stmt diff --git a/examples/advanced/py3to2.py b/examples/advanced/py3to2.py new file mode 100644 index 0000000..59a6f91 --- /dev/null +++ b/examples/advanced/py3to2.py @@ -0,0 +1,93 @@ +""" +Python 3 to Python 2 converter (tree templates) +=============================================== + +This example demonstrates how to translate between two trees using tree templates. +It parses Python 3, translates it to a Python 2 AST, and then outputs the result as Python 2 code. + +Uses reconstruct_python.py for generating the final Python 2 code. +""" + + +from lark import Lark +from lark.tree_templates import TemplateConf, TemplateTranslator + +from reconstruct_python import PythonIndenter, PythonReconstructor + + +# +# 1. 
# Define a Python parser that also accepts template vars in the code (in the form of $var)
#
TEMPLATED_PYTHON = r"""
%import python (single_input, file_input, eval_input, atom, var, stmt, expr, testlist_star_expr, _NEWLINE, _INDENT, _DEDENT, COMMENT, NAME)

%extend atom: TEMPLATE_NAME -> var

TEMPLATE_NAME: "$" NAME

?template_start: (stmt | testlist_star_expr _NEWLINE)

%ignore /[\t \f]+/ // WS
%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
%ignore COMMENT
"""

# Start rules exposed by the parser: the three imported from the python
# grammar, plus ``template_start`` which is declared in the grammar above.
_START_RULES = ['single_input', 'file_input', 'eval_input', 'template_start']

parser = Lark(
    TEMPLATED_PYTHON,
    parser='lalr',
    start=_START_RULES,
    postlex=PythonIndenter(),
    maybe_placeholders=False,
)


def parse_template(s):
    """Parse ``s`` (with a newline appended) starting from ``template_start``."""
    source = s + '\n'
    return parser.parse(source, start='template_start')


def parse_code(s):
    """Parse ``s`` (with a newline appended) starting from ``file_input``."""
    source = s + '\n'
    return parser.parse(source, start='file_input')


#
# 2. Define translations using templates (each template code is parsed to a template tree)
#

pytemplate = TemplateConf(parse=parse_template)

# Each (pattern, replacement) pair is written as template source code and
# turned into a pair of Template objects through ``pytemplate``.
translations_3to2 = {
    pytemplate(pattern): pytemplate(replacement)
    for pattern, replacement in [
        ('yield from $a', 'for _tmp in $a: yield _tmp'),
        ('raise $e from $x', 'raise $e'),
        ('$a / $b', 'float($a) / $b'),
    ]
}

#
# 3.
Translate and reconstruct Python 3 code into valid Python 2 code +# + +python_reconstruct = PythonReconstructor(parser) + +def translate_py3to2(code): + tree = parse_code(code) + tree = TemplateTranslator(translations_3to2).translate(tree) + return python_reconstruct.reconstruct(tree) + + +# +# Test Code +# + +_TEST_CODE = ''' +if a / 2 > 1: + yield from [1,2,3] +else: + raise ValueError(a) from e + +''' + +def test(): + print(_TEST_CODE) + print(' -----> ') + print(translate_py3to2(_TEST_CODE)) + +if __name__ == '__main__': + test() \ No newline at end of file diff --git a/examples/advanced/reconstruct_python.py b/examples/advanced/reconstruct_python.py index c962dd0..76dfd9f 100644 --- a/examples/advanced/reconstruct_python.py +++ b/examples/advanced/reconstruct_python.py @@ -8,10 +8,15 @@ a small formatter. """ -from lark import Token +from lark import Lark, Token from lark.reconstruct import Reconstructor -from python_parser import python_parser3 +from python_parser import PythonIndenter + +# Official Python grammar by Lark +python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'], + parser='lalr', postlex=PythonIndenter(), + start='file_input', maybe_placeholders=False) SPACE_AFTER = set(',+-*/~@<>="|:') @@ -53,16 +58,26 @@ def postproc(items): yield "\n" -python_reconstruct = Reconstructor(python_parser3, {'_NEWLINE': special, '_DEDENT': special, '_INDENT': special}) +class PythonReconstructor: + def __init__(self, parser): + self._recons = Reconstructor(parser, {'_NEWLINE': special, '_DEDENT': special, '_INDENT': special}) + + def reconstruct(self, tree): + return self._recons.reconstruct(tree, postproc) def test(): + python_reconstructor = PythonReconstructor(python_parser3) + self_contents = open(__file__).read() tree = python_parser3.parse(self_contents+'\n') - output = python_reconstruct.reconstruct(tree, postproc) + output = python_reconstructor.reconstruct(tree) tree_new = python_parser3.parse(output) + print(tree.pretty()) + 
class TemplateConf:
    """Template Configuration

    Allows customization for different uses of Template

    Parameters:
        parse: Optional callable that receives source text (``str``) and
            returns a ``Tree``. It is only needed when a string is passed
            where a tree is expected (see ``_get_tree``).
    """

    def __init__(self, parse=None):
        self._parse = parse

    def test_var(self, var: "Union[Tree, str]") -> Optional[str]:
        """Given a tree node, if it is a template variable return its name. Otherwise, return None.

        This method may be overridden for customization

        A string leaf is a variable when it starts with ``$``. A tree is a
        variable when its ``data`` is ``'var'`` and its first child is a string
        starting with ``$``. All leading ``$`` characters are stripped from the
        returned name.

        Parameters:
            var: Tree | str - The tree node to test

        Returns:
            The variable name without its ``$`` prefix, or None.
        """
        if isinstance(var, str):
            return var.lstrip('$') if var.startswith('$') else None

        # ``var.children`` is checked first so an empty ``var`` node is simply
        # "not a variable" instead of an IndexError.
        if isinstance(var, Tree) and var.data == 'var' and var.children:
            name = var.children[0]
            # Only a string leaf can carry the ``$`` marker.
            if isinstance(name, str) and name.startswith('$'):
                return name.lstrip('$')

        return None

    def _get_tree(self, template: "TreeOrCode"):
        """Return ``template`` as a tree, parsing it first when it is a string.

        Raises:
            AssertionError: if a string is given but no parse function was
                configured, or if the result is not a ``Tree``.
        """
        if isinstance(template, str):
            assert self._parse, "a parse function is required to turn source code into a tree"
            template = self._parse(template)

        assert isinstance(template, Tree), template
        return template

    def __call__(self, template):
        """Create a ``Template`` from ``template`` that is bound to this configuration."""
        return Template(template, conf=self)

    def _match_tree_template(self, template, tree):
        """Match the template node ``template`` against the node ``tree``.

        Returns:
            A dict mapping variable names to the nodes they matched (empty when
            the template matched without variables), or None when there is no
            match. If a variable name occurs more than once in the template,
            the binding found last wins.
        """
        template_var = self.test_var(template)
        if template_var:
            return {template_var: tree}

        if isinstance(template, str):
            return {} if template == tree else None

        assert isinstance(template, Tree), template

        # A tree template cannot match a string leaf; without this guard the
        # ``tree.data`` access below raises AttributeError.
        if not isinstance(tree, Tree):
            return None

        if template.data != tree.data or len(template.children) != len(tree.children):
            return None

        res = {}
        for t1, t2 in zip(template.children, tree.children):
            matches = self._match_tree_template(t1, t2)
            if matches is None:
                return None
            res.update(matches)

        return res
class _ReplaceVars(Transformer):
    """Transformer that replaces every template-variable node with its bound value.

    Parameters:
        conf: The template configuration; its ``test_var`` decides which nodes
            are variables.
        vars: Mapping from variable name to the node that replaces it.
    """

    def __init__(self, conf, vars):
        super().__init__()
        self.conf = conf
        self.vars = vars

    def __default__(self, data, children, meta):
        tree = super().__default__(data, children, meta)

        var = self.conf.test_var(tree)
        if var:
            try:
                return self.vars[var]
            except KeyError:
                # Same exception type as a plain lookup, but it names the variable.
                raise KeyError("No value provided for template variable $%s" % var) from None
        return tree


class Template:
    """Represents a tree templates, tied to a specific configuration

    A tree template is a tree that contains nodes that are template variables.
    Those variables will match any tree.
    (future versions may support annotations on the variables, to allow more complex templates)

    Parameters:
        tree: The template, as a tree or as source code for ``conf`` to parse.
        conf: The configuration to use. When omitted, a new ``TemplateConf()``
            without a parse function is created.
    """

    def __init__(self, tree: "Tree", conf=None):
        # The fallback is created per instance rather than once in the
        # signature, so templates never share a configuration by accident.
        self.conf = TemplateConf() if conf is None else conf
        self.tree = self.conf._get_tree(tree)

    def match(self, tree: "TreeOrCode"):
        """Match a tree template to a tree.

        A tree template without variables will only match ``tree`` if it is equal to the template.

        Parameters:
            tree (Tree): The tree to match to the template

        Returns:
            Optional[Dict[str, Tree]]: If match is found, returns a dictionary mapping
                template variable names to their matching tree nodes.
                If no match was found, returns None.
        """
        tree = self.conf._get_tree(tree)
        return self.conf._match_tree_template(self.tree, tree)

    def search(self, tree: "TreeOrCode"):
        """Search for all occurrences of the tree template inside ``tree``.

        Yields:
            ``(subtree, variables)`` for every subtree that matches, where
            ``variables`` is the dictionary returned by ``match``.
        """
        tree = self.conf._get_tree(tree)
        for subtree in tree.iter_subtrees():
            res = self.match(subtree)
            # An empty dict is a successful match of a variable-free template;
            # only None means "no match".
            if res is not None:
                yield subtree, res

    def apply_vars(self, vars: "Mapping[str, Tree]"):
        """Apply vars to the template tree

        Returns the result of transforming the template tree with each variable
        node replaced by ``vars[name]``.

        Raises:
            KeyError: if the template uses a variable that ``vars`` does not contain.
        """
        return _ReplaceVars(self.conf, vars).transform(self.tree)
def translate(t1: "Template", t2: "Template", tree: "TreeOrCode"):
    """Search tree and translate each occurrence of t1 into t2.

    Every subtree found by ``t1.search`` is overwritten in place (through
    ``subtree.set``) with the result of applying the matched variables to ``t2``.

    Parameters:
        t1: The template to look for.
        t2: The template that replaces each match.
        tree: The tree to translate, or source code for ``t1.conf`` to parse.

    Returns:
        The translated tree (the same object that was searched).

    Raises:
        TypeError: if applying the variables to ``t2`` does not produce a tree,
            so there is nothing to overwrite the matched subtree with.
    """
    tree = t1.conf._get_tree(tree)      # ensure it's a tree, parse if necessary and possible
    for subtree, bindings in t1.search(tree):
        res = t2.apply_vars(bindings)
        # A replacement template that is just a variable may resolve to a
        # string leaf, which has neither ``data`` nor ``children``.
        if not isinstance(res, Tree):
            raise TypeError("Replacement template must produce a Tree, got %r" % (res,))
        subtree.set(res.data, res.children)
    return tree


class TemplateTranslator:
    """Utility class for translating a collection of patterns

    Parameters:
        translations: Mapping from the template to search for to the template
            that replaces it. Keys and values must be ``Template`` instances.

    Raises:
        TypeError: if any key or value is not a ``Template``.
    """

    def __init__(self, translations: "Mapping[Template, Template]"):
        # Explicit check instead of ``assert`` so it still runs under ``python -O``.
        for pattern, replacement in translations.items():
            if not (isinstance(pattern, Template) and isinstance(replacement, Template)):
                raise TypeError(
                    "translations must map Template to Template, got %r -> %r"
                    % (pattern, replacement)
                )
        self.translations = translations

    def translate(self, tree: "Tree"):
        """Apply every translation to ``tree``, in mapping order, and return the result."""
        for k, v in self.translations.items():
            tree = translate(k, v, tree)
        return tree