Browse Source

Merge branch 'new_transformers' into 0.6.0

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.0
Erez Shinan 7 years ago
parent
commit
818a7173d0
14 changed files with 373 additions and 203 deletions
  1. +3
    -2
      examples/calc.py
  2. +3
    -3
      examples/json_parser.py
  3. +3
    -2
      lark/__init__.py
  4. +3
    -0
      lark/lexer.py
  5. +22
    -18
      lark/load_grammar.py
  6. +31
    -8
      lark/parse_tree_builder.py
  7. +10
    -9
      lark/parsers/earley.py
  8. +14
    -7
      lark/parsers/resolve_ambig.py
  9. +1
    -1
      lark/tools/nearley.py
  10. +10
    -126
      lark/tree.py
  11. +14
    -24
      lark/utils.py
  12. +216
    -0
      lark/visitors.py
  13. +3
    -2
      tests/test_parser.py
  14. +40
    -1
      tests/test_trees.py

+ 3
- 2
examples/calc.py View File

@@ -2,7 +2,7 @@
# This example shows how to write a basic calculator with variables.
#

from lark import Lark, InlineTransformer
from lark import Lark, Transformer, v_args

try:
input = raw_input # For Python2 compatibility
@@ -34,7 +34,8 @@ calc_grammar = """
%ignore WS_INLINE
"""

class CalculateTree(InlineTransformer):
@v_args(inline=True)
class CalculateTree(Transformer):
from operator import add, sub, mul, truediv as div, neg
number = float



+ 3
- 3
examples/json_parser.py View File

@@ -7,7 +7,7 @@

import sys

from lark import Lark, inline_args, Transformer
from lark import Lark, Transformer, v_args

json_grammar = r"""
?start: value
@@ -34,14 +34,14 @@ json_grammar = r"""
"""

class TreeToJson(Transformer):
@inline_args
@v_args(inline=True)
def string(self, s):
return s[1:-1].replace('\\"', '"')

array = list
pair = tuple
object = dict
number = inline_args(float)
number = v_args(inline=True)(float)

null = lambda self, _: None
true = lambda self, _: True


+ 3
- 2
lark/__init__.py View File

@@ -1,7 +1,8 @@
from .tree import Tree, Transformer, InlineTransformer
from .tree import Tree
from .visitors import Transformer, Visitor, v_args, Discard
from .visitors import InlineTransformer, inline_args # XXX Deprecated
from .common import ParseError, GrammarError, UnexpectedToken
from .lexer import UnexpectedInput, LexError
from .lark import Lark
from .utils import inline_args

__version__ = "0.5.6"

+ 3
- 0
lark/lexer.py View File

@@ -34,6 +34,8 @@ class Token(Str):
self.value = value
self.line = line
self.column = column
self.end_line = None
self.end_column = None
return self

@classmethod
@@ -112,6 +114,7 @@ class _Lex:
if t:
t.end_line = line_ctr.line
t.end_column = line_ctr.column

break
else:
if line_ctr.char_pos < len(stream):


+ 22
- 18
lark/load_grammar.py View File

@@ -15,7 +15,9 @@ from .common import is_terminal, GrammarError, LexerConf, ParserConf, PatternStr
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
from .utils import classify, suppress

from .tree import Tree, Transformer, InlineTransformer, Visitor, SlottedTree as ST
from .tree import Tree, SlottedTree as ST
from .visitors import Transformer, Visitor, v_args
inline_args = v_args(inline=True)

__path__ = os.path.dirname(__file__)
IMPORT_PATHS = [os.path.join(__path__, 'grammars')]
@@ -140,7 +142,8 @@ RULES = {
}


class EBNF_to_BNF(InlineTransformer):
@inline_args
class EBNF_to_BNF(Transformer):
def __init__(self):
self.new_rules = []
self.rules_by_expr = {}
@@ -209,17 +212,14 @@ class SimplifyRule_Visitor(Visitor):
# -->
# expansions( expansion(b, c, e), expansion(b, d, e) )

while True:
self._flatten(tree)

for i, child in enumerate(tree.children):
if isinstance(child, Tree) and child.data == 'expansions':
tree.data = 'expansions'
tree.children = [self.visit(ST('expansion', [option if i==j else other
for j, other in enumerate(tree.children)]))
for option in set(child.children)]
break
else:
self._flatten(tree)

for i, child in enumerate(tree.children):
if isinstance(child, Tree) and child.data == 'expansions':
tree.data = 'expansions'
tree.children = [self.visit(ST('expansion', [option if i==j else other
for j, other in enumerate(tree.children)]))
for option in set(child.children)]
break

def alias(self, tree):
@@ -243,11 +243,12 @@ class RuleTreeToText(Transformer):
return symbols, None
def alias(self, x):
(expansion, _alias), alias = x
assert _alias is None, (alias, expansion, '-', _alias)
assert _alias is None, (alias, expansion, '-', _alias) # Double alias not allowed
return expansion, alias.value


class CanonizeTree(InlineTransformer):
@inline_args
class CanonizeTree(Transformer):
def maybe(self, expr):
return ST('expr', [expr, Token('OP', '?', -1)])

@@ -257,7 +258,7 @@ class CanonizeTree(InlineTransformer):
tokenmods, value = args
return tokenmods + [value]

class PrepareAnonTerminals(InlineTransformer):
class PrepareAnonTerminals(Transformer):
"Create a unique list of anonymous tokens. Attempt to give meaningful names to them when we add them"

def __init__(self, tokens):
@@ -267,6 +268,7 @@ class PrepareAnonTerminals(InlineTransformer):
self.i = 0


@inline_args
def pattern(self, p):
value = p.value
if p in self.token_reverse and p.flags != self.token_reverse[p].pattern.flags:
@@ -356,7 +358,8 @@ def _literal_to_pattern(literal):
'REGEXP': PatternRE }[literal.type](s, flags)


class PrepareLiterals(InlineTransformer):
@inline_args
class PrepareLiterals(Transformer):
def literal(self, literal):
return ST('pattern', [_literal_to_pattern(literal)])

@@ -543,7 +546,8 @@ def options_from_rule(name, *x):
def symbols_from_strcase(expansion):
return [Terminal(x, filter_out=x.startswith('_')) if is_terminal(x) else NonTerminal(x) for x in expansion]

class PrepareGrammar(InlineTransformer):
@inline_args
class PrepareGrammar(Transformer):
def terminal(self, name):
return name
def nonterminal(self, name):


+ 31
- 8
lark/parse_tree_builder.py View File

@@ -2,9 +2,11 @@ from .common import GrammarError
from .utils import suppress
from .lexer import Token
from .grammar import Rule
from .tree import Tree
from .visitors import InlineTransformer # XXX Deprecated

###{standalone
from functools import partial
from functools import partial, wraps


class ExpandSingleChild:
@@ -27,15 +29,23 @@ class PropagatePositions:

if children:
for a in children:
with suppress(AttributeError):
res.line = a.line
res.column = a.column
if isinstance(a, Tree):
res.meta.line = a.meta.line
res.meta.column = a.meta.column
elif isinstance(a, Token):
res.meta.line = a.line
res.meta.column = a.column
break

for a in reversed(children):
with suppress(AttributeError):
res.end_line = a.end_line
res.end_column = a.end_column
# with suppress(AttributeError):
if isinstance(a, Tree):
res.meta.end_line = a.meta.end_line
res.meta.end_column = a.meta.end_column
elif isinstance(a, Token):
res.meta.end_line = a.end_line
res.meta.end_column = a.end_column

break

return res
@@ -86,6 +96,15 @@ def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous):
# Empty class with no members of its own; how it is used is not visible here.
class Callback(object):
    pass


def inline_args(func):
    """Adapt ``func`` so it accepts one sequence and receives its items as
    separate positional arguments. ``func``'s metadata is kept via ``wraps``."""
    def unpacking_call(children):
        return func(*children)

    return wraps(func)(unpacking_call)



class ParseTreeBuilder:
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False):
self.tree_class = tree_class
@@ -120,7 +139,11 @@ class ParseTreeBuilder:

user_callback_name = rule.alias or rule.origin.name
try:
f = transformer._get_func(user_callback_name)
f = getattr(transformer, user_callback_name)
assert not getattr(f, 'meta', False), "Meta args not supported for internal transformer"
# XXX InlineTransformer is deprecated!
if getattr(f, 'inline', False) or isinstance(transformer, InlineTransformer):
f = inline_args(f)
except AttributeError:
f = partial(self.tree_class, user_callback_name)



+ 10
- 9
lark/parsers/earley.py View File

@@ -13,18 +13,18 @@
# Author: Erez Shinan (2017)
# Email : erezshin@gmail.com

from ..tree import Tree
from ..visitors import Transformer_InPlace, v_args
from ..common import ParseError, UnexpectedToken
from ..tree import Tree, Transformer_NoRecurse
from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal


class Derivation(Tree):
_hash = None

def __init__(self, rule, items=None):
Tree.__init__(self, 'drv', items or [])
self.rule = rule
self.meta.rule = rule
self._hash = None

def _pretty_label(self): # Nicer pretty for debugging the parser
return self.rule.origin if self.rule else self.data
@@ -114,9 +114,9 @@ class Column:

if old_tree.data != '_ambig':
new_tree = old_tree.copy()
new_tree.rule = old_tree.rule
new_tree.meta.rule = old_tree.meta.rule
old_tree.set('_ambig', [new_tree])
old_tree.rule = None # No longer a 'drv' node
old_tree.meta.rule = None # No longer a 'drv' node

if item.tree.children[0] is old_tree: # XXX a little hacky!
raise ParseError("Infinite recursion in grammar! (Rule %s)" % item.rule)
@@ -230,9 +230,10 @@ class Parser:
return ApplyCallbacks(self.postprocess).transform(tree)


class ApplyCallbacks(Transformer_NoRecurse):
class ApplyCallbacks(Transformer_InPlace):
def __init__(self, postprocess):
self.postprocess = postprocess

def drv(self, tree):
return self.postprocess[tree.rule](tree.children)
@v_args(meta=True)
def drv(self, children, meta):
return self.postprocess[meta.rule](children)

+ 14
- 7
lark/parsers/resolve_ambig.py View File

@@ -1,7 +1,7 @@
from ..utils import compare
from functools import cmp_to_key

from ..tree import Tree, Visitor_NoRecurse
from ..tree import Tree


# Standard ambiguity resolver (uses comparison)
@@ -16,7 +16,7 @@ def _sum_priority(tree):

for n in tree.iter_subtrees():
try:
p += n.rule.options.priority or 0
p += n.meta.rule.options.priority or 0
except AttributeError:
pass

@@ -26,8 +26,15 @@ def _compare_priority(tree1, tree2):
tree1.iter_subtrees()

def _compare_drv(tree1, tree2):
rule1 = getattr(tree1, 'rule', None)
rule2 = getattr(tree2, 'rule', None)
try:
rule1 = tree1.meta.rule
except AttributeError:
rule1 = None

try:
rule2 = tree2.meta.rule
except AttributeError:
rule2 = None

if None == rule1 == rule2:
return compare(tree1, tree2)
@@ -45,7 +52,7 @@ def _compare_drv(tree1, tree2):
if c:
return c

c = _compare_rules(tree1.rule, tree2.rule)
c = _compare_rules(tree1.meta.rule, tree2.meta.rule)
if c:
return c

@@ -65,7 +72,7 @@ def _standard_resolve_ambig(tree):
best = max(tree.children, key=key_f)
assert best.data == 'drv'
tree.set('drv', best.children)
tree.rule = best.rule # needed for applying callbacks
tree.meta.rule = best.meta.rule # needed for applying callbacks

def standard_resolve_ambig(tree):
for ambig in tree.find_data('_ambig'):
@@ -93,7 +100,7 @@ def _antiscore_sum_resolve_ambig(tree):
best = min(tree.children, key=_antiscore_sum_drv)
assert best.data == 'drv'
tree.set('drv', best.children)
tree.rule = best.rule # needed for applying callbacks
tree.meta.rule = best.meta.rule # needed for applying callbacks

def antiscore_sum_resolve_ambig(tree):
for ambig in tree.find_data('_ambig'):


+ 1
- 1
lark/tools/nearley.py View File

@@ -160,7 +160,7 @@ def create_code_for_nearley_grammar(g, start, builtin_path, folder_path):
emit('class TransformNearley(Transformer):')
for alias in n2l.alias_js_code:
emit(" %s = var.get('%s').to_python()" % (alias, alias))
emit(" __default__ = lambda self, n, c: c if c else None")
emit(" __default__ = lambda self, n, c, m: c if c else None")

emit()
emit('parser = Lark(grammar, start="n_%s")' % start)


+ 10
- 126
lark/tree.py View File

@@ -5,13 +5,21 @@ except ImportError:

from copy import deepcopy

from .utils import inline_args
class Meta:
    """Plain attribute container; Tree.meta creates one lazily on first access."""

###{standalone
class Tree(object):
def __init__(self, data, children):
def __init__(self, data, children, meta=None):
self.data = data
self.children = children
self._meta = meta

@property
def meta(self):
if self._meta is None:
self._meta = Meta()
return self._meta

def __repr__(self):
return 'Tree(%s, %s)' % (self.data, self.children)
@@ -103,130 +111,6 @@ class SlottedTree(Tree):
__slots__ = 'data', 'children', 'rule'


###{standalone
class Transformer(object):
    """Recursive tree rewriter.

    Children are transformed first; the method named after ``tree.data`` is
    then called with the list of transformed children.
    """

    def _get_func(self, name):
        """Return the callback for ``name``; AttributeError if there is none."""
        return getattr(self, name)

    def transform(self, tree):
        """Transform ``tree`` and return whatever its callback returns.

        A child whose transformation raises ``Discard`` is left out of the
        list handed to the callback.
        """
        transformed = []
        for child in tree.children:
            try:
                if isinstance(child, Tree):
                    child = self.transform(child)
            except Discard:
                continue
            transformed.append(child)

        try:
            callback = self._get_func(tree.data)
        except AttributeError:
            return self.__default__(tree.data, transformed)
        return callback(transformed)

    def __default__(self, data, children):
        """Fallback when no callback matches: rebuild the node as a Tree."""
        return Tree(data, children)

    def __mul__(self, other):
        """``a * b`` builds a chain that applies ``a`` and then ``b``."""
        return TransformerChain(self, other)


class Discard(Exception):
    """Raised while transforming a child to drop it from its parent's children."""

class TransformerChain(object):
    """Applies several transformers one after another, in the order given."""

    def __init__(self, *transformers):
        self.transformers = transformers

    def transform(self, tree):
        """Feed ``tree`` through every transformer; each receives the previous result."""
        result = tree
        for step in self.transformers:
            result = step.transform(result)
        return result

    def __mul__(self, other):
        """Return a new chain with ``other`` appended; this chain is unchanged."""
        extended = self.transformers + (other,)
        return TransformerChain(*extended)



class InlineTransformer(Transformer):
    """Transformer variant whose callbacks are passed through ``inline_args``."""

    def _get_func(self, name): # use super()._get_func
        unbound = inline_args(getattr(self, name))
        # Re-bind the wrapper to this instance so it is called like a method.
        return unbound.__get__(self)


class Visitor(object):
    """Recursive visitor: visits Tree children first, then the node itself."""

    def visit(self, tree):
        """Call the matching method on every subtree, then return ``tree``."""
        for child in tree.children:
            if not isinstance(child, Tree):
                continue
            self.visit(child)

        getattr(self, tree.data, self.__default__)(tree)
        return tree

    def __default__(self, tree):
        """Used when no method is named after ``tree.data``; does nothing."""


class Visitor_NoRecurse(Visitor):
    """Visitor that walks ``tree.iter_subtrees()`` instead of recursing."""

    def visit(self, tree):
        """Call the matching method on each subtree, then return ``tree``."""
        # The subtree list is materialised before any callback runs.
        pending = list(tree.iter_subtrees())

        for node in pending:
            handler = getattr(self, node.data, self.__default__)
            handler(node)
        return tree


from functools import wraps
def visit_children_decor(func):
    """Decorate a method so it receives the results of ``visit_children(tree)``
    instead of the tree itself."""
    def inner(cls, tree):
        return func(cls, cls.visit_children(tree))

    return wraps(func)(inner)

class Interpreter(object):
    """Visitor that starts at the root; a method decides whether to descend
    by calling ``visit_children``."""

    def visit(self, tree):
        """Dispatch ``tree`` to the method named after ``tree.data``."""
        handler = getattr(self, tree.data)
        return handler(tree)

    def visit_children(self, tree):
        """Return the children with every Tree child replaced by its visit result."""
        results = []
        for child in tree.children:
            if isinstance(child, Tree):
                child = self.visit(child)
            results.append(child)
        return results

    def __getattr__(self, name):
        # Only reached for attributes that do not exist: fall back to the default.
        return self.__default__

    def __default__(self, tree):
        """Default handler: visit the children and return their results."""
        return self.visit_children(tree)


class Transformer_NoRecurse(Transformer):
    # Transformer that walks tree.iter_subtrees() instead of recursing, and
    # rewrites each subtree's children in place. Callbacks receive the tree
    # node itself (f(t)), not a list of children.
    def transform(self, tree):
        # Materialise the subtree list before any children list is replaced.
        subtrees = list(tree.iter_subtrees())

        def _t(t):
            # Assumes t is already transformed
            try:
                f = self._get_func(t.data)
            except AttributeError:
                return self.__default__(t)
            else:
                return f(t)

        for subtree in subtrees:
            children = []
            for c in subtree.children:
                try:
                    children.append(_t(c) if isinstance(c, Tree) else c)
                except Discard:
                    # A callback raised Discard: leave this child out.
                    pass
            subtree.children = children

        # The root is not a child of any subtree, so it is handled last.
        return _t(tree)

    def __default__(self, t):
        # No matching callback: keep the node as it is.
        return t
###}


def pydot__tree_to_png(tree, filename):
import pydot
graph = pydot.Dot(graph_type='digraph', rankdir="LR")


+ 14
- 24
lark/utils.py View File

@@ -50,39 +50,29 @@ except NameError: # Python 3
###{standalone

import types
import functools
from functools import wraps, partial
from contextlib import contextmanager

Str = type(u'')

def inline_args(f):
# print '@@', f.__name__, type(f), isinstance(f, types.FunctionType), isinstance(f, types.TypeType), isinstance(f, types.BuiltinFunctionType)
def smart_decorator(f, create_decorator):
if isinstance(f, types.FunctionType):
@functools.wraps(f)
def _f_func(self, args):
return f(self, *args)
return _f_func
return wraps(f)(create_decorator(f, True))

elif isinstance(f, (type, types.BuiltinFunctionType)):
@functools.wraps(f)
def _f_builtin(_self, args):
return f(*args)
return _f_builtin
return wraps(f)(create_decorator(f, False))

elif isinstance(f, types.MethodType):
@functools.wraps(f.__func__)
def _f(self, args):
return f.__func__(self, *args)
return _f
elif isinstance(f, functools.partial):
return wraps(f)(create_decorator(f.__func__, True))

elif isinstance(f, partial):
# wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
# @functools.wraps(f)
def _f(self, args):
return f(*args)
return _f
return create_decorator(f.__func__, True)

else:
@functools.wraps(f.__call__.__func__)
def _f(self, args):
return f.__call__.__func__(self, *args)
return _f
return create_decorator(f.__func__.__call__, True)




try:


+ 216
- 0
lark/visitors.py View File

@@ -0,0 +1,216 @@
from inspect import isclass, getmembers, getmro
from functools import wraps

from .utils import smart_decorator
from .tree import Tree

class Discard(Exception):
    """Raised while transforming a child to drop it from its parent's children."""


# Transformers

class Transformer:
    """Recursive tree transformer.

    For every subtree the children are transformed first, then the method
    named after ``tree.data`` is called. How it is called depends on flags
    found on the method: ``meta`` -> ``f(children, meta)``, ``inline`` ->
    ``f(*children)``, otherwise ``f(children)``. When no such method exists,
    ``__default__`` is used. A child whose transformation raises ``Discard``
    is dropped.
    """

    def _call_userfunc(self, data, children, meta):
        """Call the callback for ``data`` with already-transformed ``children``."""
        # Assumes tree is already transformed
        try:
            f = getattr(self, data)
        except AttributeError:
            return self.__default__(data, children, meta)
        else:
            if getattr(f, 'meta', False):
                return f(children, meta)
            elif getattr(f, 'inline', False):
                return f(*children)
            else:
                return f(children)

    def _transform_children(self, children):
        """Yield each child, transformed if it is a Tree; skip discarded ones."""
        for c in children:
            try:
                yield self._transform_tree(c) if isinstance(c, Tree) else c
            except Discard:
                pass

    def _transform_tree(self, tree):
        """Transform the children of ``tree``, then apply its callback."""
        children = list(self._transform_children(tree.children))
        return self._call_userfunc(tree.data, children, tree.meta)

    def transform(self, tree):
        """Transform ``tree`` and return the result of the root callback."""
        return self._transform_tree(tree)

    def __mul__(self, other):
        """``a * b`` builds a chain that applies ``a`` and then ``b``."""
        return TransformerChain(self, other)

    def __default__(self, data, children, meta):
        "Default operation on tree (for override)"
        return Tree(data, children, meta)

    @classmethod
    def _apply_decorator(cls, decorator, **kwargs):
        """Apply ``decorator`` to every public callback defined on ``cls``.

        Names starting with ``_`` and names that already exist on any base
        class are left alone. Non-callable attributes (plain constants) are
        skipped as well, since a function decorator cannot wrap them.
        Modifies ``cls`` in place and returns it.
        """
        mro = getmro(cls)
        assert mro[0] is cls
        libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)}
        for name, value in getmembers(cls):
            if name.startswith('_') or name in libmembers:
                continue
            if not callable(value):
                # Data attribute, not a callback: nothing to decorate.
                continue

            setattr(cls, name, decorator(value, **kwargs))
        return cls


class InlineTransformer(Transformer):   # XXX Deprecated
    """Transformer whose callbacks always receive the children as separate
    positional arguments."""

    def _call_userfunc(self, data, children, meta):
        # Assumes tree is already transformed
        try:
            callback = getattr(self, data)
        except AttributeError:
            return self.__default__(data, children, meta)
        return callback(*children)


class TransformerChain(object):
    """Applies several transformers one after another, in the order given."""

    def __init__(self, *transformers):
        self.transformers = transformers

    def transform(self, tree):
        """Feed ``tree`` through every transformer; each receives the previous result."""
        result = tree
        for step in self.transformers:
            result = step.transform(result)
        return result

    def __mul__(self, other):
        """Return a new chain with ``other`` appended; this chain is unchanged."""
        extended = self.transformers + (other,)
        return TransformerChain(*extended)


class Transformer_InPlace(Transformer):
    """Transformer that walks ``tree.iter_subtrees()`` instead of recursing and
    stores the transformed children back on each subtree."""

    def _transform_tree(self, tree):   # Cancel recursion
        return self._call_userfunc(tree.data, tree.children, tree.meta)

    def transform(self, tree):
        """Rewrite every subtree's children in place, then apply the root callback."""
        for node in tree.iter_subtrees():
            replaced = self._transform_children(node.children)
            node.children = list(replaced)

        return self._transform_tree(tree)


class Transformer_InPlaceRecursive(Transformer):
    """Recursive transformer that also stores the transformed children back
    on each tree it processes."""

    def _transform_tree(self, tree):
        replaced = self._transform_children(tree.children)
        tree.children = list(replaced)
        return self._call_userfunc(tree.data, tree.children, tree.meta)



# Visitors

class VisitorBase:
    """Shared dispatch logic for the visitor classes."""

    def _call_userfunc(self, tree):
        """Call the method named after ``tree.data``, or ``__default__`` if none."""
        handler = getattr(self, tree.data, self.__default__)
        return handler(tree)

    def __default__(self, tree):
        "Default operation on tree (for override)"
        return tree


class Visitor(VisitorBase):
    "Bottom-up visitor"

    def visit(self, tree):
        """Dispatch every subtree from ``tree.iter_subtrees()``; return ``tree``."""
        for node in tree.iter_subtrees():
            self._call_userfunc(node)
        return tree

class Visitor_Recursive(VisitorBase):
    """Visitor that recurses into Tree children before handling the node itself."""

    def visit(self, tree):
        """Visit every Tree child, then dispatch ``tree``; returns ``tree``."""
        for child in tree.children:
            if isinstance(child, Tree):
                self.visit(child)

        # Dispatch through the shared hook, as Visitor does, so a subclass
        # overriding _call_userfunc affects both visitor flavours alike.
        self._call_userfunc(tree)
        return tree



def visit_children_decor(func):
    """Decorate a method so it receives the results of ``visit_children(tree)``
    instead of the tree itself."""
    def inner(cls, tree):
        return func(cls, cls.visit_children(tree))

    return wraps(func)(inner)


class Interpreter:
    "Top-down visitor"

    def visit(self, tree):
        """Dispatch ``tree`` to the method named after ``tree.data``."""
        handler = getattr(self, tree.data)
        return handler(tree)

    def visit_children(self, tree):
        """Return the children with every Tree child replaced by its visit result."""
        results = []
        for child in tree.children:
            if isinstance(child, Tree):
                child = self.visit(child)
            results.append(child)
        return results

    def __getattr__(self, name):
        # Only reached for attributes that do not exist: fall back to the default.
        return self.__default__

    def __default__(self, tree):
        """Default handler: visit the children and return their results."""
        return self.visit_children(tree)




# Decorators

def _apply_decorator(obj, decorator, **kwargs):
try:
_apply = obj._apply_decorator
except AttributeError:
return decorator(obj, **kwargs)
else:
return _apply(decorator, **kwargs)



def _inline_args__func(func):
    """Wrap ``func`` so it takes ``(self, children)`` and spreads ``children``
    into positional arguments.

    ``smart_decorator`` chooses ``with_self`` and copies ``func``'s metadata
    onto the wrapper where it can. The factory below is a throwaway helper
    and is deliberately not decorated with ``wraps(func)``: that would only
    copy metadata onto the factory, and ``wraps`` raises for partial objects
    on Python 2.7 (https://bugs.python.org/issue3445).
    """
    def create_decorator(_f, with_self):
        if with_self:
            def f(self, children):
                return _f(self, *children)
        else:
            # _f does not take an instance: drop ``self`` before forwarding.
            def f(self, children):
                return _f(*children)
        return f

    return smart_decorator(func, create_decorator)


def inline_args(obj):   # XXX Deprecated
    """Apply the inline-arguments wrapper to ``obj`` via ``_apply_decorator``."""
    decorated = _apply_decorator(obj, _inline_args__func)
    return decorated



def _visitor_args_func_dec(func, inline=False, meta=False):
    """Wrap ``func`` via smart_decorator and tag the result with the
    ``inline`` and ``meta`` flags.

    The wrapper forwards its arguments unchanged; the flags are what
    Transformer._call_userfunc reads to decide how to call the callback.
    """
    assert not (inline and meta)
    def create_decorator(_f, with_self):
        if with_self:
            def f(self, *args, **kwargs):
                return _f(self, *args, **kwargs)
        else:
            # _f does not take an instance: drop ``self`` before forwarding.
            def f(self, *args, **kwargs):
                return _f(*args, **kwargs)
        return f

    f = smart_decorator(func, create_decorator)
    f.inline = inline
    f.meta = meta
    return f

def v_args(inline=False, meta=False):
    """Return a decorator that sets how callbacks receive their arguments.

    ``inline`` and ``meta`` are mutually exclusive; asking for both raises
    ValueError. The returned decorator applies the flags through
    ``_apply_decorator``.
    """
    conflicting = inline and meta
    if conflicting:
        raise ValueError("Visitor functions can either accept meta, or be inlined. Not both.")

    def _visitor_args_dec(obj):
        return _apply_decorator(obj, _visitor_args_func_dec, inline=inline, meta=meta)

    return _visitor_args_dec



+ 3
- 2
tests/test_parser.py View File

@@ -20,7 +20,8 @@ logging.basicConfig(level=logging.INFO)
from lark.lark import Lark
from lark.common import GrammarError, ParseError, UnexpectedToken
from lark.lexer import LexError, UnexpectedInput
from lark.tree import Tree, Transformer
from lark.tree import Tree
from lark.visitors import Transformer

__path__ = os.path.dirname(__file__)
def _read(n, *args):
@@ -57,7 +58,7 @@ class TestParsers(unittest.TestCase):
""", propagate_positions=True)

r = g.parse('a')
self.assertEqual( r.children[0].line, 1 )
self.assertEqual( r.children[0].meta.line, 1 )

def test_expand1(self):



+ 40
- 1
tests/test_trees.py View File

@@ -5,7 +5,8 @@ from unittest import TestCase
import copy
import pickle

from lark.tree import Tree, Interpreter, visit_children_decor
from lark.tree import Tree
from lark.visitors import Transformer, Interpreter, visit_children_decor, v_args


class TestTrees(TestCase):
@@ -58,6 +59,44 @@ class TestTrees(TestCase):

self.assertEqual(Interp3().visit(t), list('BCd'))

def test_transformer(self):
    # Tree for (3 - 1.1) + 1, with the leaf values stored as strings.
    t = Tree('add', [Tree('sub', [Tree('i', ['3']), Tree('f', ['1.1'])]), Tree('i', ['1'])])

    # Variant 1: only the leaf converters are inlined; sub and add take a list.
    class T(Transformer):
        i = v_args(inline=True)(int)
        f = v_args(inline=True)(float)

        sub = lambda self, values: values[0] - values[1]

        def add(self, values):
            return sum(values)

    res = T().transform(t)
    self.assertEqual(res, 2.9)

    # Variant 2: v_args on the class, so every callback takes positional arguments.
    @v_args(inline=True)
    class T(Transformer):
        i = int
        f = float
        sub = lambda self, a, b: a-b

        def add(self, a, b):
            return a + b


    res = T().transform(t)
    self.assertEqual(res, 2.9)


    # Variant 3: callbacks that are builtins imported into the class body.
    @v_args(inline=True)
    class T(Transformer):
        i = int
        f = float
        from operator import sub, add

    res = T().transform(t)
    self.assertEqual(res, 2.9)



if __name__ == '__main__':


Loading…
Cancel
Save