@@ -105,41 +105,37 @@ class State(object): | |||
class Parser(object): | |||
def __init__(self, rules, start=None):
    """Build the rule objects and the by-name rule index for a grammar.

    rules -- iterable of dicts; each must have 'name' and 'symbols'
             entries and may have a 'postprocess' entry (None if absent).
    start -- name of the start rule. When omitted (or falsy), the name
             of the first rule is used.

    Raises IndexError when no start is given and rules is empty.

    No parse table is created here. parse() builds a fresh table on
    every call, and advance_to() takes the table as an explicit
    argument, so the constructor only has to prepare the grammar.
    """
    self.rules = [
        Rule(r['name'], r['symbols'], r.get('postprocess', None))
        for r in rules
    ]
    self.rules_by_name = classify(self.rules, lambda r: r.name)
    self.start = start or self.rules[0].name
def advance_to(self, table, n, added_rules):
    """Call process() on every state in column n of table.

    table       -- list of columns, each a list of state objects.
    n           -- index of the column to process.
    added_rules -- passed unchanged to each state's process() call.
    """
    # Iterate the live list, not a copy.
    # NOTE(review): process() receives the table and presumably appends
    # new states to table[n]; iterating the list itself lets the loop
    # reach those as well -- confirm against State.process.
    for w, s in enumerate(table[n]):
        s.process(n, w, table, self.rules_by_name, added_rules)
def parse(self, stream):
    """Parse an iterable of tokens and return the list of results.

    stream -- iterable of tokens. Any iterable works (it is only
              enumerated, never indexed). Tokens need `line` and
              `column` attributes for the error message.

    Returns list(self.finish(table)) for the table built from stream.

    Raises Exception('Error at line L:C') for the first token after
    which no state remains in the table.

    All parsing state lives in a table local to this call, so the same
    Parser instance can be used for any number of parse() calls.
    """
    initial_rules = set(self.rules_by_name[self.start])
    table = [[State(r, 0, 0) for r in initial_rules]]
    self.advance_to(table, 0, initial_rules)

    for pos, token in enumerate(stream):
        next_column = []
        table.append(next_column)

        # Carry over every state of the current column that accepts
        # this token.
        for s in table[pos]:
            x = s.consume_terminal(token)
            if x:
                next_column.append(x)

        self.advance_to(table, pos + 1, set())

        if not next_column:
            # Report the token held by the loop instead of indexing the
            # stream, so iterators and generators work as well as lists.
            raise Exception('Error at line {t.line}:{t.column}'.format(t=token))

    return list(self.finish(table))
def finish(self): | |||
for t in self.table[-1]: | |||
def finish(self, table): | |||
for t in table[-1]: | |||
if (t.rule.name == self.start | |||
and t.expect == len(t.rule.symbols) | |||
and t.reference == 0 | |||
@@ -1,59 +0,0 @@ | |||
from lark.tree import Transformer | |||
from lark.lark import Lark | |||
calc_grammar = """ | |||
?start: sum | |||
| NAME "=" sum -> *assign_var | |||
?sum: product | |||
| sum "+" product -> *add | |||
| sum "-" product -> *sub | |||
?product: atom | |||
| product "*" atom -> *mul | |||
| product "/" atom -> *div | |||
?atom: /[\d.]+/ -> *number | |||
| "-" atom -> *neg | |||
| NAME -> *var | |||
| "(" sum ")" | |||
NAME: /\w+/ | |||
WS.ignore: /\s+/ | |||
""" | |||
class CalculateTree(Transformer):
    """Transformer that evaluates calculator rules as they are reduced.

    Arithmetic rules are served directly by functions from the operator
    module, `number` converts its argument with float(), and variables
    are kept in the `vars` dict of the instance.
    """

    # operator.div only exists on Python 2. truediv exists on both major
    # versions and gives the same result as div for float operands, which
    # is what `number = float` produces.
    from operator import add, sub, mul, neg
    from operator import truediv as div
    number = float

    def __init__(self):
        # Maps variable name -> last assigned value.
        self.vars = {}

    def assign_var(self, name, value):
        """Store value under name and return the value."""
        self.vars[name] = value
        return value

    def var(self, name):
        """Return the value stored for name; raises KeyError if unset."""
        return self.vars[name]
calc_parser = Lark(calc_grammar, parser='lalr', transformer=CalculateTree()) | |||
calc = calc_parser.parse | |||
def main():
    """Read expressions from the '> ' prompt and print each result.

    The loop ends when end-of-file is reached on standard input.
    """
    # raw_input only exists on Python 2; on Python 3 the same behaviour
    # is provided by input().
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        try:
            s = read_line('> ')
        except EOFError:
            break
        print(calc(s))
def test():
    """Print the result of an assignment and of an expression using it."""
    # print(...) with a single argument behaves the same on Python 2 and 3
    # and matches the call style already used in main().
    print(calc("a = 1+2"))
    print(calc("1+a*-3"))
if __name__ == '__main__': | |||
test() | |||
# main() | |||
@@ -1,62 +0,0 @@ | |||
import sys | |||
from lark.lark import Lark | |||
from lark.tree import Transformer | |||
json_grammar = r""" | |||
?start: value | |||
?value: object | |||
| array | |||
| string | |||
| number | |||
| "true" -> *true | |||
| "false" -> *false | |||
| "null" -> *null | |||
array : "[" [value ("," value)*] "]" | |||
object : "{" [pair ("," pair)*] "}" | |||
pair : string ":" value | |||
*number : /-?\d+(\.\d+)?([eE][+-]?\d+)?/ | |||
*string : /".*?(?<!\\)"/ | |||
WS.ignore.newline: /[ \t\n]+/ | |||
""" | |||
class TreeToJson(Transformer):
    """Transformer that turns parsed JSON rules into plain Python values."""

    # Rules that map straight onto a built-in constructor.
    number = float
    object = dict
    pair = tuple
    array = list

    def string(self, s):
        """Return s without its first and last character (the quotes)."""
        return s[1:-1]

    def null(self):
        """Value for the "null" literal."""
        return None

    def true(self):
        """Value for the "true" literal."""
        return True

    def false(self):
        """Value for the "false" literal."""
        return False
json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson()) | |||
def test():
    """Parse a sample document, print it, and compare with json.loads."""
    import json

    test_json = '''
        {
            "empty_object" : {},
            "empty_array"  : [],
            "booleans"     : { "YES" : true, "NO" : false },
            "numbers"      : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ],
            "strings"      : [ "This", [ "And" , "That" ] ],
            "nothing"      : null
        }
    '''

    j = json_parser.parse(test_json)
    # print(...) with a single argument works on both Python 2 and 3.
    print(j)
    assert j == json.loads(test_json)
if __name__ == '__main__':
    test()
    # Parsing a file is optional: without a path on the command line the
    # script just runs the self-test instead of failing on sys.argv[1].
    if len(sys.argv) > 1:
        with open(sys.argv[1]) as f:
            print(json_parser.parse(f.read()))
@@ -1,6 +1,9 @@ | |||
from __future__ import absolute_import | |||
from .utils import STRING_TYPE | |||
import functools | |||
import types | |||
from .utils import STRING_TYPE, inline_args | |||
from .load_grammar import load_grammar | |||
from .tree import Tree, Transformer | |||
@@ -36,6 +39,10 @@ class LarkOptions(object): | |||
self.parser = o.pop('parser', 'earley') | |||
self.transformer = o.pop('transformer', None) | |||
assert self.parser in ENGINE_DICT | |||
if self.parser == 'earley' and self.transformer: | |||
raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm. Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)') | |||
if o: | |||
raise ValueError("Unknown options: %s" % o.keys()) | |||
@@ -45,11 +52,11 @@ class Callback(object): | |||
class RuleTreeToText(Transformer):
    """Transformer callbacks for 'expansions', 'expansion' and 'alias'.

    Every callback takes the node's children as one sequence argument.
    """

    def expansions(self, x):
        """Return the children unchanged."""
        return x

    def expansion(self, symbols):
        """Return ([value of each symbol], None); None means "no alias"."""
        return [sym.value for sym in symbols], None

    def alias(self, args):
        """Attach an alias to an expansion.

        args -- a two-item sequence: the (expansion, alias) pair produced
                by expansion(), followed by the alias object.

        Returns (expansion, alias.value).
        """
        # Unpack in the body: tuple parameters in the signature are not
        # valid syntax on Python 3.
        (expansion, _alias), alias = args
        assert _alias is None, (alias, expansion, '-', _alias)
        return expansion, alias.value
@@ -78,12 +85,6 @@ def create_expand1_tree_builder_function(tree_builder): | |||
return tree_builder(children) | |||
return f | |||
def create_rule_inline(f):
    """Adapt f so that it is called with one sequence of children.

    The returned callable takes a single iterable and passes its items
    to f as separate positional arguments.
    """
    return lambda children: f(*children)
class LALR: | |||
def build_parser(self, rules, callback): | |||
ga = GrammarAnalyzer(rules) | |||
@@ -109,6 +110,7 @@ class EarleyParser: | |||
return res[0] | |||
ENGINE_DICT = { 'lalr': LALR, 'earley': Earley } | |||
class Lark: | |||
def __init__(self, grammar, **options): | |||
@@ -144,10 +146,7 @@ class Lark: | |||
self.lexer = self._build_lexer() | |||
if not self.options.only_lex: | |||
self.parser_engine = { | |||
'lalr': LALR, | |||
'earley': Earley, | |||
}[self.options.parser]() | |||
self.parser_engine = ENGINE_DICT[self.options.parser]() | |||
self.parser = self._build_parser() | |||
def _build_lexer(self): | |||
@@ -171,27 +170,25 @@ class Lark: | |||
raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases" % origin) | |||
expand1 = origin.startswith('?') | |||
inline_args = origin.startswith('*') or (alias and alias.startswith('*')) | |||
_origin = origin.lstrip('?*') | |||
if alias: | |||
alias = alias.lstrip('*') | |||
_alias = 'autoalias_%s_%s' % (_origin, '_'.join(expansion)) | |||
assert not hasattr(callback, _alias) | |||
f = getattr(transformer, alias or _origin, None) | |||
if f is None: | |||
try: | |||
f = transformer._get_func(alias or _origin) | |||
# f = getattr(transformer, alias or _origin) | |||
except AttributeError: | |||
if alias: | |||
f = self._create_tree_builder_function(alias) | |||
else: | |||
f = self._create_tree_builder_function(_origin) | |||
if expand1: | |||
f = create_expand1_tree_builder_function(f) | |||
else: | |||
if inline_args: | |||
f = create_rule_inline(f) | |||
alias_handler = create_rule_handler(expansion, f) | |||
assert not hasattr(callback, _alias) | |||
setattr(callback, _alias, alias_handler) | |||
rules.append((_origin, expansion, _alias)) | |||
@@ -3,7 +3,7 @@ from lexer import Lexer, Token | |||
from grammar_analysis import GrammarAnalyzer | |||
from parser import Parser | |||
from tree import Tree as T, Transformer, Visitor | |||
from tree import Tree as T, Transformer, InlineTransformer, Visitor | |||
_TOKEN_NAMES = { | |||
':' : 'COLON', | |||
@@ -186,7 +186,7 @@ class SaveDefinitions(object): | |||
def item(self, *x):
    """Accept any positional arguments and do nothing (returns None)."""
    return None
class EBNF_to_BNF(Transformer): | |||
class EBNF_to_BNF(InlineTransformer): | |||
def __init__(self): | |||
self.new_rules = {} | |||
self.prefix = 'anon' | |||
@@ -286,7 +286,6 @@ def inline_args(f): | |||
return f(*args) | |||
return _f | |||
class GrammarLoader: | |||
def __init__(self): | |||
self.rules = list(generate_aliases()) | |||
@@ -1,3 +1,4 @@ | |||
from utils import inline_args | |||
class Tree(object): | |||
def __init__(self, data, children): | |||
@@ -53,20 +54,29 @@ class Tree(object): | |||
class Transformer(object):
    """Rebuild a tree bottom-up by calling a method named after each node.

    For a node whose `data` is NAME, the callback returned by
    _get_func(NAME) is called with ONE argument: the list of the node's
    already-transformed children. Nodes without a matching attribute are
    handled by __default__.
    """

    def _get_func(self, name):
        """Return the callback for rule `name`.

        Raises AttributeError when the transformer has no such attribute.
        Subclasses may override this to adapt the calling convention.
        """
        return getattr(self, name)

    def transform(self, tree):
        """Return the result of transforming `tree`.

        Children that are Tree instances are transformed first; all other
        children are passed through unchanged.
        """
        items = [self.transform(c) if isinstance(c, Tree) else c
                 for c in tree.children]
        try:
            f = self._get_func(tree.data)
        except AttributeError:
            return self.__default__(tree.data, items)
        else:
            # Called outside the try block so that an AttributeError
            # raised inside the callback is not mistaken for a missing
            # callback.
            return f(items)

    def __default__(self, data, children):
        """Fallback for nodes without a callback: rebuild the Tree node."""
        return Tree(data, children)
class InlineTransformer(Transformer):
    """Transformer whose callbacks get the children as separate arguments."""

    def _get_func(self, name):
        """Return the callback for `name`, wrapped by inline_args and bound to self."""
        callback = getattr(self, name)
        wrapper = inline_args(callback)
        return wrapper.__get__(self)
class Visitor(object): | |||
def visit(self, tree): | |||
for child in tree.children: | |||
@@ -49,3 +49,23 @@ except NameError: # Python 3 | |||
Str = type(u'') | |||
import functools | |||
import types | |||
def inline_args(f):
    """Wrap f so that it takes (self, args) and spreads args out.

    The returned wrapper always has the signature wrapper(self, args),
    where args is a sequence. How f is finally called depends on what
    f is:

    * plain Python function      -> f(self, *args)
    * type or built-in function  -> f(*args)            (self is dropped)
    * anything else              -> f.__func__(self, *args)
      (this is the bound-method case; the wrapper's own self is used,
      not the object the method was bound to)
    """
    if isinstance(f, types.FunctionType):
        @functools.wraps(f)
        def _f_func(self, args):
            return f(self, *args)
        return _f_func
    # `type` is used instead of types.TypeType: on Python 2 they are the
    # same object, and types.TypeType does not exist on Python 3.
    elif isinstance(f, (type, types.BuiltinFunctionType)):
        @functools.wraps(f)
        def _f_builtin(self, args):
            return f(*args)
        return _f_builtin
    else:
        @functools.wraps(f)
        def _f(self, args):
            return f.__func__(self, *args)
        return _f