@@ -105,41 +105,37 @@ class State(object): | |||||
class Parser(object): | class Parser(object): | ||||
def __init__(self, rules, start=None):
    """Build the rule objects and the by-name index used while parsing.

    rules -- iterable of mappings, each with 'name' and 'symbols' keys
             and an optional 'postprocess' entry.
    start -- name of the start rule; when falsy, the name of the first
             rule is used.
    """
    self.rules = []
    for spec in rules:
        self.rules.append(
            Rule(spec['name'], spec['symbols'], spec.get('postprocess', None))
        )
    self.rules_by_name = classify(self.rules, lambda rule: rule.name)

    if start:
        self.start = start
    else:
        self.start = self.rules[0].name
def advance_to(self, table, n, added_rules):
    """Call process() on every state held in column ``n`` of ``table``.

    table       -- list of columns, each a list of states.
    n           -- index of the column to process.
    added_rules -- passed through unchanged to each state's process().
    """
    column = table[n]
    # enumerate() walks the live list object, so a state appended to this
    # column while it is being processed is visited as well.
    # NOTE(review): State.process is not visible here -- confirm it appends.
    for index, state in enumerate(column):
        state.process(n, index, table, self.rules_by_name, added_rules)
def parse(self, stream):
    """Feed every token of ``stream`` through the parser and return the results.

    A fresh table is built for each call: column 0 is seeded with one
    state per start rule, and one new column is appended per token.

    stream -- any iterable of tokens (a list, an iterator or a generator).
              Tokens need ``line`` and ``column`` attributes for the
              error message.

    Returns the list produced by ``self.finish(table)``.
    Raises Exception when a token leaves the newest column empty.
    """
    initial_rules = set(self.rules_by_name[self.start])
    table = [[State(r, 0, 0) for r in initial_rules]]
    self.advance_to(table, 0, initial_rules)

    for pos, token in enumerate(stream):
        table.append([])

        # Carry forward every state that accepts this token.
        for s in table[pos]:
            x = s.consume_terminal(token)
            if x:
                table[pos + 1].append(x)

        self.advance_to(table, pos + 1, set())

        if not table[-1]:
            # Report the token in hand rather than stream[pos]: it is the
            # same object, and it keeps non-indexable streams working.
            raise Exception('Error at line {t.line}:{t.column}'.format(t=token))

    return list(self.finish(table))
def finish(self): | |||||
for t in self.table[-1]: | |||||
def finish(self, table): | |||||
for t in table[-1]: | |||||
if (t.rule.name == self.start | if (t.rule.name == self.start | ||||
and t.expect == len(t.rule.symbols) | and t.expect == len(t.rule.symbols) | ||||
and t.reference == 0 | and t.reference == 0 | ||||
@@ -1,59 +0,0 @@ | |||||
from lark.tree import Transformer | |||||
from lark.lark import Lark | |||||
calc_grammar = """ | |||||
?start: sum | |||||
| NAME "=" sum -> *assign_var | |||||
?sum: product | |||||
| sum "+" product -> *add | |||||
| sum "-" product -> *sub | |||||
?product: atom | |||||
| product "*" atom -> *mul | |||||
| product "/" atom -> *div | |||||
?atom: /[\d.]+/ -> *number | |||||
| "-" atom -> *neg | |||||
| NAME -> *var | |||||
| "(" sum ")" | |||||
NAME: /\w+/ | |||||
WS.ignore: /\s+/ | |||||
""" | |||||
class CalculateTree(Transformer):
    """Callbacks that evaluate the calculator grammar as it is parsed.

    The attribute names match the rule aliases used in ``calc_grammar``
    (add, sub, mul, div, neg, number, var, assign_var). Variables
    assigned with ``NAME = ...`` are kept in ``self.vars``.
    """
    from operator import add, sub, mul, neg
    try:
        from operator import div
    except ImportError:
        # Python 3 has no operator.div. Numbers are built with float(),
        # so true division is the matching operation there.
        from operator import truediv as div

    number = float

    def __init__(self):
        self.vars = {}

    def assign_var(self, name, value):
        """Store ``value`` under ``name`` and return the value."""
        self.vars[name] = value
        return value

    def var(self, name):
        """Return the value stored for ``name``; KeyError if never assigned."""
        return self.vars[name]
calc_parser = Lark(calc_grammar, parser='lalr', transformer=CalculateTree()) | |||||
calc = calc_parser.parse | |||||
def main():
    """Prompt for expressions, printing each result, until end of input."""
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3 renamed raw_input to input

    while True:
        try:
            s = read_line('> ')
        except EOFError:
            break
        print(calc(s))
def test():
    """Evaluate an assignment, then an expression that uses the variable, printing both."""
    # Single-argument print(...) prints the same text on Python 2 and 3,
    # and matches the call style already used in main().
    print(calc("a = 1+2"))
    print(calc("1+a*-3"))
if __name__ == '__main__': | |||||
test() | |||||
# main() | |||||
@@ -1,62 +0,0 @@ | |||||
import sys | |||||
from lark.lark import Lark | |||||
from lark.tree import Transformer | |||||
json_grammar = r""" | |||||
?start: value | |||||
?value: object | |||||
| array | |||||
| string | |||||
| number | |||||
| "true" -> *true | |||||
| "false" -> *false | |||||
| "null" -> *null | |||||
array : "[" [value ("," value)*] "]" | |||||
object : "{" [pair ("," pair)*] "}" | |||||
pair : string ":" value | |||||
*number : /-?\d+(\.\d+)?([eE][+-]?\d+)?/ | |||||
*string : /".*?(?<!\\)"/ | |||||
WS.ignore.newline: /[ \t\n]+/ | |||||
""" | |||||
class TreeToJson(Transformer):
    """Callbacks that turn the JSON grammar's rules into plain Python values.

    Containers and numbers map straight onto builtins; the three literal
    rules return their Python constants.
    """
    number = float
    object = dict
    pair = tuple
    array = list

    def string(self, s):
        # Drop the first and last character of the matched text.
        return s[1:-1]

    def null(self):
        return None

    def true(self):
        return True

    def false(self):
        return False
json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson()) | |||||
def test():
    """Parse a sample document and check the result against the stdlib json module."""
    test_json = '''
        {
            "empty_object" : {},
            "empty_array"  : [],
            "booleans"     : { "YES" : true, "NO" : false },
            "numbers"      : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ],
            "strings"      : [ "This", [ "And" , "That" ] ],
            "nothing"      : null
        }
    '''

    j = json_parser.parse(test_json)
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(j)
    import json
    assert j == json.loads(test_json)
if __name__ == '__main__':
    test()
    # Parse a file only when a path was given on the command line;
    # indexing sys.argv[1] unconditionally raised IndexError otherwise.
    if len(sys.argv) > 1:
        with open(sys.argv[1]) as f:
            print(json_parser.parse(f.read()))
@@ -1,6 +1,9 @@ | |||||
from __future__ import absolute_import | from __future__ import absolute_import | ||||
from .utils import STRING_TYPE | |||||
import functools | |||||
import types | |||||
from .utils import STRING_TYPE, inline_args | |||||
from .load_grammar import load_grammar | from .load_grammar import load_grammar | ||||
from .tree import Tree, Transformer | from .tree import Tree, Transformer | ||||
@@ -36,6 +39,10 @@ class LarkOptions(object): | |||||
self.parser = o.pop('parser', 'earley') | self.parser = o.pop('parser', 'earley') | ||||
self.transformer = o.pop('transformer', None) | self.transformer = o.pop('transformer', None) | ||||
assert self.parser in ENGINE_DICT | |||||
if self.parser == 'earley' and self.transformer: | |||||
raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm. Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)') | |||||
if o: | if o: | ||||
raise ValueError("Unknown options: %s" % o.keys()) | raise ValueError("Unknown options: %s" % o.keys()) | ||||
@@ -45,11 +52,11 @@ class Callback(object): | |||||
class RuleTreeToText(Transformer): | class RuleTreeToText(Transformer): | ||||
def expansions(self, x):
    """Return the list of already-converted alternatives unchanged."""
    return x
def expansion(self, symbols):
    """Return ``(values, None)``, where values lists each symbol's ``.value``.

    The second element is the alias slot; a bare expansion has none.
    """
    values = []
    for sym in symbols:
        values.append(sym.value)
    return values, None
def alias(self, children):
    """Attach an alias name to an expansion that does not have one yet.

    children -- a two-item sequence ``((expansion, existing_alias), alias)``,
                where ``alias`` has a ``.value`` attribute.

    Returns ``(expansion, alias.value)``.
    Raises AssertionError if the expansion already carries an alias.
    """
    # Unpack in the body: tuple parameters in a signature are a
    # SyntaxError on Python 3 (PEP 3113).
    (expansion, _alias), alias = children
    assert _alias is None, (alias, expansion, '-', _alias)
    return expansion, alias.value
@@ -78,12 +85,6 @@ def create_expand1_tree_builder_function(tree_builder): | |||||
return tree_builder(children) | return tree_builder(children) | ||||
return f | return f | ||||
def create_rule_inline(f):
    """Return a one-argument adapter that calls ``f`` with the children unpacked."""
    def call_with_unpacked(children):
        return f(*children)
    return call_with_unpacked
class LALR: | class LALR: | ||||
def build_parser(self, rules, callback): | def build_parser(self, rules, callback): | ||||
ga = GrammarAnalyzer(rules) | ga = GrammarAnalyzer(rules) | ||||
@@ -109,6 +110,7 @@ class EarleyParser: | |||||
return res[0] | return res[0] | ||||
ENGINE_DICT = { 'lalr': LALR, 'earley': Earley } | |||||
class Lark: | class Lark: | ||||
def __init__(self, grammar, **options): | def __init__(self, grammar, **options): | ||||
@@ -144,10 +146,7 @@ class Lark: | |||||
self.lexer = self._build_lexer() | self.lexer = self._build_lexer() | ||||
if not self.options.only_lex: | if not self.options.only_lex: | ||||
self.parser_engine = { | |||||
'lalr': LALR, | |||||
'earley': Earley, | |||||
}[self.options.parser]() | |||||
self.parser_engine = ENGINE_DICT[self.options.parser]() | |||||
self.parser = self._build_parser() | self.parser = self._build_parser() | ||||
def _build_lexer(self): | def _build_lexer(self): | ||||
@@ -171,27 +170,25 @@ class Lark: | |||||
raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases" % origin) | raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases" % origin) | ||||
expand1 = origin.startswith('?') | expand1 = origin.startswith('?') | ||||
inline_args = origin.startswith('*') or (alias and alias.startswith('*')) | |||||
_origin = origin.lstrip('?*') | _origin = origin.lstrip('?*') | ||||
if alias: | if alias: | ||||
alias = alias.lstrip('*') | alias = alias.lstrip('*') | ||||
_alias = 'autoalias_%s_%s' % (_origin, '_'.join(expansion)) | _alias = 'autoalias_%s_%s' % (_origin, '_'.join(expansion)) | ||||
assert not hasattr(callback, _alias) | |||||
f = getattr(transformer, alias or _origin, None) | |||||
if f is None: | |||||
try: | |||||
f = transformer._get_func(alias or _origin) | |||||
# f = getattr(transformer, alias or _origin) | |||||
except AttributeError: | |||||
if alias: | if alias: | ||||
f = self._create_tree_builder_function(alias) | f = self._create_tree_builder_function(alias) | ||||
else: | else: | ||||
f = self._create_tree_builder_function(_origin) | f = self._create_tree_builder_function(_origin) | ||||
if expand1: | if expand1: | ||||
f = create_expand1_tree_builder_function(f) | f = create_expand1_tree_builder_function(f) | ||||
else: | |||||
if inline_args: | |||||
f = create_rule_inline(f) | |||||
alias_handler = create_rule_handler(expansion, f) | alias_handler = create_rule_handler(expansion, f) | ||||
assert not hasattr(callback, _alias) | |||||
setattr(callback, _alias, alias_handler) | setattr(callback, _alias, alias_handler) | ||||
rules.append((_origin, expansion, _alias)) | rules.append((_origin, expansion, _alias)) | ||||
@@ -3,7 +3,7 @@ from lexer import Lexer, Token | |||||
from grammar_analysis import GrammarAnalyzer | from grammar_analysis import GrammarAnalyzer | ||||
from parser import Parser | from parser import Parser | ||||
from tree import Tree as T, Transformer, Visitor | |||||
from tree import Tree as T, Transformer, InlineTransformer, Visitor | |||||
_TOKEN_NAMES = { | _TOKEN_NAMES = { | ||||
':' : 'COLON', | ':' : 'COLON', | ||||
@@ -186,7 +186,7 @@ class SaveDefinitions(object): | |||||
def item(self, *x): pass | def item(self, *x): pass | ||||
class EBNF_to_BNF(Transformer): | |||||
class EBNF_to_BNF(InlineTransformer): | |||||
def __init__(self): | def __init__(self): | ||||
self.new_rules = {} | self.new_rules = {} | ||||
self.prefix = 'anon' | self.prefix = 'anon' | ||||
@@ -286,7 +286,6 @@ def inline_args(f): | |||||
return f(*args) | return f(*args) | ||||
return _f | return _f | ||||
class GrammarLoader: | class GrammarLoader: | ||||
def __init__(self): | def __init__(self): | ||||
self.rules = list(generate_aliases()) | self.rules = list(generate_aliases()) | ||||
@@ -1,3 +1,4 @@ | |||||
from utils import inline_args | |||||
class Tree(object): | class Tree(object): | ||||
def __init__(self, data, children): | def __init__(self, data, children): | ||||
@@ -53,20 +54,29 @@ class Tree(object): | |||||
class Transformer(object):
    """Bottom-up tree rewriter.

    For each node, the attribute named after ``tree.data`` is looked up
    and called with the list of already-transformed children. Nodes
    with no matching attribute go through ``__default__``.
    """

    def _get_func(self, name):
        """Return the callback for ``name``; AttributeError if there is none."""
        return getattr(self, name)

    def transform(self, tree):
        """Transform ``tree``'s sub-trees first, then ``tree`` itself."""
        items = []
        for child in tree.children:
            if isinstance(child, Tree):
                child = self.transform(child)
            items.append(child)

        try:
            callback = self._get_func(tree.data)
        except AttributeError:
            return self.__default__(tree.data, items)
        # Called outside the try so that an AttributeError raised inside
        # the callback is not mistaken for a missing callback.
        return callback(items)

    def __default__(self, data, children):
        """Fallback: rebuild the node as a Tree with the new children."""
        return Tree(data, children)
class InlineTransformer(Transformer):
    """Transformer whose callbacks are wrapped with ``inline_args``."""

    def _get_func(self, name):
        """Return the callback for ``name``, wrapped and bound to this instance."""
        callback = getattr(self, name)
        wrapper = inline_args(callback)
        # inline_args returns a plain (self, args) function; bind it to
        # this instance so it can be called with the children list alone.
        return wrapper.__get__(self)
class Visitor(object): | class Visitor(object): | ||||
def visit(self, tree): | def visit(self, tree): | ||||
for child in tree.children: | for child in tree.children: | ||||
@@ -49,3 +49,23 @@ except NameError: # Python 3 | |||||
Str = type(u'') | Str = type(u'') | ||||
import functools | |||||
import types | |||||
def inline_args(f):
    """Wrap ``f`` as a ``(self, args)`` function that spreads ``args`` out.

    The returned function always takes ``self`` and one sequence of
    arguments. How ``f`` is called depends on what it is:

    * plain function           -> ``f(self, *args)``
    * type or builtin function -> ``f(*args)`` (``self`` is dropped)
    * anything else            -> ``f.__func__(self, *args)``; this
      expects a bound method, or another object with ``__func__``.
    """
    if isinstance(f, types.FunctionType):
        @functools.wraps(f)
        def _f_func(self, args):
            return f(self, *args)
        return _f_func
    # The builtin ``type`` is the same object as Python 2's
    # types.TypeType, which no longer exists on Python 3.
    elif isinstance(f, (type, types.BuiltinFunctionType)):
        @functools.wraps(f)
        def _f_builtin(self, args):
            return f(*args)
        return _f_builtin
    else:
        @functools.wraps(f)
        def _f(self, args):
            return f.__func__(self, *args)
        return _f