| @@ -105,41 +105,37 @@ class State(object): | |||
| class Parser(object): | |||
def __init__(self, rules, start=None):
    """Prepare the grammar rules used by every later call to ``parse``.

    rules -- iterable of mappings with the keys ``'name'`` and
             ``'symbols'`` and an optional ``'postprocess'`` entry; each
             one is turned into a ``Rule``.
    start -- name of the start rule; when falsy, the name of the first
             rule is used.

    No parse table is created here.  ``advance_to`` takes the table as an
    explicit argument, so the table belongs to an individual parse rather
    than to the parser object.
    """
    self.rules = [Rule(r['name'], r['symbols'], r.get('postprocess', None)) for r in rules]
    self.rules_by_name = classify(self.rules, lambda r: r.name)
    self.start = start or self.rules[0].name
def advance_to(self, table, n, added_rules):
    """Let every state in column ``n`` of ``table`` process itself.

    table       -- list of columns, each column being a list of states.
    n           -- index of the column to work on.
    added_rules -- collection handed through to each state's ``process``.

    Each state is called as
    ``process(n, index_in_column, table, self.rules_by_name, added_rules)``.
    """
    # The column is deliberately iterated live (not copied): any state that
    # gets appended to table[n] while the loop runs is visited as well.
    # NOTE(review): presumably ``process`` is what appends those states.
    for w, s in enumerate(table[n]):
        s.process(n, w, table, self.rules_by_name, added_rules)
def parse(self, stream):
    """Parse ``stream`` from scratch and return the list built from ``finish``.

    stream -- iterable of tokens.  A token only needs ``line`` and
              ``column`` attributes if it has to be reported in an error.

    A fresh table is built on every call, so no state is carried over from
    one call to the next.

    Raises ``Exception`` as soon as a token leaves the newest column empty.
    """
    initial_rules = set(self.rules_by_name[self.start])
    table = [[State(r, 0, 0) for r in initial_rules]]
    self.advance_to(table, 0, initial_rules)

    for pos, token in enumerate(stream):
        table.append([])

        # Offer the token to every state of the current column; truthy
        # results seed the next column.
        for s in table[pos]:
            x = s.consume_terminal(token)
            if x:
                table[pos + 1].append(x)

        self.advance_to(table, pos + 1, set())

        if not table[-1]:
            # Use the token already in hand instead of indexing the stream,
            # so generators and other non-indexable iterables work too.
            raise Exception('Error at line {t.line}:{t.column}'.format(t=token))

    return list(self.finish(table))
| def finish(self): | |||
| for t in self.table[-1]: | |||
| def finish(self, table): | |||
| for t in table[-1]: | |||
| if (t.rule.name == self.start | |||
| and t.expect == len(t.rule.symbols) | |||
| and t.reference == 0 | |||
| @@ -1,59 +0,0 @@ | |||
| from lark.tree import Transformer | |||
| from lark.lark import Lark | |||
| calc_grammar = """ | |||
| ?start: sum | |||
| | NAME "=" sum -> *assign_var | |||
| ?sum: product | |||
| | sum "+" product -> *add | |||
| | sum "-" product -> *sub | |||
| ?product: atom | |||
| | product "*" atom -> *mul | |||
| | product "/" atom -> *div | |||
| ?atom: /[\d.]+/ -> *number | |||
| | "-" atom -> *neg | |||
| | NAME -> *var | |||
| | "(" sum ")" | |||
| NAME: /\w+/ | |||
| WS.ignore: /\s+/ | |||
| """ | |||
class CalculateTree(Transformer):
    """Transformer callbacks that evaluate the calculator grammar directly.

    The arithmetic callbacks are the plain ``operator`` functions, imported
    into the class namespace under the names the grammar aliases use.
    """

    # ``operator.div`` exists only on Python 2.  ``truediv`` is available on
    # both major versions and gives the same result for the floats produced
    # by ``number`` below.
    from operator import add, sub, mul, neg
    from operator import truediv as div

    number = float

    def __init__(self):
        # Values assigned so far, keyed by the name given to assign_var.
        self.vars = {}

    def assign_var(self, name, value):
        """Store ``value`` under ``name`` and return ``value``."""
        self.vars[name] = value
        return value

    def var(self, name):
        """Return the value stored under ``name``; KeyError if never assigned."""
        return self.vars[name]
| calc_parser = Lark(calc_grammar, parser='lalr', transformer=CalculateTree()) | |||
| calc = calc_parser.parse | |||
def main():
    """Prompt with '> ', print ``calc`` of each line read, stop at end of input."""
    while True:
        try:
            line = raw_input('> ')
        except EOFError:
            # Nothing follows the loop, so returning is the same as leaving it.
            return
        print(calc(line))
def test():
    """Print the results of two sample expressions evaluated with ``calc``."""
    # Call-style print (as ``main`` already uses) is valid on Python 2 and 3
    # alike when there is a single argument.
    print(calc("a = 1+2"))
    print(calc("1+a*-3"))
| if __name__ == '__main__': | |||
| test() | |||
| # main() | |||
| @@ -1,62 +0,0 @@ | |||
| import sys | |||
| from lark.lark import Lark | |||
| from lark.tree import Transformer | |||
| json_grammar = r""" | |||
| ?start: value | |||
| ?value: object | |||
| | array | |||
| | string | |||
| | number | |||
| | "true" -> *true | |||
| | "false" -> *false | |||
| | "null" -> *null | |||
| array : "[" [value ("," value)*] "]" | |||
| object : "{" [pair ("," pair)*] "}" | |||
| pair : string ":" value | |||
| *number : /-?\d+(\.\d+)?([eE][+-]?\d+)?/ | |||
| *string : /".*?(?<!\\)"/ | |||
| WS.ignore.newline: /[ \t\n]+/ | |||
| """ | |||
class TreeToJson(Transformer):
    """Transformer callbacks that turn the JSON grammar's rules into Python values."""

    # Rules handled by calling a builtin type directly.
    array = list
    pair = tuple
    object = dict
    number = float

    def string(self, s):
        """Return ``s`` without its first and last character."""
        return s[1:-1]

    def null(self):
        return None

    def true(self):
        return True

    def false(self):
        return False
| json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson()) | |||
def test():
    """Parse a sample document and check the result against the stdlib ``json`` module."""
    import json

    test_json = '''
{
"empty_object" : {},
"empty_array" : [],
"booleans" : { "YES" : true, "NO" : false },
"numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ],
"strings" : [ "This", [ "And" , "That" ] ],
"nothing" : null
}
'''
    j = json_parser.parse(test_json)
    # Call-style print is valid on Python 2 and 3 alike for a single argument.
    print(j)
    assert j == json.loads(test_json)
| if __name__ == '__main__': | |||
| test() | |||
| with open(sys.argv[1]) as f: | |||
| print json_parser.parse(f.read()) | |||
| @@ -1,6 +1,9 @@ | |||
| from __future__ import absolute_import | |||
| from .utils import STRING_TYPE | |||
| import functools | |||
| import types | |||
| from .utils import STRING_TYPE, inline_args | |||
| from .load_grammar import load_grammar | |||
| from .tree import Tree, Transformer | |||
| @@ -36,6 +39,10 @@ class LarkOptions(object): | |||
| self.parser = o.pop('parser', 'earley') | |||
| self.transformer = o.pop('transformer', None) | |||
| assert self.parser in ENGINE_DICT | |||
| if self.parser == 'earley' and self.transformer: | |||
| raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm. Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)') | |||
| if o: | |||
| raise ValueError("Unknown options: %s" % o.keys()) | |||
| @@ -45,11 +52,11 @@ class Callback(object): | |||
| class RuleTreeToText(Transformer): | |||
def expansions(self, x):
    """Return the list of children ``x`` unchanged."""
    return x
def expansion(self, symbols):
    """Return ``(values, None)`` where ``values`` lists ``sym.value`` for each symbol.

    The second element is the alias slot; it is always ``None`` here.
    """
    return [sym.value for sym in symbols], None
def alias(self, children):
    """Attach an alias to an expansion that does not have one yet.

    children -- a two-item sequence ``((expansion, existing_alias), alias_token)``.

    Returns ``(expansion, alias_token.value)``.  Asserts that
    ``existing_alias`` is ``None``.
    """
    # Unpack in the body: tuple parameters in the signature are a syntax
    # error on Python 3.
    (expansion, _alias), alias_tok = children
    assert _alias is None, (alias_tok, expansion, '-', _alias)
    return expansion, alias_tok.value
| @@ -78,12 +85,6 @@ def create_expand1_tree_builder_function(tree_builder): | |||
| return tree_builder(children) | |||
| return f | |||
def create_rule_inline(f):
    """Wrap ``f`` so it takes one sequence and receives its items as positional arguments."""
    def call_with_unpacked(children):
        return f(*children)

    return call_with_unpacked
| class LALR: | |||
| def build_parser(self, rules, callback): | |||
| ga = GrammarAnalyzer(rules) | |||
| @@ -109,6 +110,7 @@ class EarleyParser: | |||
| return res[0] | |||
| ENGINE_DICT = { 'lalr': LALR, 'earley': Earley } | |||
| class Lark: | |||
| def __init__(self, grammar, **options): | |||
| @@ -144,10 +146,7 @@ class Lark: | |||
| self.lexer = self._build_lexer() | |||
| if not self.options.only_lex: | |||
| self.parser_engine = { | |||
| 'lalr': LALR, | |||
| 'earley': Earley, | |||
| }[self.options.parser]() | |||
| self.parser_engine = ENGINE_DICT[self.options.parser]() | |||
| self.parser = self._build_parser() | |||
| def _build_lexer(self): | |||
| @@ -171,27 +170,25 @@ class Lark: | |||
| raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases" % origin) | |||
| expand1 = origin.startswith('?') | |||
| inline_args = origin.startswith('*') or (alias and alias.startswith('*')) | |||
| _origin = origin.lstrip('?*') | |||
| if alias: | |||
| alias = alias.lstrip('*') | |||
| _alias = 'autoalias_%s_%s' % (_origin, '_'.join(expansion)) | |||
| assert not hasattr(callback, _alias) | |||
| f = getattr(transformer, alias or _origin, None) | |||
| if f is None: | |||
| try: | |||
| f = transformer._get_func(alias or _origin) | |||
| # f = getattr(transformer, alias or _origin) | |||
| except AttributeError: | |||
| if alias: | |||
| f = self._create_tree_builder_function(alias) | |||
| else: | |||
| f = self._create_tree_builder_function(_origin) | |||
| if expand1: | |||
| f = create_expand1_tree_builder_function(f) | |||
| else: | |||
| if inline_args: | |||
| f = create_rule_inline(f) | |||
| alias_handler = create_rule_handler(expansion, f) | |||
| assert not hasattr(callback, _alias) | |||
| setattr(callback, _alias, alias_handler) | |||
| rules.append((_origin, expansion, _alias)) | |||
| @@ -3,7 +3,7 @@ from lexer import Lexer, Token | |||
| from grammar_analysis import GrammarAnalyzer | |||
| from parser import Parser | |||
| from tree import Tree as T, Transformer, Visitor | |||
| from tree import Tree as T, Transformer, InlineTransformer, Visitor | |||
| _TOKEN_NAMES = { | |||
| ':' : 'COLON', | |||
| @@ -186,7 +186,7 @@ class SaveDefinitions(object): | |||
| def item(self, *x): pass | |||
| class EBNF_to_BNF(Transformer): | |||
| class EBNF_to_BNF(InlineTransformer): | |||
| def __init__(self): | |||
| self.new_rules = {} | |||
| self.prefix = 'anon' | |||
| @@ -286,7 +286,6 @@ def inline_args(f): | |||
| return f(*args) | |||
| return _f | |||
| class GrammarLoader: | |||
| def __init__(self): | |||
| self.rules = list(generate_aliases()) | |||
| @@ -1,3 +1,4 @@ | |||
| from utils import inline_args | |||
| class Tree(object): | |||
| def __init__(self, data, children): | |||
| @@ -53,20 +54,29 @@ class Tree(object): | |||
class Transformer(object):
    """Rebuild a tree bottom-up by calling a method named after each node's ``data``.

    A callback receives the list of already-transformed children as one
    argument.  Nodes without a matching attribute go through ``__default__``.
    """

    def _get_func(self, name):
        """Return the callback for ``name``; AttributeError if there is none.

        Kept separate from ``transform`` so the lookup can be overridden.
        """
        return getattr(self, name)

    def transform(self, tree):
        """Transform the children of ``tree`` first, then ``tree`` itself."""
        # Only Tree instances are recursed into; other children pass through.
        items = [self.transform(c) if isinstance(c, Tree) else c for c in tree.children]
        try:
            f = self._get_func(tree.data)
        except AttributeError:
            return self.__default__(tree.data, items)
        else:
            return f(items)

    def __default__(self, data, children):
        """Fallback for nodes without a callback: build a new ``Tree``."""
        return Tree(data, children)
class InlineTransformer(Transformer):
    """Transformer whose callbacks are looked up through ``inline_args``."""

    def _get_func(self, name):
        """Return attribute ``name`` wrapped by ``inline_args`` and bound to ``self``."""
        callback = getattr(self, name)
        wrapper = inline_args(callback)
        # The wrapper expects ``self`` as its first argument; bind it here.
        return wrapper.__get__(self)
| class Visitor(object): | |||
| def visit(self, tree): | |||
| for child in tree.children: | |||
| @@ -49,3 +49,23 @@ except NameError: # Python 3 | |||
| Str = type(u'') | |||
| import functools | |||
| import types | |||
def inline_args(f):
    """Return a ``(self, args)`` wrapper that calls ``f`` with ``args`` unpacked.

    How ``f`` is called depends on what it is:

    * plain function       -> ``f(self, *args)``
    * type or builtin      -> ``f(*args)`` (``self`` is dropped)
    * anything else        -> ``f.__func__(self, *args)``; this is the branch
      a bound method reaches, and ``f`` must provide ``__func__``.

    The wrapper carries the metadata of ``f`` via ``functools.wraps``.
    """
    if isinstance(f, types.FunctionType):
        @functools.wraps(f)
        def _f_func(self, args):
            return f(self, *args)
        return _f_func
    # ``type`` instead of ``types.TypeType``: the two are the same object on
    # Python 2, and only ``type`` exists on Python 3.
    elif isinstance(f, (type, types.BuiltinFunctionType)):
        @functools.wraps(f)
        def _f_builtin(self, args):
            return f(*args)
        return _f_builtin
    else:
        @functools.wraps(f)
        def _f(self, args):
            return f.__func__(self, *args)
        return _f