From 4076256faab3769e3cd54f92eb5fe25d3793d539 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 20 Feb 2017 20:39:20 +0200 Subject: [PATCH] Reconstruct working again --- lark/parser_frontends.py | 2 +- lark/parsers/earley.py | 18 +++++++++++------- lark/reconstruct.py | 20 ++++++++++---------- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 891615a..cf9d14e 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -140,7 +140,7 @@ class Earley_NoLex: yield sym def parse(self, text): - res = self.parser.parse([Token(x,x) for x in text]) # A little hacky perhaps! + res = self.parser.parse(text) assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' return res[0] diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index 0ba74a2..f9dfbfe 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -1,6 +1,9 @@ from ..common import ParseError, UnexpectedToken, is_terminal from .grammar_analysis import GrammarAnalyzer +class EndToken(str): + type = '$end' + class Item: def __init__(self, rule, ptr, start, data): self.rule = rule @@ -34,7 +37,8 @@ class Parser: self.predictions = {} for rule in self.analysis.rules: if rule.origin != '$root': # XXX kinda ugly - self.postprocess[rule] = getattr(parser_conf.callback, rule.alias) + a = rule.alias + self.postprocess[rule] = a if callable(a) else getattr(parser_conf.callback, a) self.predictions[rule.origin] = [(x.rule, x.index) for x in self.analysis.expand_rule(rule.origin)] def parse(self, stream): @@ -49,7 +53,7 @@ class Parser: return {old_item.advance(item.data) for old_item in table[item.start] if not old_item.is_complete and old_item.expect == item.rule.origin} - def process_column(i, term): + def process_column(i, token): assert i == len(table)-1 cur_set = table[i] next_set = set() @@ -63,7 +67,7 @@ class Parser: else: if is_terminal(item.expect): # scan - match = item.expect[0](term) if callable(item.expect[0]) else item.expect[0] == term + match = item.expect[0](token) if callable(item.expect[0]) else item.expect[0] == token.type if match: next_set.add(item.advance(stream[i])) else: @@ -74,9 +78,9 @@ class Parser: cur_set |= to_process - if not next_set and term != '$end': + if not next_set and token.type != '$end': expect = filter(is_terminal, [x.expect for x in cur_set if not x.is_complete]) - raise UnexpectedToken(term, expect, stream, i) + raise UnexpectedToken(token, expect, stream, i) table.append(next_set) @@ -84,9 +88,9 @@ class Parser: table = [predict(self.start, 0)] for i, char in enumerate(stream): - process_column(i, char.type) + process_column(i, char) - process_column(len(stream), '$end') + process_column(len(stream), EndToken()) # Parse ended. Now build a parse tree solutions = [n.data for n in table[len(stream)] diff --git a/lark/reconstruct.py b/lark/reconstruct.py index 921bd7c..cab5aed 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -2,7 +2,7 @@ import re from collections import defaultdict from .tree import Tree -from .common import is_terminal +from .common import is_terminal, ParserConf from .lexer import Token, TokenDef__Str from .parsers import earley from .lark import Lark @@ -29,12 +29,15 @@ class Reconstructor: self.data = data class MatchTerminal(MatchData): - def match(self, other): + def __call__(self, other): return token_res[self.data].match(other) is not None class MatchTree(MatchData): - def match(self, other): - return self.data == other.data + def __call__(self, other): + try: + return self.data == other.data + except AttributeError: + return False class WriteTokens: def __init__(self, name, expansion): @@ -80,20 +83,17 @@ class Reconstructor: for name, expansions in d.items(): for expansion in expansions: reduced = [sym if sym.startswith('_') or sym in expand1s else - (sym, MatchTerminal(sym) if is_terminal(sym) else MatchTree(sym)) + (MatchTerminal(sym) if is_terminal(sym) else MatchTree(sym),) for sym in expansion if not is_discarded_terminal(sym)] name = name.lstrip('!').lstrip('?') - rules.append({'name': name, - 'symbols': reduced, - 'postprocess': WriteTokens(name, expansion).f - }) + rules.append((name, reduced, WriteTokens(name, expansion).f)) self.rules = rules def _reconstruct(self, tree): - parser = earley.Parser(self.rules, tree.data) + parser = earley.Parser(ParserConf(self.rules, {}, tree.data)) res ,= parser.parse(tree.children) # XXX ambiguity? for item in res: