From 5748920df4ab6d2cb3eaf430752efc38ecfeaf13 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Tue, 12 Dec 2017 14:17:48 +0200 Subject: [PATCH] BUGFIX in xearley + Feature: earley__predict_all --- lark/lark.py | 1 + lark/parser_frontends.py | 1 + lark/parsers/earley.py | 10 ++++++---- lark/parsers/xearley.py | 10 ++++++---- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/lark/lark.py b/lark/lark.py index b8c8efe..d8ee186 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -57,6 +57,7 @@ class LarkOptions(object): self.profile = o.pop('profile', False) self.ambiguity = o.pop('ambiguity', 'auto') self.propagate_positions = o.pop('propagate_positions', False) + self.earley__predict_all = o.pop('earley__predict_all', False) assert self.parser in ('earley', 'lalr', None) diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 18264ce..718a0f9 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -126,6 +126,7 @@ class XEarley: parser_conf.callback, resolve_ambiguity=get_ambiguity_resolver(options), ignore=ignore, + predict_all=options.earley__predict_all ) def _prepare_expansion(self, expansion): diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index 1926afa..55893f5 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -90,7 +90,7 @@ class NewsList(list): class Column: "An entry in the table, aka Earley Chart. Contains lists of items." - def __init__(self, i, FIRST): + def __init__(self, i, FIRST, predict_all=False): self.i = i self.to_reduce = NewsList() self.to_predict = NewsList() @@ -100,6 +100,7 @@ class Column: self.predicted = set() self.completed = {} + self.predict_all = predict_all def add(self, items): """Sort items into scan/predict/reduce newslists @@ -108,9 +109,9 @@ class Column: """ for item in items: + item_key = item, item.tree # Elsewhere, tree is not part of the comparison if item.is_complete: # XXX Potential bug: What happens if there's ambiguity in an empty rule? - item_key = item, item.tree # Elsewhere, tree is not part of the comparison if item.rule.expansion and item_key in self.completed: old_tree = self.completed[item_key].tree if old_tree == item.tree: @@ -137,9 +138,10 @@ class Column: if isinstance(item.expect, Terminal): self.to_scan.append(item) else: - if item in self.predicted: + k = item_key if self.predict_all else item + if k in self.predicted: continue - self.predicted.add(item) + self.predicted.add(k) self.to_predict.append(item) self.item_count += 1 # Only count if actually added diff --git a/lark/parsers/xearley.py b/lark/parsers/xearley.py index 59ecb84..9b26190 100644 --- a/lark/parsers/xearley.py +++ b/lark/parsers/xearley.py @@ -28,11 +28,12 @@ from .grammar_analysis import GrammarAnalyzer from .earley import ApplyCallbacks, Item, Column class Parser: - def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None, ignore=()): + def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None, ignore=(), predict_all=False): self.analysis = GrammarAnalyzer(rules, start_symbol) self.start_symbol = start_symbol self.resolve_ambiguity = resolve_ambiguity self.ignore = list(ignore) + self.predict_all = predict_all self.postprocess = {} @@ -107,9 +108,10 @@ class Parser: for j in range(1, len(s)): m = item.expect.match(s[:-j]) if m: - delayed_matches[m.end()].append(item.advance(m.group(0))) + t = Token(item.expect.name, m.group(0), i, text_line, text_column) + delayed_matches[i+m.end()].append(item.advance(t)) - next_set = Column(i+1, self.FIRST) + next_set = Column(i+1, self.FIRST, predict_all=self.predict_all) next_set.add(delayed_matches[i+1]) del delayed_matches[i+1] # No longer needed, so unburden memory @@ -119,7 +121,7 @@ class Parser: return next_set # Main loop starts - column0 = Column(0, self.FIRST) + column0 = Column(0, self.FIRST, predict_all=self.predict_all) column0.add(predict(start_symbol, column0)) column = column0