Просмотр исходного кода

BUGFIX in xearley + Feature: earley__predict_all

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 6 лет назад
Родитель
Коммит
5748920df4
4 измененных файла: 14 добавлений и 8 удалений
  1. +1
    -0
      lark/lark.py
  2. +1
    -0
      lark/parser_frontends.py
  3. +6
    -4
      lark/parsers/earley.py
  4. +6
    -4
      lark/parsers/xearley.py

+ 1
- 0
lark/lark.py Просмотреть файл

@@ -57,6 +57,7 @@ class LarkOptions(object):
self.profile = o.pop('profile', False) self.profile = o.pop('profile', False)
self.ambiguity = o.pop('ambiguity', 'auto') self.ambiguity = o.pop('ambiguity', 'auto')
self.propagate_positions = o.pop('propagate_positions', False) self.propagate_positions = o.pop('propagate_positions', False)
self.earley__predict_all = o.pop('earley__predict_all', False)


assert self.parser in ('earley', 'lalr', None) assert self.parser in ('earley', 'lalr', None)




+ 1
- 0
lark/parser_frontends.py Просмотреть файл

@@ -126,6 +126,7 @@ class XEarley:
parser_conf.callback, parser_conf.callback,
resolve_ambiguity=get_ambiguity_resolver(options), resolve_ambiguity=get_ambiguity_resolver(options),
ignore=ignore, ignore=ignore,
predict_all=options.earley__predict_all
) )


def _prepare_expansion(self, expansion): def _prepare_expansion(self, expansion):


+ 6
- 4
lark/parsers/earley.py Просмотреть файл

@@ -90,7 +90,7 @@ class NewsList(list):


class Column: class Column:
"An entry in the table, aka Earley Chart. Contains lists of items." "An entry in the table, aka Earley Chart. Contains lists of items."
def __init__(self, i, FIRST):
def __init__(self, i, FIRST, predict_all=False):
self.i = i self.i = i
self.to_reduce = NewsList() self.to_reduce = NewsList()
self.to_predict = NewsList() self.to_predict = NewsList()
@@ -100,6 +100,7 @@ class Column:


self.predicted = set() self.predicted = set()
self.completed = {} self.completed = {}
self.predict_all = predict_all


def add(self, items): def add(self, items):
"""Sort items into scan/predict/reduce newslists """Sort items into scan/predict/reduce newslists
@@ -108,9 +109,9 @@ class Column:
""" """
for item in items: for item in items:


item_key = item, item.tree # Elsewhere, tree is not part of the comparison
if item.is_complete: if item.is_complete:
# XXX Potential bug: What happens if there's ambiguity in an empty rule? # XXX Potential bug: What happens if there's ambiguity in an empty rule?
item_key = item, item.tree # Elsewhere, tree is not part of the comparison
if item.rule.expansion and item_key in self.completed: if item.rule.expansion and item_key in self.completed:
old_tree = self.completed[item_key].tree old_tree = self.completed[item_key].tree
if old_tree == item.tree: if old_tree == item.tree:
@@ -137,9 +138,10 @@ class Column:
if isinstance(item.expect, Terminal): if isinstance(item.expect, Terminal):
self.to_scan.append(item) self.to_scan.append(item)
else: else:
if item in self.predicted:
k = item_key if self.predict_all else item
if k in self.predicted:
continue continue
self.predicted.add(item)
self.predicted.add(k)
self.to_predict.append(item) self.to_predict.append(item)


self.item_count += 1 # Only count if actually added self.item_count += 1 # Only count if actually added


+ 6
- 4
lark/parsers/xearley.py Просмотреть файл

@@ -28,11 +28,12 @@ from .grammar_analysis import GrammarAnalyzer
from .earley import ApplyCallbacks, Item, Column from .earley import ApplyCallbacks, Item, Column


class Parser: class Parser:
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None, ignore=()):
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None, ignore=(), predict_all=False):
self.analysis = GrammarAnalyzer(rules, start_symbol) self.analysis = GrammarAnalyzer(rules, start_symbol)
self.start_symbol = start_symbol self.start_symbol = start_symbol
self.resolve_ambiguity = resolve_ambiguity self.resolve_ambiguity = resolve_ambiguity
self.ignore = list(ignore) self.ignore = list(ignore)
self.predict_all = predict_all




self.postprocess = {} self.postprocess = {}
@@ -107,9 +108,10 @@ class Parser:
for j in range(1, len(s)): for j in range(1, len(s)):
m = item.expect.match(s[:-j]) m = item.expect.match(s[:-j])
if m: if m:
delayed_matches[m.end()].append(item.advance(m.group(0)))
t = Token(item.expect.name, m.group(0), i, text_line, text_column)
delayed_matches[i+m.end()].append(item.advance(t))


next_set = Column(i+1, self.FIRST)
next_set = Column(i+1, self.FIRST, predict_all=self.predict_all)
next_set.add(delayed_matches[i+1]) next_set.add(delayed_matches[i+1])
del delayed_matches[i+1] # No longer needed, so unburden memory del delayed_matches[i+1] # No longer needed, so unburden memory


@@ -119,7 +121,7 @@ class Parser:
return next_set return next_set


# Main loop starts # Main loop starts
column0 = Column(0, self.FIRST)
column0 = Column(0, self.FIRST, predict_all=self.predict_all)
column0.add(predict(start_symbol, column0)) column0.add(predict(start_symbol, column0))


column = column0 column = column0


Загрузка…
Отмена
Сохранить