浏览代码

Simplified grammar analysis. Improved performance of FIRST/FOLLOW calculations

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.4
Erez Shinan 6 年前
父节点
当前提交
6c28010581
共有 4 个文件被更改，包括 19 次插入和 22 次删除
  1. +2
    -2
      lark/parser_frontends.py
  2. +15
    -18
      lark/parsers/grammar_analysis.py
  3. +1
    -1
      lark/parsers/lalr_analysis.py
  4. +1
    -1
      lark/parsers/lalr_parser.py

+ 2
- 2
lark/parser_frontends.py 查看文件

@@ -143,10 +143,10 @@ class CYK(WithLexer):
self.init_traditional_lexer(lexer_conf)

self._analysis = GrammarAnalyzer(parser_conf)
self._parser = cyk.Parser(self._analysis.rules, parser_conf.start)
self._parser = cyk.Parser(parser_conf.rules, parser_conf.start)

self._postprocess = {}
for rule in self._analysis.rules:
for rule in parser_conf.rules:
a = rule.alias
self._postprocess[a] = a if callable(a) else (a and getattr(parser_conf.callback, a))



+ 15
- 18
lark/parsers/grammar_analysis.py 查看文件

@@ -1,5 +1,5 @@

from ..utils import bfs, fzset
from ..utils import bfs, fzset, classify
from ..common import GrammarError, is_terminal
from ..grammar import Rule

@@ -34,9 +34,6 @@ class RulePtr(object):
return hash((self.rule, self.index))


def pairs(lst):
return zip(lst[:-1], lst[1:])

def update_set(set1, set2):
copy = set(set1)
set1 |= set2
@@ -68,6 +65,7 @@ def calculate_sets(rules):
FIRST[sym]={sym} if is_terminal(sym) else set()
FOLLOW[sym]=set()

# Calculate NULLABLE and FIRST
changed = True
while changed:
changed = False
@@ -81,6 +79,14 @@ def calculate_sets(rules):
if set(rule.expansion[:i]) <= NULLABLE:
if update_set(FIRST[rule.origin], FIRST[sym]):
changed = True

# Calculate FOLLOW
changed = True
while changed:
changed = False

for rule in rules:
for i, sym in enumerate(rule.expansion):
if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE:
if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
changed = True
@@ -95,29 +101,20 @@ def calculate_sets(rules):

class GrammarAnalyzer(object):
def __init__(self, parser_conf, debug=False):
rules = parser_conf.rules
assert len(rules) == len(set(rules))

self.start_symbol = parser_conf.start
self.debug = debug

root_rule = Rule('$root', [self.start_symbol, '$END'])

self.rules_by_origin = {r.origin: [] for r in rules}
for r in rules:
self.rules_by_origin[r.origin].append(r)

self.rules_by_origin[root_rule.origin] = [root_rule]
rules = parser_conf.rules + [Rule('$root', [parser_conf.start, '$END'])]
self.rules_by_origin = classify(rules, lambda r: r.origin)

assert len(rules) == len(set(rules))
for r in rules:
for sym in r.expansion:
if not (is_terminal(sym) or sym in self.rules_by_origin):
raise GrammarError("Using an undefined rule: %s" % sym)
raise GrammarError("Using an undefined rule: %s" % sym) # TODO test validation

self.start_state = self.expand_rule('$root')
self.rules = rules

self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules + [root_rule])
self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)

def expand_rule(self, rule):
"Returns all init_ptrs accessible by rule (recursive)"


+ 1
- 1
lark/parsers/lalr_analysis.py 查看文件

@@ -77,7 +77,7 @@ class LALR_Analyzer(GrammarAnalyzer):
lookahead[sym].append((Shift, new_state))
if sym == '$END':
self.end_states.append( new_state )
yield fzset(rps)
yield new_state

for k, v in lookahead.items():
if len(v) > 1:


+ 1
- 1
lark/parsers/lalr_parser.py 查看文件

@@ -14,7 +14,7 @@ class Parser:
self.analysis = analysis = LALR_Analyzer(parser_conf)
analysis.compute_lookahead()
callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
for rule in analysis.rules}
for rule in parser_conf.rules}

self.parser_conf = parser_conf
self.parser = _Parser(analysis.parse_table, callbacks)


正在加载...
取消
保存