This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 

186 行
6.3 KiB

  1. from collections import Counter, defaultdict
  2. from ..utils import bfs, fzset, classify
  3. from ..exceptions import GrammarError
  4. from ..grammar import Rule, Terminal, NonTerminal
  5. class RulePtr(object):
  6. __slots__ = ('rule', 'index')
  7. def __init__(self, rule, index):
  8. assert isinstance(rule, Rule)
  9. assert index <= len(rule.expansion)
  10. self.rule = rule
  11. self.index = index
  12. def __repr__(self):
  13. before = [x.name for x in self.rule.expansion[:self.index]]
  14. after = [x.name for x in self.rule.expansion[self.index:]]
  15. return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after))
  16. @property
  17. def next(self):
  18. return self.rule.expansion[self.index]
  19. def advance(self, sym):
  20. assert self.next == sym
  21. return RulePtr(self.rule, self.index+1)
  22. @property
  23. def is_satisfied(self):
  24. return self.index == len(self.rule.expansion)
  25. def __eq__(self, other):
  26. return self.rule == other.rule and self.index == other.index
  27. def __hash__(self):
  28. return hash((self.rule, self.index))
  29. # state generation ensures no duplicate LR0ItemSets
  30. class LR0ItemSet(object):
  31. __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads')
  32. def __init__(self, kernel, closure):
  33. self.kernel = fzset(kernel)
  34. self.closure = fzset(closure)
  35. self.transitions = {}
  36. self.lookaheads = defaultdict(set)
  37. def __repr__(self):
  38. return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure]))
  39. def update_set(set1, set2):
  40. if not set2 or set1 > set2:
  41. return False
  42. copy = set(set1)
  43. set1 |= set2
  44. return set1 != copy
  45. def calculate_sets(rules):
  46. """Calculate FOLLOW sets.
  47. Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
  48. symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
  49. # foreach grammar rule X ::= Y(1) ... Y(k)
  50. # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
  51. # NULLABLE = NULLABLE union {X}
  52. # for i = 1 to k
  53. # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
  54. # FIRST(X) = FIRST(X) union FIRST(Y(i))
  55. # for j = i+1 to k
  56. # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
  57. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
  58. # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
  59. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
  60. # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
  61. NULLABLE = set()
  62. FIRST = {}
  63. FOLLOW = {}
  64. for sym in symbols:
  65. FIRST[sym]={sym} if sym.is_term else set()
  66. FOLLOW[sym]=set()
  67. # Calculate NULLABLE and FIRST
  68. changed = True
  69. while changed:
  70. changed = False
  71. for rule in rules:
  72. if set(rule.expansion) <= NULLABLE:
  73. if update_set(NULLABLE, {rule.origin}):
  74. changed = True
  75. for i, sym in enumerate(rule.expansion):
  76. if set(rule.expansion[:i]) <= NULLABLE:
  77. if update_set(FIRST[rule.origin], FIRST[sym]):
  78. changed = True
  79. else:
  80. break
  81. # Calculate FOLLOW
  82. changed = True
  83. while changed:
  84. changed = False
  85. for rule in rules:
  86. for i, sym in enumerate(rule.expansion):
  87. if i==len(rule.expansion)-1 or set(rule.expansion[i+1:]) <= NULLABLE:
  88. if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
  89. changed = True
  90. for j in range(i+1, len(rule.expansion)):
  91. if set(rule.expansion[i+1:j]) <= NULLABLE:
  92. if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
  93. changed = True
  94. return FIRST, FOLLOW, NULLABLE
  95. class GrammarAnalyzer(object):
  96. def __init__(self, parser_conf, debug=False):
  97. self.debug = debug
  98. root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
  99. for start in parser_conf.start}
  100. rules = parser_conf.rules + list(root_rules.values())
  101. self.rules_by_origin = classify(rules, lambda r: r.origin)
  102. if len(rules) != len(set(rules)):
  103. duplicates = [item for item, count in Counter(rules).items() if count > 1]
  104. raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))
  105. for r in rules:
  106. for sym in r.expansion:
  107. if not (sym.is_term or sym in self.rules_by_origin):
  108. raise GrammarError("Using an undefined rule: %s" % sym)
  109. self.start_states = {start: self.expand_rule(root_rule.origin)
  110. for start, root_rule in root_rules.items()}
  111. self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))})
  112. for start, root_rule in root_rules.items()}
  113. lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)])
  114. for start in parser_conf.start}
  115. lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
  116. assert(len(lr0_rules) == len(set(lr0_rules)))
  117. self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)
  118. # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
  119. self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
  120. for start, root_rule in lr0_root_rules.items()}
  121. self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
  122. def expand_rule(self, source_rule, rules_by_origin=None):
  123. "Returns all init_ptrs accessible by rule (recursive)"
  124. if rules_by_origin is None:
  125. rules_by_origin = self.rules_by_origin
  126. init_ptrs = set()
  127. def _expand_rule(rule):
  128. assert not rule.is_term, rule
  129. for r in rules_by_origin[rule]:
  130. init_ptr = RulePtr(r, 0)
  131. init_ptrs.add(init_ptr)
  132. if r.expansion: # if not empty rule
  133. new_r = init_ptr.next
  134. if not new_r.is_term:
  135. yield new_r
  136. for _ in bfs([source_rule], _expand_rule):
  137. pass
  138. return fzset(init_ptrs)