This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

147 lines
4.6 KiB

  1. from ..utils import bfs, fzset
  2. from ..common import GrammarError, is_terminal
  3. from ..grammar import Rule
  4. class RulePtr(object):
  5. def __init__(self, rule, index):
  6. assert isinstance(rule, Rule)
  7. assert index <= len(rule.expansion)
  8. self.rule = rule
  9. self.index = index
  10. def __repr__(self):
  11. before = self.rule.expansion[:self.index]
  12. after = self.rule.expansion[self.index:]
  13. return '<%s : %s * %s>' % (self.rule.origin, ' '.join(before), ' '.join(after))
  14. @property
  15. def next(self):
  16. return self.rule.expansion[self.index]
  17. def advance(self, sym):
  18. assert self.next == sym
  19. return RulePtr(self.rule, self.index+1)
  20. @property
  21. def is_satisfied(self):
  22. return self.index == len(self.rule.expansion)
  23. def __eq__(self, other):
  24. return self.rule == other.rule and self.index == other.index
  25. def __hash__(self):
  26. return hash((self.rule, self.index))
  27. def pairs(lst):
  28. return zip(lst[:-1], lst[1:])
  29. def update_set(set1, set2):
  30. copy = set(set1)
  31. set1 |= set2
  32. return set1 != copy
  33. def calculate_sets(rules):
  34. """Calculate FOLLOW sets.
  35. Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
  36. symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
  37. # foreach grammar rule X ::= Y(1) ... Y(k)
  38. # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
  39. # NULLABLE = NULLABLE union {X}
  40. # for i = 1 to k
  41. # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
  42. # FIRST(X) = FIRST(X) union FIRST(Y(i))
  43. # for j = i+1 to k
  44. # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
  45. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
  46. # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
  47. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
  48. # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
  49. NULLABLE = set()
  50. FIRST = {}
  51. FOLLOW = {}
  52. for sym in symbols:
  53. FIRST[sym]={sym} if is_terminal(sym) else set()
  54. FOLLOW[sym]=set()
  55. changed = True
  56. while changed:
  57. changed = False
  58. for rule in rules:
  59. if set(rule.expansion) <= NULLABLE:
  60. if update_set(NULLABLE, {rule.origin}):
  61. changed = True
  62. for i, sym in enumerate(rule.expansion):
  63. if set(rule.expansion[:i]) <= NULLABLE:
  64. if update_set(FIRST[rule.origin], FIRST[sym]):
  65. changed = True
  66. if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE:
  67. if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
  68. changed = True
  69. for j in range(i+1, len(rule.expansion)):
  70. if set(rule.expansion[i+1:j]) <= NULLABLE:
  71. if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
  72. changed = True
  73. return FIRST, FOLLOW, NULLABLE
  74. class GrammarAnalyzer(object):
  75. def __init__(self, parser_conf, debug=False):
  76. rules = parser_conf.rules
  77. assert len(rules) == len(set(rules))
  78. self.start_symbol = parser_conf.start
  79. self.debug = debug
  80. root_rule = Rule('$root', [self.start_symbol, '$END'])
  81. self.rules_by_origin = {r.origin: [] for r in rules}
  82. for r in rules:
  83. self.rules_by_origin[r.origin].append(r)
  84. self.rules_by_origin[root_rule.origin] = [root_rule]
  85. for r in rules:
  86. for sym in r.expansion:
  87. if not (is_terminal(sym) or sym in self.rules_by_origin):
  88. raise GrammarError("Using an undefined rule: %s" % sym)
  89. self.start_state = self.expand_rule('$root')
  90. self.rules = rules
  91. self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules + [root_rule])
  92. def expand_rule(self, rule):
  93. "Returns all init_ptrs accessible by rule (recursive)"
  94. init_ptrs = set()
  95. def _expand_rule(rule):
  96. assert not is_terminal(rule), rule
  97. for r in self.rules_by_origin[rule]:
  98. init_ptr = RulePtr(r, 0)
  99. init_ptrs.add(init_ptr)
  100. if r.expansion: # if not empty rule
  101. new_r = init_ptr.next
  102. if not is_terminal(new_r):
  103. yield new_r
  104. _ = list(bfs([rule], _expand_rule))
  105. return fzset(init_ptrs)
  106. def _first(self, r):
  107. if is_terminal(r):
  108. return {r}
  109. else:
  110. return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)}