This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

154 lines
4.9 KiB

  1. from collections import Counter
  2. from ..utils import bfs, fzset, classify
  3. from ..exceptions import GrammarError
  4. from ..grammar import Rule, Terminal, NonTerminal
  5. class RulePtr(object):
  6. __slots__ = ('rule', 'index')
  7. def __init__(self, rule, index):
  8. assert isinstance(rule, Rule)
  9. assert index <= len(rule.expansion)
  10. self.rule = rule
  11. self.index = index
  12. def __repr__(self):
  13. before = [x.name for x in self.rule.expansion[:self.index]]
  14. after = [x.name for x in self.rule.expansion[self.index:]]
  15. return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after))
  16. @property
  17. def next(self):
  18. return self.rule.expansion[self.index]
  19. def advance(self, sym):
  20. assert self.next == sym
  21. return RulePtr(self.rule, self.index+1)
  22. @property
  23. def is_satisfied(self):
  24. return self.index == len(self.rule.expansion)
  25. def __eq__(self, other):
  26. return self.rule == other.rule and self.index == other.index
  27. def __hash__(self):
  28. return hash((self.rule, self.index))
  29. def update_set(set1, set2):
  30. if not set2:
  31. return False
  32. copy = set(set1)
  33. set1 |= set2
  34. return set1 != copy
  35. def calculate_sets(rules):
  36. """Calculate FOLLOW sets.
  37. Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
  38. symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
  39. # foreach grammar rule X ::= Y(1) ... Y(k)
  40. # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
  41. # NULLABLE = NULLABLE union {X}
  42. # for i = 1 to k
  43. # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
  44. # FIRST(X) = FIRST(X) union FIRST(Y(i))
  45. # for j = i+1 to k
  46. # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
  47. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
  48. # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
  49. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
  50. # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
  51. NULLABLE = set()
  52. FIRST = {}
  53. FOLLOW = {}
  54. for sym in symbols:
  55. FIRST[sym]={sym} if sym.is_term else set()
  56. FOLLOW[sym]=set()
  57. # Calculate NULLABLE and FIRST
  58. changed = True
  59. while changed:
  60. changed = False
  61. for rule in rules:
  62. if set(rule.expansion) <= NULLABLE:
  63. if update_set(NULLABLE, {rule.origin}):
  64. changed = True
  65. for i, sym in enumerate(rule.expansion):
  66. if set(rule.expansion[:i]) <= NULLABLE:
  67. if update_set(FIRST[rule.origin], FIRST[sym]):
  68. changed = True
  69. # Calculate FOLLOW
  70. changed = True
  71. while changed:
  72. changed = False
  73. for rule in rules:
  74. for i, sym in enumerate(rule.expansion):
  75. if i==len(rule.expansion)-1 or set(rule.expansion[i+1:]) <= NULLABLE:
  76. if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
  77. changed = True
  78. for j in range(i+1, len(rule.expansion)):
  79. if set(rule.expansion[i+1:j]) <= NULLABLE:
  80. if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
  81. changed = True
  82. return FIRST, FOLLOW, NULLABLE
  83. class GrammarAnalyzer(object):
  84. def __init__(self, parser_conf, debug=False):
  85. self.debug = debug
  86. rules = parser_conf.rules + [Rule(NonTerminal('$root'), [NonTerminal(parser_conf.start), Terminal('$END')])]
  87. self.rules_by_origin = classify(rules, lambda r: r.origin)
  88. if len(rules) != len(set(rules)):
  89. duplicates = [item for item, count in Counter(rules).items() if count > 1]
  90. raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))
  91. for r in rules:
  92. for sym in r.expansion:
  93. if not (sym.is_term or sym in self.rules_by_origin):
  94. raise GrammarError("Using an undefined rule: %s" % sym) # TODO test validation
  95. self.start_state = self.expand_rule(NonTerminal('$root'))
  96. self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
  97. def expand_rule(self, rule):
  98. "Returns all init_ptrs accessible by rule (recursive)"
  99. init_ptrs = set()
  100. def _expand_rule(rule):
  101. assert not rule.is_term, rule
  102. for r in self.rules_by_origin[rule]:
  103. init_ptr = RulePtr(r, 0)
  104. init_ptrs.add(init_ptr)
  105. if r.expansion: # if not empty rule
  106. new_r = init_ptr.next
  107. if not new_r.is_term:
  108. yield new_r
  109. for _ in bfs([rule], _expand_rule):
  110. pass
  111. return fzset(init_ptrs)
  112. def _first(self, r):
  113. if r.is_term:
  114. return {r}
  115. else:
  116. return {rp.next for rp in self.expand_rule(r) if rp.next.is_term}