This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
No puede seleccionar más de 25 temas. Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.
 
 

157 líneas
5.0 KiB

  1. from ..utils import bfs, fzset
  2. from ..common import GrammarError, is_terminal
  3. class Rule(object):
  4. """
  5. origin : a symbol
  6. expansion : a list of symbols
  7. """
  8. def __init__(self, origin, expansion, alias=None):
  9. self.origin = origin
  10. self.expansion = expansion
  11. self.alias = alias
  12. def __repr__(self):
  13. return '<%s : %s>' % (self.origin, ' '.join(map(unicode,self.expansion)))
  14. class RulePtr(object):
  15. def __init__(self, rule, index):
  16. assert isinstance(rule, Rule)
  17. assert index <= len(rule.expansion)
  18. self.rule = rule
  19. self.index = index
  20. def __repr__(self):
  21. before = self.rule.expansion[:self.index]
  22. after = self.rule.expansion[self.index:]
  23. return '<%s : %s * %s>' % (self.rule.origin, ' '.join(before), ' '.join(after))
  24. @property
  25. def next(self):
  26. return self.rule.expansion[self.index]
  27. def advance(self, sym):
  28. assert self.next == sym
  29. return RulePtr(self.rule, self.index+1)
  30. @property
  31. def is_satisfied(self):
  32. return self.index == len(self.rule.expansion)
  33. def __eq__(self, other):
  34. return self.rule == other.rule and self.index == other.index
  35. def __hash__(self):
  36. return hash((self.rule, self.index))
  37. def pairs(lst):
  38. return zip(lst[:-1], lst[1:])
  39. def update_set(set1, set2):
  40. copy = set(set1)
  41. set1 |= set2
  42. return set1 != copy
  43. def calculate_sets(rules):
  44. """Calculate FOLLOW sets.
  45. Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
  46. symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
  47. symbols.add('$root') # what about other unused rules?
  48. # foreach grammar rule X ::= Y(1) ... Y(k)
  49. # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
  50. # NULLABLE = NULLABLE union {X}
  51. # for i = 1 to k
  52. # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
  53. # FIRST(X) = FIRST(X) union FIRST(Y(i))
  54. # for j = i+1 to k
  55. # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
  56. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
  57. # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
  58. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
  59. # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
  60. NULLABLE = set()
  61. FIRST = {}
  62. FOLLOW = {}
  63. for sym in symbols:
  64. FIRST[sym]={sym} if is_terminal(sym) else set()
  65. FOLLOW[sym]=set()
  66. changed = True
  67. while changed:
  68. changed = False
  69. for rule in rules:
  70. if set(rule.expansion) <= NULLABLE:
  71. if update_set(NULLABLE, {rule.origin}):
  72. changed = True
  73. for i, sym in enumerate(rule.expansion):
  74. if set(rule.expansion[:i]) <= NULLABLE:
  75. if update_set(FIRST[rule.origin], FIRST[sym]):
  76. changed = True
  77. if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE:
  78. if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
  79. changed = True
  80. for j in range(i+1, len(rule.expansion)):
  81. if set(rule.expansion[i+1:j]) <= NULLABLE:
  82. if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
  83. changed = True
  84. return FIRST, FOLLOW, NULLABLE
  85. class GrammarAnalyzer(object):
  86. def __init__(self, rule_tuples, start_symbol, debug=False):
  87. self.start_symbol = start_symbol
  88. self.debug = debug
  89. rule_tuples = list(rule_tuples)
  90. rule_tuples.append(('$root', [start_symbol, '$end']))
  91. rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples]
  92. self.rules = set()
  93. self.rules_by_origin = {o: [] for o, _x, _a in rule_tuples}
  94. for origin, exp, alias in rule_tuples:
  95. r = Rule( origin, exp, alias )
  96. self.rules.add(r)
  97. self.rules_by_origin[origin].append(r)
  98. for r in self.rules:
  99. for sym in r.expansion:
  100. if not (is_terminal(sym) or sym in self.rules_by_origin):
  101. raise GrammarError("Using an undefined rule: %s" % sym)
  102. self.init_state = self.expand_rule(start_symbol)
  103. self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules)
  104. def expand_rule(self, rule):
  105. "Returns all init_ptrs accessible by rule (recursive)"
  106. init_ptrs = set()
  107. def _expand_rule(rule):
  108. assert not is_terminal(rule), rule
  109. for r in self.rules_by_origin[rule]:
  110. init_ptr = RulePtr(r, 0)
  111. init_ptrs.add(init_ptr)
  112. if r.expansion: # if not empty rule
  113. new_r = init_ptr.next
  114. if not is_terminal(new_r):
  115. yield new_r
  116. _ = list(bfs([rule], _expand_rule))
  117. return fzset(init_ptrs)
  118. def _first(self, r):
  119. if is_terminal(r):
  120. return {r}
  121. else:
  122. return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)}