This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

158 linhas
5.1 KiB

  1. from ..utils import bfs, fzset
  2. from ..common import GrammarError, is_terminal
  3. class Rule(object):
  4. """
  5. origin : a symbol
  6. expansion : a list of symbols
  7. """
  8. def __init__(self, origin, expansion, alias=None, options=None):
  9. self.origin = origin
  10. self.expansion = expansion
  11. self.alias = alias
  12. self.options = options
  13. def __repr__(self):
  14. return '<%s : %s>' % (self.origin, ' '.join(map(str,self.expansion)))
  15. class RulePtr(object):
  16. def __init__(self, rule, index):
  17. assert isinstance(rule, Rule)
  18. assert index <= len(rule.expansion)
  19. self.rule = rule
  20. self.index = index
  21. def __repr__(self):
  22. before = self.rule.expansion[:self.index]
  23. after = self.rule.expansion[self.index:]
  24. return '<%s : %s * %s>' % (self.rule.origin, ' '.join(before), ' '.join(after))
  25. @property
  26. def next(self):
  27. return self.rule.expansion[self.index]
  28. def advance(self, sym):
  29. assert self.next == sym
  30. return RulePtr(self.rule, self.index+1)
  31. @property
  32. def is_satisfied(self):
  33. return self.index == len(self.rule.expansion)
  34. def __eq__(self, other):
  35. return self.rule == other.rule and self.index == other.index
  36. def __hash__(self):
  37. return hash((self.rule, self.index))
  38. def pairs(lst):
  39. return zip(lst[:-1], lst[1:])
  40. def update_set(set1, set2):
  41. copy = set(set1)
  42. set1 |= set2
  43. return set1 != copy
  44. def calculate_sets(rules):
  45. """Calculate FOLLOW sets.
  46. Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
  47. symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
  48. symbols.add('$root') # what about other unused rules?
  49. # foreach grammar rule X ::= Y(1) ... Y(k)
  50. # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
  51. # NULLABLE = NULLABLE union {X}
  52. # for i = 1 to k
  53. # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
  54. # FIRST(X) = FIRST(X) union FIRST(Y(i))
  55. # for j = i+1 to k
  56. # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
  57. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
  58. # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
  59. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
  60. # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
  61. NULLABLE = set()
  62. FIRST = {}
  63. FOLLOW = {}
  64. for sym in symbols:
  65. FIRST[sym]={sym} if is_terminal(sym) else set()
  66. FOLLOW[sym]=set()
  67. changed = True
  68. while changed:
  69. changed = False
  70. for rule in rules:
  71. if set(rule.expansion) <= NULLABLE:
  72. if update_set(NULLABLE, {rule.origin}):
  73. changed = True
  74. for i, sym in enumerate(rule.expansion):
  75. if set(rule.expansion[:i]) <= NULLABLE:
  76. if update_set(FIRST[rule.origin], FIRST[sym]):
  77. changed = True
  78. if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE:
  79. if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
  80. changed = True
  81. for j in range(i+1, len(rule.expansion)):
  82. if set(rule.expansion[i+1:j]) <= NULLABLE:
  83. if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
  84. changed = True
  85. return FIRST, FOLLOW, NULLABLE
  86. class GrammarAnalyzer(object):
  87. def __init__(self, rule_tuples, start_symbol, debug=False):
  88. self.start_symbol = start_symbol
  89. self.debug = debug
  90. rule_tuples = list(rule_tuples)
  91. rule_tuples.append(('$root', [start_symbol, '$end']))
  92. rule_tuples = [(t[0], t[1], None, None) if len(t)==2 else t for t in rule_tuples]
  93. self.rules = set()
  94. self.rules_by_origin = {o: [] for o, _x, _a, _opt in rule_tuples}
  95. for origin, exp, alias, options in rule_tuples:
  96. r = Rule( origin, exp, alias, options )
  97. self.rules.add(r)
  98. self.rules_by_origin[origin].append(r)
  99. for r in self.rules:
  100. for sym in r.expansion:
  101. if not (is_terminal(sym) or sym in self.rules_by_origin):
  102. raise GrammarError("Using an undefined rule: %s" % sym)
  103. self.init_state = self.expand_rule(start_symbol)
  104. self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules)
  105. def expand_rule(self, rule):
  106. "Returns all init_ptrs accessible by rule (recursive)"
  107. init_ptrs = set()
  108. def _expand_rule(rule):
  109. assert not is_terminal(rule), rule
  110. for r in self.rules_by_origin[rule]:
  111. init_ptr = RulePtr(r, 0)
  112. init_ptrs.add(init_ptr)
  113. if r.expansion: # if not empty rule
  114. new_r = init_ptr.next
  115. if not is_terminal(new_r):
  116. yield new_r
  117. _ = list(bfs([rule], _expand_rule))
  118. return fzset(init_ptrs)
  119. def _first(self, r):
  120. if is_terminal(r):
  121. return {r}
  122. else:
  123. return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)}