This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Não pode escolher mais do que 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

147 linhas
4.6 KiB

  1. from ..utils import bfs, fzset, classify
  2. from ..common import GrammarError, is_terminal
  3. from ..grammar import Rule
  4. class RulePtr(object):
  5. def __init__(self, rule, index):
  6. assert isinstance(rule, Rule)
  7. assert index <= len(rule.expansion)
  8. self.rule = rule
  9. self.index = index
  10. def __repr__(self):
  11. before = self.rule.expansion[:self.index]
  12. after = self.rule.expansion[self.index:]
  13. return '<%s : %s * %s>' % (self.rule.origin, ' '.join(before), ' '.join(after))
  14. @property
  15. def next(self):
  16. return self.rule.expansion[self.index]
  17. def advance(self, sym):
  18. assert self.next == sym
  19. return RulePtr(self.rule, self.index+1)
  20. @property
  21. def is_satisfied(self):
  22. return self.index == len(self.rule.expansion)
  23. def __eq__(self, other):
  24. return self.rule == other.rule and self.index == other.index
  25. def __hash__(self):
  26. return hash((self.rule, self.index))
  27. def update_set(set1, set2):
  28. if not set2:
  29. return False
  30. copy = set(set1)
  31. set1 |= set2
  32. return set1 != copy
  33. def calculate_sets(rules):
  34. """Calculate FOLLOW sets.
  35. Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
  36. symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
  37. # foreach grammar rule X ::= Y(1) ... Y(k)
  38. # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
  39. # NULLABLE = NULLABLE union {X}
  40. # for i = 1 to k
  41. # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
  42. # FIRST(X) = FIRST(X) union FIRST(Y(i))
  43. # for j = i+1 to k
  44. # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
  45. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
  46. # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
  47. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
  48. # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
  49. NULLABLE = set()
  50. FIRST = {}
  51. FOLLOW = {}
  52. for sym in symbols:
  53. FIRST[sym]={sym} if is_terminal(sym) else set()
  54. FOLLOW[sym]=set()
  55. # Calculate NULLABLE and FIRST
  56. changed = True
  57. while changed:
  58. changed = False
  59. for rule in rules:
  60. if set(rule.expansion) <= NULLABLE:
  61. if update_set(NULLABLE, {rule.origin}):
  62. changed = True
  63. for i, sym in enumerate(rule.expansion):
  64. if set(rule.expansion[:i]) <= NULLABLE:
  65. if update_set(FIRST[rule.origin], FIRST[sym]):
  66. changed = True
  67. # Calculate FOLLOW
  68. changed = True
  69. while changed:
  70. changed = False
  71. for rule in rules:
  72. for i, sym in enumerate(rule.expansion):
  73. if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE:
  74. if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
  75. changed = True
  76. for j in range(i+1, len(rule.expansion)):
  77. if set(rule.expansion[i+1:j]) <= NULLABLE:
  78. if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
  79. changed = True
  80. return FIRST, FOLLOW, NULLABLE
  81. class GrammarAnalyzer(object):
  82. def __init__(self, parser_conf, debug=False):
  83. self.debug = debug
  84. rules = parser_conf.rules + [Rule('$root', [parser_conf.start, '$END'])]
  85. self.rules_by_origin = classify(rules, lambda r: r.origin)
  86. assert len(rules) == len(set(rules))
  87. for r in rules:
  88. for sym in r.expansion:
  89. if not (is_terminal(sym) or sym in self.rules_by_origin):
  90. raise GrammarError("Using an undefined rule: %s" % sym) # TODO test validation
  91. self.start_state = self.expand_rule('$root')
  92. self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
  93. def expand_rule(self, rule):
  94. "Returns all init_ptrs accessible by rule (recursive)"
  95. init_ptrs = set()
  96. def _expand_rule(rule):
  97. assert not is_terminal(rule), rule
  98. for r in self.rules_by_origin[rule]:
  99. init_ptr = RulePtr(r, 0)
  100. init_ptrs.add(init_ptr)
  101. if r.expansion: # if not empty rule
  102. new_r = init_ptr.next
  103. if not is_terminal(new_r):
  104. yield new_r
  105. _ = list(bfs([rule], _expand_rule))
  106. return fzset(init_ptrs)
  107. def _first(self, r):
  108. if is_terminal(r):
  109. return {r}
  110. else:
  111. return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)}