This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

150 linhas
4.6 KiB

  1. from ..utils import bfs, fzset, classify
  2. from ..common import GrammarError
  3. from ..grammar import Rule, Terminal, NonTerminal
  4. class RulePtr(object):
  5. __slots__ = ('rule', 'index')
  6. def __init__(self, rule, index):
  7. assert isinstance(rule, Rule)
  8. assert index <= len(rule.expansion)
  9. self.rule = rule
  10. self.index = index
  11. def __repr__(self):
  12. before = self.rule.expansion[:self.index]
  13. after = self.rule.expansion[self.index:]
  14. return '<%s : %s * %s>' % (self.rule.origin, ' '.join(before), ' '.join(after))
  15. @property
  16. def next(self):
  17. return self.rule.expansion[self.index]
  18. def advance(self, sym):
  19. assert self.next == sym
  20. return RulePtr(self.rule, self.index+1)
  21. @property
  22. def is_satisfied(self):
  23. return self.index == len(self.rule.expansion)
  24. def __eq__(self, other):
  25. return self.rule == other.rule and self.index == other.index
  26. def __hash__(self):
  27. return hash((self.rule, self.index))
  28. def update_set(set1, set2):
  29. if not set2:
  30. return False
  31. copy = set(set1)
  32. set1 |= set2
  33. return set1 != copy
  34. def calculate_sets(rules):
  35. """Calculate FOLLOW sets.
  36. Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
  37. symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
  38. # foreach grammar rule X ::= Y(1) ... Y(k)
  39. # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
  40. # NULLABLE = NULLABLE union {X}
  41. # for i = 1 to k
  42. # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
  43. # FIRST(X) = FIRST(X) union FIRST(Y(i))
  44. # for j = i+1 to k
  45. # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
  46. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
  47. # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
  48. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
  49. # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
  50. NULLABLE = set()
  51. FIRST = {}
  52. FOLLOW = {}
  53. for sym in symbols:
  54. FIRST[sym]={sym} if sym.is_term else set()
  55. FOLLOW[sym]=set()
  56. # Calculate NULLABLE and FIRST
  57. changed = True
  58. while changed:
  59. changed = False
  60. for rule in rules:
  61. if set(rule.expansion) <= NULLABLE:
  62. if update_set(NULLABLE, {rule.origin}):
  63. changed = True
  64. for i, sym in enumerate(rule.expansion):
  65. if set(rule.expansion[:i]) <= NULLABLE:
  66. if update_set(FIRST[rule.origin], FIRST[sym]):
  67. changed = True
  68. # Calculate FOLLOW
  69. changed = True
  70. while changed:
  71. changed = False
  72. for rule in rules:
  73. for i, sym in enumerate(rule.expansion):
  74. if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE:
  75. if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
  76. changed = True
  77. for j in range(i+1, len(rule.expansion)):
  78. if set(rule.expansion[i+1:j]) <= NULLABLE:
  79. if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
  80. changed = True
  81. return FIRST, FOLLOW, NULLABLE
  82. class GrammarAnalyzer(object):
  83. def __init__(self, parser_conf, debug=False):
  84. self.debug = debug
  85. rules = parser_conf.rules + [Rule(NonTerminal('$root'), [NonTerminal(parser_conf.start), Terminal('$END')])]
  86. self.rules_by_origin = classify(rules, lambda r: r.origin)
  87. assert len(rules) == len(set(rules))
  88. for r in rules:
  89. for sym in r.expansion:
  90. if not (sym.is_term or sym in self.rules_by_origin):
  91. raise GrammarError("Using an undefined rule: %s" % sym) # TODO test validation
  92. self.start_state = self.expand_rule(NonTerminal('$root'))
  93. self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
  94. def expand_rule(self, rule):
  95. "Returns all init_ptrs accessible by rule (recursive)"
  96. init_ptrs = set()
  97. def _expand_rule(rule):
  98. assert not rule.is_term, rule
  99. for r in self.rules_by_origin[rule]:
  100. init_ptr = RulePtr(r, 0)
  101. init_ptrs.add(init_ptr)
  102. if r.expansion: # if not empty rule
  103. new_r = init_ptr.next
  104. if not new_r.is_term:
  105. yield new_r
  106. for _ in bfs([rule], _expand_rule):
  107. pass
  108. return fzset(init_ptrs)
  109. def _first(self, r):
  110. if r.is_term:
  111. return {r}
  112. else:
  113. return {rp.next for rp in self.expand_rule(r) if rp.next.is_term}