This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

155 lines
5.1 KiB

  1. from collections import Counter
  2. from ..utils import bfs, fzset, classify
  3. from ..exceptions import GrammarError
  4. from ..grammar import Rule, Terminal, NonTerminal
  5. class RulePtr(object):
  6. __slots__ = ('rule', 'index')
  7. def __init__(self, rule, index):
  8. assert isinstance(rule, Rule)
  9. assert index <= len(rule.expansion)
  10. self.rule = rule
  11. self.index = index
  12. def __repr__(self):
  13. before = [x.name for x in self.rule.expansion[:self.index]]
  14. after = [x.name for x in self.rule.expansion[self.index:]]
  15. return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after))
  16. @property
  17. def next(self):
  18. return self.rule.expansion[self.index]
  19. def advance(self, sym):
  20. assert self.next == sym
  21. return RulePtr(self.rule, self.index+1)
  22. @property
  23. def is_satisfied(self):
  24. return self.index == len(self.rule.expansion)
  25. def __eq__(self, other):
  26. return self.rule == other.rule and self.index == other.index
  27. def __hash__(self):
  28. return hash((self.rule, self.index))
  29. def update_set(set1, set2):
  30. if not set2 or set1 > set2:
  31. return False
  32. copy = set(set1)
  33. set1 |= set2
  34. return set1 != copy
  35. def calculate_sets(rules):
  36. """Calculate FOLLOW sets.
  37. Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
  38. symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
  39. # foreach grammar rule X ::= Y(1) ... Y(k)
  40. # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
  41. # NULLABLE = NULLABLE union {X}
  42. # for i = 1 to k
  43. # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
  44. # FIRST(X) = FIRST(X) union FIRST(Y(i))
  45. # for j = i+1 to k
  46. # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
  47. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
  48. # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
  49. # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
  50. # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
  51. NULLABLE = set()
  52. FIRST = {}
  53. FOLLOW = {}
  54. for sym in symbols:
  55. FIRST[sym]={sym} if sym.is_term else set()
  56. FOLLOW[sym]=set()
  57. # Calculate NULLABLE and FIRST
  58. changed = True
  59. while changed:
  60. changed = False
  61. for rule in rules:
  62. if set(rule.expansion) <= NULLABLE:
  63. if update_set(NULLABLE, {rule.origin}):
  64. changed = True
  65. for i, sym in enumerate(rule.expansion):
  66. if set(rule.expansion[:i]) <= NULLABLE:
  67. if update_set(FIRST[rule.origin], FIRST[sym]):
  68. changed = True
  69. else:
  70. break
  71. # Calculate FOLLOW
  72. changed = True
  73. while changed:
  74. changed = False
  75. for rule in rules:
  76. for i, sym in enumerate(rule.expansion):
  77. if i==len(rule.expansion)-1 or set(rule.expansion[i+1:]) <= NULLABLE:
  78. if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
  79. changed = True
  80. for j in range(i+1, len(rule.expansion)):
  81. if set(rule.expansion[i+1:j]) <= NULLABLE:
  82. if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
  83. changed = True
  84. return FIRST, FOLLOW, NULLABLE
  85. class GrammarAnalyzer(object):
  86. def __init__(self, parser_conf, debug=False):
  87. self.debug = debug
  88. root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
  89. for start in parser_conf.start}
  90. rules = parser_conf.rules + list(root_rules.values())
  91. self.rules_by_origin = classify(rules, lambda r: r.origin)
  92. if len(rules) != len(set(rules)):
  93. duplicates = [item for item, count in Counter(rules).items() if count > 1]
  94. raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))
  95. for r in rules:
  96. for sym in r.expansion:
  97. if not (sym.is_term or sym in self.rules_by_origin):
  98. raise GrammarError("Using an undefined rule: %s" % sym) # TODO test validation
  99. self.start_states = {start: self.expand_rule(root_rule.origin)
  100. for start, root_rule in root_rules.items()}
  101. self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))})
  102. for start, root_rule in root_rules.items()}
  103. self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
  104. def expand_rule(self, source_rule):
  105. "Returns all init_ptrs accessible by rule (recursive)"
  106. init_ptrs = set()
  107. def _expand_rule(rule):
  108. assert not rule.is_term, rule
  109. for r in self.rules_by_origin[rule]:
  110. init_ptr = RulePtr(r, 0)
  111. init_ptrs.add(init_ptr)
  112. if r.expansion: # if not empty rule
  113. new_r = init_ptr.next
  114. if not new_r.is_term:
  115. yield new_r
  116. for _ in bfs([source_rule], _expand_rule):
  117. pass
  118. return fzset(init_ptrs)