This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Nie możesz wybrać więcej niż 25 tematów. Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

186 wierszy
6.3 KiB

  1. from collections import Counter, defaultdict
  2. from ..utils import bfs, fzset, classify
  3. from ..exceptions import GrammarError
  4. from ..grammar import Rule, Terminal, NonTerminal
  5. class RulePtr(object):
  6. __slots__ = ('rule', 'index')
  7. def __init__(self, rule, index):
  8. assert isinstance(rule, Rule)
  9. assert index <= len(rule.expansion)
  10. self.rule = rule
  11. self.index = index
  12. def __repr__(self):
  13. before = [x.name for x in self.rule.expansion[:self.index]]
  14. after = [x.name for x in self.rule.expansion[self.index:]]
  15. return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after))
  16. @property
  17. def next(self):
  18. return self.rule.expansion[self.index]
  19. def advance(self, sym):
  20. assert self.next == sym
  21. return RulePtr(self.rule, self.index+1)
  22. @property
  23. def is_satisfied(self):
  24. return self.index == len(self.rule.expansion)
  25. def __eq__(self, other):
  26. return self.rule == other.rule and self.index == other.index
  27. def __hash__(self):
  28. return hash((self.rule, self.index))
  29. # state generation ensures no duplicate LR0ItemSets
  30. class LR0ItemSet(object):
  31. __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads')
  32. def __init__(self, kernel, closure):
  33. self.kernel = fzset(kernel)
  34. self.closure = fzset(closure)
  35. self.transitions = {}
  36. self.lookaheads = defaultdict(set)
  37. def __repr__(self):
  38. return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure]))
  39. def update_set(set1, set2):
  40. if not set2 or set1 > set2:
  41. return False
  42. copy = set(set1)
  43. set1 |= set2
  44. return set1 != copy
def calculate_sets(rules):
    """Calculate FOLLOW sets.
    Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets

    Returns (FIRST, FOLLOW, NULLABLE):
      FIRST    -- dict: symbol -> set of terminals that can begin it
      FOLLOW   -- dict: symbol -> set of symbols that can follow it
      NULLABLE -- set of symbols that can derive the empty string
    """
    # Every symbol appearing anywhere in the grammar: all expansion
    # symbols plus all rule origins.
    symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}

    # Reference pseudocode for the fixpoint computation below:
    # foreach grammar rule X ::= Y(1) ... Y(k)
    # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
    #   NULLABLE = NULLABLE union {X}
    # for i = 1 to k
    #   if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
    #     FIRST(X) = FIRST(X) union FIRST(Y(i))
    #   for j = i+1 to k
    #     if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
    #       FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
    #     if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
    #       FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
    # until none of NULLABLE,FIRST,FOLLOW changed in last iteration

    NULLABLE = set()
    FIRST = {}
    FOLLOW = {}
    for sym in symbols:
        # A terminal's FIRST set is itself; nonterminals start empty.
        FIRST[sym]={sym} if sym.is_term else set()
        FOLLOW[sym]=set()

    # Calculate NULLABLE and FIRST
    # Iterate to a fixpoint: keep sweeping the rules until no set changes.
    changed = True
    while changed:
        changed = False

        for rule in rules:
            # A rule's origin is nullable if every expansion symbol is
            # nullable (vacuously true for an empty expansion).
            if set(rule.expansion) <= NULLABLE:
                if update_set(NULLABLE, {rule.origin}):
                    changed = True

            for i, sym in enumerate(rule.expansion):
                # FIRST(sym) contributes to FIRST(origin) only while the
                # prefix before sym is all-nullable; stop at the first
                # non-nullable prefix.
                if set(rule.expansion[:i]) <= NULLABLE:
                    if update_set(FIRST[rule.origin], FIRST[sym]):
                        changed = True
                else:
                    break

    # Calculate FOLLOW
    changed = True
    while changed:
        changed = False

        for rule in rules:
            for i, sym in enumerate(rule.expansion):
                # If everything after sym can vanish, whatever follows the
                # rule's origin can also follow sym.
                if i==len(rule.expansion)-1 or set(rule.expansion[i+1:]) <= NULLABLE:
                    if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
                        changed = True

                # FIRST of each later symbol follows sym, as long as the
                # symbols strictly between them are all nullable.
                for j in range(i+1, len(rule.expansion)):
                    if set(rule.expansion[i+1:j]) <= NULLABLE:
                        if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
                            changed = True

    return FIRST, FOLLOW, NULLABLE
class GrammarAnalyzer(object):
    """Precomputes the grammar data parsers need: rules grouped by origin,
    LR start/end item sets, and the FIRST/FOLLOW/NULLABLE sets."""

    def __init__(self, parser_conf, debug=False):
        self.debug = debug

        # One augmented root rule per start symbol:
        #   $root_<start> ::= <start> $END
        root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
                      for start in parser_conf.start}

        rules = parser_conf.rules + list(root_rules.values())
        self.rules_by_origin = classify(rules, lambda r: r.origin)

        # Reject grammars that define the same rule twice.
        if len(rules) != len(set(rules)):
            duplicates = [item for item, count in Counter(rules).items() if count > 1]
            raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))

        # Every nonterminal used in an expansion must have a defining rule.
        for r in rules:
            for sym in r.expansion:
                if not (sym.is_term or sym in self.rules_by_origin):
                    raise GrammarError("Using an undefined rule: %s" % sym)

        # Initial item set (closure of the root item) per start symbol.
        self.start_states = {start: self.expand_rule(root_rule.origin)
                             for start, root_rule in root_rules.items()}

        # Accepting item per start symbol: the root rule fully matched.
        self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))})
                           for start, root_rule in root_rules.items()}

        # LR(0) variant of the root rules, without the $END terminal.
        lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)])
                          for start in parser_conf.start}

        lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
        # Duplicates in parser_conf.rules were already rejected above, so
        # this can only fail on an internal inconsistency.
        assert(len(lr0_rules) == len(set(lr0_rules)))

        self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)

        # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
        self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
                                 for start, root_rule in lr0_root_rules.items()}

        self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)

    def expand_rule(self, source_rule, rules_by_origin=None):
        "Returns all init_ptrs accessible by rule (recursive)"
        if rules_by_origin is None:
            rules_by_origin = self.rules_by_origin

        init_ptrs = set()

        def _expand_rule(rule):
            # Only nonterminals are expanded.
            assert not rule.is_term, rule

            for r in rules_by_origin[rule]:
                init_ptr = RulePtr(r, 0)
                init_ptrs.add(init_ptr)

                if r.expansion: # if not empty rule
                    new_r = init_ptr.next
                    # Yield nonterminals after the dot so bfs visits their
                    # rules too (each visited at most once).
                    if not new_r.is_term:
                        yield new_r

        # Drive the traversal for its side effect of filling init_ptrs.
        for _ in bfs([source_rule], _expand_rule):
            pass

        return fzset(init_ptrs)