This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

145 Zeilen
4.9 KiB

  1. "My name is Earley"
  2. from .utils import classify
  3. class MatchFailed(object):
  4. pass
  5. class AbortParseMatch(Exception):
  6. pass
  7. class Rule(object):
  8. def __init__(self, name, symbols, postprocess):
  9. self.name = name
  10. self.symbols = symbols
  11. self.postprocess = postprocess
  12. class State(object):
  13. def __init__(self, rule, expect, reference, data=None):
  14. self.rule = rule
  15. self.expect = expect
  16. self.reference = reference
  17. self.data = data or []
  18. self.is_complete = (self.expect == len(self.rule.symbols))
  19. if not self.is_complete:
  20. self.expect_symbol = self.rule.symbols[self.expect]
  21. self.is_literal = isinstance(self.expect_symbol, dict)
  22. if self.is_literal:
  23. self.expect_symbol = self.expect_symbol['literal']
  24. assert isinstance(self.expect_symbol, (str, unicode)), self.expect_symbol
  25. def next_state(self, data):
  26. return State(self.rule, self.expect+1, self.reference, self.data + [data])
  27. def consume_terminal(self, inp):
  28. if not self.is_complete and self.is_literal:
  29. # PORT: originally tests regexp
  30. if self.expect_symbol == inp.type:
  31. return self.next_state(inp)
  32. def consume_nonterminal(self, inp):
  33. if not self.is_complete and not self.is_literal:
  34. if self.expect_symbol == inp:
  35. return self.next_state(inp)
  36. def process(self, location, ind, table, rules, added_rules):
  37. if self.is_complete:
  38. # Completed a rule
  39. if self.rule.postprocess:
  40. try:
  41. # self.data = self.rule.postprocess(self.data, self.reference)
  42. # import pdb
  43. # pdb.set_trace()
  44. self.data = self.rule.postprocess(self.data)
  45. except AbortParseMatch:
  46. self.data = MatchFailed
  47. if self.data is not MatchFailed:
  48. for s in table[self.reference]:
  49. x = s.consume_nonterminal(self.rule.name)
  50. if x:
  51. x.data[-1] = self.data
  52. x.epsilon_closure(location, ind, table)
  53. else:
  54. exp = self.rule.symbols[self.expect]
  55. if isinstance(exp, dict):
  56. return
  57. for r in rules[exp]:
  58. assert r.name == exp
  59. if r not in added_rules:
  60. if r.symbols:
  61. added_rules.add(r)
  62. State(r, 0, location).epsilon_closure(location, ind, table)
  63. else:
  64. # Empty rule
  65. new_copy = self.consume_nonterminal(r.name)
  66. if r.postprocess:
  67. new_copy.data[-1] = r.postprocess([])
  68. # new_copy.data[-1] = r.postprocess([], self.reference)
  69. else:
  70. new_copy.data[-1] = []
  71. new_copy.epsilon_closure(location, ind, table)
  72. def epsilon_closure(self, location, ind, table, result=None):
  73. col = table[location]
  74. if not result:
  75. result = col
  76. result.append(self)
  77. if not self.is_complete:
  78. for i in xrange(ind):
  79. state = col[i]
  80. if state.is_complete and state.reference == location:
  81. x = self.consume_nonterminal(state.rule.name)
  82. if x:
  83. x.data[-1] = state.data
  84. x.epsilon_closure(location, ind, table)
  85. class Parser(object):
  86. def __init__(self, rules, start=None):
  87. self.rules = [Rule(r['name'], r['symbols'], r.get('postprocess', None)) for r in rules]
  88. self.rules_by_name = classify(self.rules, lambda r: r.name)
  89. self.start = start or self.rules[0].name
  90. def advance_to(self, table, n, added_rules):
  91. for w, s in enumerate(table[n]):
  92. s.process(n, w, table, self.rules_by_name, added_rules)
  93. def parse(self, stream):
  94. initial_rules = set(self.rules_by_name[self.start])
  95. table = [[State(r, 0, 0) for r in initial_rules]]
  96. self.advance_to(table, 0, initial_rules)
  97. for pos, token in enumerate(stream):
  98. table.append([])
  99. for s in table[pos]:
  100. x = s.consume_terminal(token)
  101. if x:
  102. table[pos + 1].append(x)
  103. self.advance_to(table, pos + 1, set())
  104. if not table[-1]:
  105. raise Exception('Error at line {t.line}:{t.column}'.format(t=stream[pos]))
  106. return list(self.finish(table))
  107. def finish(self, table):
  108. for t in table[-1]:
  109. if (t.rule.name == self.start
  110. and t.expect == len(t.rule.symbols)
  111. and t.reference == 0
  112. and t.data != MatchFailed):
  113. yield t.data