This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Ви не можете вибрати більше 25 тем. Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

145 рядків
4.9 KiB

  1. "My name is Earley"
  2. from .utils import classify
  3. class MatchFailed(object):
  4. pass
  5. class AbortParseMatch(Exception):
  6. pass
  7. class Rule(object):
  8. def __init__(self, name, symbols, postprocess):
  9. self.name = name
  10. self.symbols = symbols
  11. self.postprocess = postprocess
  12. class State(object):
  13. def __init__(self, rule, expect, reference, data=None):
  14. self.rule = rule
  15. self.expect = expect
  16. self.reference = reference
  17. self.data = data or []
  18. self.is_complete = (self.expect == len(self.rule.symbols))
  19. if not self.is_complete:
  20. self.expect_symbol = self.rule.symbols[self.expect]
  21. self.is_literal = isinstance(self.expect_symbol, dict)
  22. if self.is_literal:
  23. self.expect_symbol = self.expect_symbol['literal']
  24. assert isinstance(self.expect_symbol, (str, unicode)), self.expect_symbol
  25. def next_state(self, data):
  26. return State(self.rule, self.expect+1, self.reference, self.data + [data])
  27. def consume_terminal(self, inp):
  28. if not self.is_complete and self.is_literal:
  29. # PORT: originally tests regexp
  30. if self.expect_symbol == inp.type:
  31. return self.next_state(inp)
  32. def consume_nonterminal(self, inp):
  33. if not self.is_complete and not self.is_literal:
  34. if self.expect_symbol == inp:
  35. return self.next_state(inp)
  36. def process(self, location, ind, table, rules, added_rules):
  37. if self.is_complete:
  38. # Completed a rule
  39. if self.rule.postprocess:
  40. try:
  41. # self.data = self.rule.postprocess(self.data, self.reference)
  42. # import pdb
  43. # pdb.set_trace()
  44. self.data = self.rule.postprocess(self.data)
  45. except AbortParseMatch:
  46. self.data = MatchFailed
  47. if self.data is not MatchFailed:
  48. for s in table[self.reference]:
  49. x = s.consume_nonterminal(self.rule.name)
  50. if x:
  51. x.data[-1] = self.data
  52. x.epsilon_closure(location, ind, table)
  53. else:
  54. exp = self.rule.symbols[self.expect]
  55. if isinstance(exp, dict):
  56. return
  57. for r in rules[exp]:
  58. assert r.name == exp
  59. if r not in added_rules:
  60. if r.symbols:
  61. added_rules.add(r)
  62. State(r, 0, location).epsilon_closure(location, ind, table)
  63. else:
  64. # Empty rule
  65. new_copy = self.consume_nonterminal(r.name)
  66. if r.postprocess:
  67. new_copy.data[-1] = r.postprocess([])
  68. # new_copy.data[-1] = r.postprocess([], self.reference)
  69. else:
  70. new_copy.data[-1] = []
  71. new_copy.epsilon_closure(location, ind, table)
  72. def epsilon_closure(self, location, ind, table, result=None):
  73. col = table[location]
  74. if not result:
  75. result = col
  76. result.append(self)
  77. if not self.is_complete:
  78. for i in xrange(ind):
  79. state = col[i]
  80. if state.is_complete and state.reference == location:
  81. x = self.consume_nonterminal(state.rule.name)
  82. if x:
  83. x.data[-1] = state.data
  84. x.epsilon_closure(location, ind, table)
  85. class Parser(object):
  86. def __init__(self, rules, start=None):
  87. self.rules = [Rule(r['name'], r['symbols'], r.get('postprocess', None)) for r in rules]
  88. self.rules_by_name = classify(self.rules, lambda r: r.name)
  89. self.start = start or self.rules[0].name
  90. def advance_to(self, table, n, added_rules):
  91. for w, s in enumerate(table[n]):
  92. s.process(n, w, table, self.rules_by_name, added_rules)
  93. def parse(self, stream):
  94. initial_rules = set(self.rules_by_name[self.start])
  95. table = [[State(r, 0, 0) for r in initial_rules]]
  96. self.advance_to(table, 0, initial_rules)
  97. for pos, token in enumerate(stream):
  98. table.append([])
  99. for s in table[pos]:
  100. x = s.consume_terminal(token)
  101. if x:
  102. table[pos + 1].append(x)
  103. self.advance_to(table, pos + 1, set())
  104. if not table[-1]:
  105. raise Exception('Error at line {t.line}:{t.column}'.format(t=stream[pos]))
  106. return list(self.finish(table))
  107. def finish(self, table):
  108. for t in table[-1]:
  109. if (t.rule.name == self.start
  110. and t.expect == len(t.rule.symbols)
  111. and t.reference == 0
  112. and t.data != MatchFailed):
  113. yield t.data