This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

156 lines
4.8 KiB

  1. "My name is Earley"
  2. from ..utils import classify
  3. from ..common import ParseError, UnexpectedToken
# Python 2/3 compatibility: probe for the ``xrange`` builtin and, when the
# name does not exist (Python 3), alias it to ``range`` so the rest of the
# module can use ``xrange`` unconditionally.
try:
    xrange
except NameError:
    xrange = range
  8. class MatchFailed(object):
  9. pass
  10. class AbortParseMatch(Exception):
  11. pass
  12. class Rule(object):
  13. def __init__(self, name, symbols, postprocess):
  14. self.name = name
  15. self.symbols = symbols
  16. self.postprocess = postprocess
  17. class State(object):
  18. def __init__(self, rule, expect, reference, data=None):
  19. self.rule = rule
  20. self.expect = expect
  21. self.reference = reference
  22. self.data = data or []
  23. self.is_complete = (self.expect == len(self.rule.symbols))
  24. if not self.is_complete:
  25. self.expect_symbol = self.rule.symbols[self.expect]
  26. self.is_terminal = isinstance(self.expect_symbol, tuple)
  27. else:
  28. self.is_terminal = False
  29. def next_state(self, data):
  30. return State(self.rule, self.expect+1, self.reference, self.data + [data])
  31. def consume_terminal(self, inp):
  32. if not self.is_complete and self.is_terminal:
  33. # PORT: originally tests regexp
  34. if self.expect_symbol[1] is not None:
  35. match = self.expect_symbol[1].match(inp)
  36. if match:
  37. return self.next_state(inp)
  38. elif self.expect_symbol[0] == inp.type:
  39. return self.next_state(inp)
  40. def consume_nonterminal(self, inp):
  41. if not self.is_complete and not self.is_terminal:
  42. if self.expect_symbol == inp:
  43. return self.next_state(inp)
  44. def process(self, location, ind, table, rules, added_rules):
  45. if self.is_complete:
  46. # Completed a rule
  47. if self.rule.postprocess:
  48. try:
  49. self.data = self.rule.postprocess(self.data)
  50. except AbortParseMatch:
  51. self.data = MatchFailed
  52. if self.data is not MatchFailed:
  53. for s in table[self.reference]:
  54. x = s.consume_nonterminal(self.rule.name)
  55. if x:
  56. x.data[-1] = self.data
  57. x.epsilon_closure(location, ind, table)
  58. else:
  59. exp = self.rule.symbols[self.expect]
  60. if isinstance(exp, tuple):
  61. return
  62. for r in rules[exp]:
  63. assert r.name == exp
  64. if r not in added_rules:
  65. if r.symbols:
  66. added_rules.add(r)
  67. State(r, 0, location).epsilon_closure(location, ind, table)
  68. else:
  69. # Empty rule
  70. new_copy = self.consume_nonterminal(r.name)
  71. new_copy.data[-1] = r.postprocess([]) if r.postprocess else []
  72. new_copy.epsilon_closure(location, ind, table)
  73. def epsilon_closure(self, location, ind, table):
  74. col = table[location]
  75. col.append(self)
  76. if not self.is_complete:
  77. for i in xrange(ind):
  78. state = col[i]
  79. if state.is_complete and state.reference == location:
  80. x = self.consume_nonterminal(state.rule.name)
  81. if x:
  82. x.data[-1] = state.data
  83. x.epsilon_closure(location, ind, table)
  84. class Parser(object):
  85. def __init__(self, rules, start=None):
  86. self.rules = [Rule(r['name'], r['symbols'], r.get('postprocess', None)) for r in rules]
  87. self.rules_by_name = classify(self.rules, lambda r: r.name)
  88. self.start = start or self.rules[0].name
  89. def advance_to(self, table, added_rules):
  90. n = len(table)-1
  91. for w, s in enumerate(table[n]):
  92. s.process(n, w, table, self.rules_by_name, added_rules)
  93. def parse(self, stream):
  94. initial_rules = set(self.rules_by_name[self.start])
  95. table = [[State(r, 0, 0) for r in initial_rules]]
  96. self.advance_to(table, initial_rules)
  97. i = 0
  98. while i < len(stream):
  99. col = []
  100. token = stream[i]
  101. for s in table[-1]:
  102. x = s.consume_terminal(token)
  103. if x:
  104. col.append(x)
  105. if not col:
  106. expected = {s.expect_symbol for s in table[-1] if s.is_terminal}
  107. raise UnexpectedToken(stream[i], expected, stream, i)
  108. table.append(col)
  109. self.advance_to(table, set())
  110. i += 1
  111. res = list(self.finish(table))
  112. if not res:
  113. raise ParseError('Incomplete parse')
  114. return res
  115. def finish(self, table):
  116. for t in table[-1]:
  117. if (t.rule.name == self.start
  118. and t.expect == len(t.rule.symbols)
  119. and t.reference == 0
  120. and t.data is not MatchFailed):
  121. yield t.data