This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

154 lines
4.8 KiB

  1. "My name is Earley"
  2. from ..utils import classify, STRING_TYPE
  3. from ..common import ParseError, UnexpectedToken
  4. try:
  5. xrange
  6. except NameError:
  7. xrange = range
  8. class MatchFailed(object):
  9. pass
  10. class AbortParseMatch(Exception):
  11. pass
  12. class Rule(object):
  13. def __init__(self, name, symbols, postprocess):
  14. self.name = name
  15. self.symbols = symbols
  16. self.postprocess = postprocess
  17. class State(object):
  18. def __init__(self, rule, expect, reference, data=None):
  19. self.rule = rule
  20. self.expect = expect
  21. self.reference = reference
  22. self.data = data or []
  23. self.is_complete = (self.expect == len(self.rule.symbols))
  24. if not self.is_complete:
  25. self.expect_symbol = self.rule.symbols[self.expect]
  26. self.is_terminal = isinstance(self.expect_symbol, tuple)
  27. else:
  28. self.is_terminal = False
  29. def next_state(self, data):
  30. return State(self.rule, self.expect+1, self.reference, self.data + [data])
  31. def consume_terminal(self, inp):
  32. if not self.is_complete and self.is_terminal:
  33. # PORT: originally tests regexp
  34. if self.expect_symbol[1] is not None:
  35. match = self.expect_symbol[1].match(stream, pos)
  36. if self.expect_symbol[0] == inp.type:
  37. return self.next_state(inp)
  38. def consume_nonterminal(self, inp):
  39. if not self.is_complete and not self.is_terminal:
  40. if self.expect_symbol == inp:
  41. return self.next_state(inp)
  42. def process(self, location, ind, table, rules, added_rules):
  43. if self.is_complete:
  44. # Completed a rule
  45. if self.rule.postprocess:
  46. try:
  47. self.data = self.rule.postprocess(self.data)
  48. except AbortParseMatch:
  49. self.data = MatchFailed
  50. if self.data is not MatchFailed:
  51. for s in table[self.reference]:
  52. x = s.consume_nonterminal(self.rule.name)
  53. if x:
  54. x.data[-1] = self.data
  55. x.epsilon_closure(location, ind, table)
  56. else:
  57. exp = self.rule.symbols[self.expect]
  58. if isinstance(exp, tuple):
  59. return
  60. for r in rules[exp]:
  61. assert r.name == exp
  62. if r not in added_rules:
  63. if r.symbols:
  64. added_rules.add(r)
  65. State(r, 0, location).epsilon_closure(location, ind, table)
  66. else:
  67. # Empty rule
  68. new_copy = self.consume_nonterminal(r.name)
  69. new_copy.data[-1] = r.postprocess([]) if r.postprocess else []
  70. new_copy.epsilon_closure(location, ind, table)
  71. def epsilon_closure(self, location, ind, table):
  72. col = table[location]
  73. col.append(self)
  74. if not self.is_complete:
  75. for i in xrange(ind):
  76. state = col[i]
  77. if state.is_complete and state.reference == location:
  78. x = self.consume_nonterminal(state.rule.name)
  79. if x:
  80. x.data[-1] = state.data
  81. x.epsilon_closure(location, ind, table)
  82. class Parser(object):
  83. def __init__(self, rules, start=None):
  84. self.rules = [Rule(r['name'], r['symbols'], r.get('postprocess', None)) for r in rules]
  85. self.rules_by_name = classify(self.rules, lambda r: r.name)
  86. self.start = start or self.rules[0].name
  87. def advance_to(self, table, added_rules):
  88. n = len(table)-1
  89. for w, s in enumerate(table[n]):
  90. s.process(n, w, table, self.rules_by_name, added_rules)
  91. def parse(self, stream):
  92. initial_rules = set(self.rules_by_name[self.start])
  93. table = [[State(r, 0, 0) for r in initial_rules]]
  94. self.advance_to(table, initial_rules)
  95. i = 0
  96. while i < len(stream):
  97. col = []
  98. token = stream[i]
  99. for s in table[-1]:
  100. x = s.consume_terminal(token)
  101. if x:
  102. col.append(x)
  103. if not col:
  104. expected = {s.expect_symbol for s in table[-1] if s.is_terminal}
  105. raise UnexpectedToken(stream[i], expected, stream, i)
  106. table.append(col)
  107. self.advance_to(table, set())
  108. i += 1
  109. res = list(self.finish(table))
  110. if not res:
  111. raise ParseError('Incomplete parse')
  112. return res
  113. def finish(self, table):
  114. for t in table[-1]:
  115. if (t.rule.name == self.start
  116. and t.expect == len(t.rule.symbols)
  117. and t.reference == 0
  118. and t.data is not MatchFailed):
  119. yield t.data