This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

158 lines
5.2 KiB

  1. "My name is Earley"
  2. from ..utils import classify, STRING_TYPE
  3. from ..common import ParseError, UnexpectedToken
# Compatibility shim: probe for the ``xrange`` builtin and, when the lookup
# raises NameError (e.g. on Python 3), bind the name to ``range`` so the rest
# of the module can call ``xrange`` unconditionally.
try:
    xrange
except NameError:
    xrange = range
  8. class MatchFailed(object):
  9. pass
  10. class AbortParseMatch(Exception):
  11. pass
  12. class Rule(object):
  13. def __init__(self, name, symbols, postprocess):
  14. self.name = name
  15. self.symbols = symbols
  16. self.postprocess = postprocess
  17. class State(object):
  18. def __init__(self, rule, expect, reference, data=None):
  19. self.rule = rule
  20. self.expect = expect
  21. self.reference = reference
  22. self.data = data or []
  23. self.is_complete = (self.expect == len(self.rule.symbols))
  24. if not self.is_complete:
  25. self.expect_symbol = self.rule.symbols[self.expect]
  26. self.is_literal = isinstance(self.expect_symbol, dict)
  27. if self.is_literal:
  28. self.expect_symbol = self.expect_symbol['literal']
  29. assert isinstance(self.expect_symbol, STRING_TYPE), self.expect_symbol
  30. else:
  31. self.is_literal = False
  32. def next_state(self, data):
  33. return State(self.rule, self.expect+1, self.reference, self.data + [data])
  34. def consume_terminal(self, inp):
  35. if not self.is_complete and self.is_literal:
  36. # PORT: originally tests regexp
  37. if self.expect_symbol == inp.type:
  38. return self.next_state(inp)
  39. def consume_nonterminal(self, inp):
  40. if not self.is_complete and not self.is_literal:
  41. if self.expect_symbol == inp:
  42. return self.next_state(inp)
  43. def process(self, location, ind, table, rules, added_rules):
  44. if self.is_complete:
  45. # Completed a rule
  46. if self.rule.postprocess:
  47. try:
  48. # self.data = self.rule.postprocess(self.data, self.reference)
  49. # import pdb
  50. # pdb.set_trace()
  51. self.data = self.rule.postprocess(self.data)
  52. except AbortParseMatch:
  53. self.data = MatchFailed
  54. if self.data is not MatchFailed:
  55. for s in table[self.reference]:
  56. x = s.consume_nonterminal(self.rule.name)
  57. if x:
  58. x.data[-1] = self.data
  59. x.epsilon_closure(location, ind, table)
  60. else:
  61. exp = self.rule.symbols[self.expect]
  62. if isinstance(exp, dict):
  63. return
  64. for r in rules[exp]:
  65. assert r.name == exp
  66. if r not in added_rules:
  67. if r.symbols:
  68. added_rules.add(r)
  69. State(r, 0, location).epsilon_closure(location, ind, table)
  70. else:
  71. # Empty rule
  72. new_copy = self.consume_nonterminal(r.name)
  73. if r.postprocess:
  74. new_copy.data[-1] = r.postprocess([])
  75. # new_copy.data[-1] = r.postprocess([], self.reference)
  76. else:
  77. new_copy.data[-1] = []
  78. new_copy.epsilon_closure(location, ind, table)
  79. def epsilon_closure(self, location, ind, table, result=None):
  80. col = table[location]
  81. if not result:
  82. result = col
  83. result.append(self)
  84. if not self.is_complete:
  85. for i in xrange(ind):
  86. state = col[i]
  87. if state.is_complete and state.reference == location:
  88. x = self.consume_nonterminal(state.rule.name)
  89. if x:
  90. x.data[-1] = state.data
  91. x.epsilon_closure(location, ind, table)
  92. class Parser(object):
  93. def __init__(self, rules, start=None):
  94. self.rules = [Rule(r['name'], r['symbols'], r.get('postprocess', None)) for r in rules]
  95. self.rules_by_name = classify(self.rules, lambda r: r.name)
  96. self.start = start or self.rules[0].name
  97. def advance_to(self, table, n, added_rules):
  98. for w, s in enumerate(table[n]):
  99. s.process(n, w, table, self.rules_by_name, added_rules)
  100. def parse(self, stream):
  101. initial_rules = set(self.rules_by_name[self.start])
  102. table = [[State(r, 0, 0) for r in initial_rules]]
  103. self.advance_to(table, 0, initial_rules)
  104. for pos, token in enumerate(stream):
  105. table.append([])
  106. for s in table[pos]:
  107. x = s.consume_terminal(token)
  108. if x:
  109. table[pos + 1].append(x)
  110. self.advance_to(table, pos + 1, set())
  111. if not table[-1]:
  112. expected = {s.expect_symbol for s in table[-2] if s.is_literal}
  113. raise UnexpectedToken(stream[pos], expected, stream, pos)
  114. res = list(self.finish(table))
  115. if not res:
  116. raise ParseError('Incomplete parse')
  117. return res
  118. def finish(self, table):
  119. for t in table[-1]:
  120. if (t.rule.name == self.start
  121. and t.expect == len(t.rule.symbols)
  122. and t.reference == 0
  123. and t.data is not MatchFailed):
  124. yield t.data