This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

154 lines
5.1 KiB

  1. "My name is Earley"
  2. from ..utils import classify, STRING_TYPE
  3. from ..common import ParseError
# Compatibility shim: make the name `xrange` available on every interpreter.
try:
    xrange  # bare lookup; raises NameError where the builtin does not exist
except NameError:
    # No `xrange` builtin (Python 3): fall back to `range`.
    xrange = range
  8. class MatchFailed(object):
  9. pass
  10. class AbortParseMatch(Exception):
  11. pass
  12. class Rule(object):
  13. def __init__(self, name, symbols, postprocess):
  14. self.name = name
  15. self.symbols = symbols
  16. self.postprocess = postprocess
  17. class State(object):
  18. def __init__(self, rule, expect, reference, data=None):
  19. self.rule = rule
  20. self.expect = expect
  21. self.reference = reference
  22. self.data = data or []
  23. self.is_complete = (self.expect == len(self.rule.symbols))
  24. if not self.is_complete:
  25. self.expect_symbol = self.rule.symbols[self.expect]
  26. self.is_literal = isinstance(self.expect_symbol, dict)
  27. if self.is_literal:
  28. self.expect_symbol = self.expect_symbol['literal']
  29. assert isinstance(self.expect_symbol, STRING_TYPE), self.expect_symbol
  30. def next_state(self, data):
  31. return State(self.rule, self.expect+1, self.reference, self.data + [data])
  32. def consume_terminal(self, inp):
  33. if not self.is_complete and self.is_literal:
  34. # PORT: originally tests regexp
  35. if self.expect_symbol == inp.type:
  36. return self.next_state(inp)
  37. def consume_nonterminal(self, inp):
  38. if not self.is_complete and not self.is_literal:
  39. if self.expect_symbol == inp:
  40. return self.next_state(inp)
  41. def process(self, location, ind, table, rules, added_rules):
  42. if self.is_complete:
  43. # Completed a rule
  44. if self.rule.postprocess:
  45. try:
  46. # self.data = self.rule.postprocess(self.data, self.reference)
  47. # import pdb
  48. # pdb.set_trace()
  49. self.data = self.rule.postprocess(self.data)
  50. except AbortParseMatch:
  51. self.data = MatchFailed
  52. if self.data is not MatchFailed:
  53. for s in table[self.reference]:
  54. x = s.consume_nonterminal(self.rule.name)
  55. if x:
  56. x.data[-1] = self.data
  57. x.epsilon_closure(location, ind, table)
  58. else:
  59. exp = self.rule.symbols[self.expect]
  60. if isinstance(exp, dict):
  61. return
  62. for r in rules[exp]:
  63. assert r.name == exp
  64. if r not in added_rules:
  65. if r.symbols:
  66. added_rules.add(r)
  67. State(r, 0, location).epsilon_closure(location, ind, table)
  68. else:
  69. # Empty rule
  70. new_copy = self.consume_nonterminal(r.name)
  71. if r.postprocess:
  72. new_copy.data[-1] = r.postprocess([])
  73. # new_copy.data[-1] = r.postprocess([], self.reference)
  74. else:
  75. new_copy.data[-1] = []
  76. new_copy.epsilon_closure(location, ind, table)
  77. def epsilon_closure(self, location, ind, table, result=None):
  78. col = table[location]
  79. if not result:
  80. result = col
  81. result.append(self)
  82. if not self.is_complete:
  83. for i in xrange(ind):
  84. state = col[i]
  85. if state.is_complete and state.reference == location:
  86. x = self.consume_nonterminal(state.rule.name)
  87. if x:
  88. x.data[-1] = state.data
  89. x.epsilon_closure(location, ind, table)
  90. class Parser(object):
  91. def __init__(self, rules, start=None):
  92. self.rules = [Rule(r['name'], r['symbols'], r.get('postprocess', None)) for r in rules]
  93. self.rules_by_name = classify(self.rules, lambda r: r.name)
  94. self.start = start or self.rules[0].name
  95. def advance_to(self, table, n, added_rules):
  96. for w, s in enumerate(table[n]):
  97. s.process(n, w, table, self.rules_by_name, added_rules)
  98. def parse(self, stream):
  99. initial_rules = set(self.rules_by_name[self.start])
  100. table = [[State(r, 0, 0) for r in initial_rules]]
  101. self.advance_to(table, 0, initial_rules)
  102. for pos, token in enumerate(stream):
  103. table.append([])
  104. for s in table[pos]:
  105. x = s.consume_terminal(token)
  106. if x:
  107. table[pos + 1].append(x)
  108. self.advance_to(table, pos + 1, set())
  109. if not table[-1]:
  110. raise ParseError('Error at line {t.line}:{t.column}'.format(t=stream[pos]))
  111. res = list(self.finish(table))
  112. if not res:
  113. raise ParseError('Incomplete parse')
  114. return res
  115. def finish(self, table):
  116. for t in table[-1]:
  117. if (t.rule.name == self.start
  118. and t.expect == len(t.rule.symbols)
  119. and t.reference == 0
  120. and t.data is not MatchFailed):
  121. yield t.data