This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

149 lines
5.1 KiB

  1. "My name is Earley"
  2. from .utils import classify
  3. class MatchFailed(object):
  4. pass
  5. class AbortParseMatch(Exception):
  6. pass
  7. class Rule(object):
  8. def __init__(self, name, symbols, postprocess):
  9. self.name = name
  10. self.symbols = symbols
  11. self.postprocess = postprocess
  12. class State(object):
  13. def __init__(self, rule, expect, reference, data=None):
  14. self.rule = rule
  15. self.expect = expect
  16. self.reference = reference
  17. self.data = data or []
  18. self.is_complete = (self.expect == len(self.rule.symbols))
  19. if not self.is_complete:
  20. self.expect_symbol = self.rule.symbols[self.expect]
  21. self.is_literal = isinstance(self.expect_symbol, dict)
  22. if self.is_literal:
  23. self.expect_symbol = self.expect_symbol['literal']
  24. assert isinstance(self.expect_symbol, (str, unicode)), self.expect_symbol
  25. def next_state(self, data):
  26. return State(self.rule, self.expect+1, self.reference, self.data + [data])
  27. def consume_terminal(self, inp):
  28. if not self.is_complete and self.is_literal:
  29. # PORT: originally tests regexp
  30. if self.expect_symbol == inp.type:
  31. return self.next_state(inp)
  32. def consume_nonterminal(self, inp):
  33. if not self.is_complete and not self.is_literal:
  34. if self.expect_symbol == inp:
  35. return self.next_state(inp)
  36. def process(self, location, ind, table, rules, added_rules):
  37. if self.is_complete:
  38. # Completed a rule
  39. if self.rule.postprocess:
  40. try:
  41. # self.data = self.rule.postprocess(self.data, self.reference)
  42. # import pdb
  43. # pdb.set_trace()
  44. self.data = self.rule.postprocess(self.data)
  45. except AbortParseMatch:
  46. self.data = MatchFailed
  47. if self.data is not MatchFailed:
  48. for s in table[self.reference]:
  49. x = s.consume_nonterminal(self.rule.name)
  50. if x:
  51. x.data[-1] = self.data
  52. x.epsilon_closure(location, ind, table)
  53. else:
  54. exp = self.rule.symbols[self.expect]
  55. if isinstance(exp, dict):
  56. return
  57. for r in rules[exp]:
  58. assert r.name == exp
  59. if r not in added_rules:
  60. if r.symbols:
  61. added_rules.add(r)
  62. State(r, 0, location).epsilon_closure(location, ind, table)
  63. else:
  64. # Empty rule
  65. new_copy = self.consume_nonterminal(r.name)
  66. if r.postprocess:
  67. new_copy.data[-1] = r.postprocess([])
  68. # new_copy.data[-1] = r.postprocess([], self.reference)
  69. else:
  70. new_copy.data[-1] = []
  71. new_copy.epsilon_closure(location, ind, table)
  72. def epsilon_closure(self, location, ind, table, result=None):
  73. col = table[location]
  74. if not result:
  75. result = col
  76. result.append(self)
  77. if not self.is_complete:
  78. for i in xrange(ind):
  79. state = col[i]
  80. if state.is_complete and state.reference == location:
  81. x = self.consume_nonterminal(state.rule.name)
  82. if x:
  83. x.data[-1] = state.data
  84. x.epsilon_closure(location, ind, table)
  85. class Parser(object):
  86. def __init__(self, rules, start=None):
  87. self.table = [[]]
  88. self.rules = [Rule(r['name'], r['symbols'], r.get('postprocess', None)) for r in rules]
  89. self.rules_by_name = classify(self.rules, lambda r: r.name)
  90. self.start = start or self.rules[0].name
  91. initial_rules = set(self.rules_by_name[self.start])
  92. self.table[0] += [State(r, 0, 0) for r in initial_rules]
  93. self.advance_to(0, initial_rules)
  94. self.current = 0
  95. def advance_to(self, n, added_rules):
  96. for w, s in enumerate(self.table[n]):
  97. s.process(n, w, self.table, self.rules_by_name, added_rules)
  98. def parse(self, chunk):
  99. chunk_pos = 0
  100. for chunk_pos, chunk_item in enumerate(chunk):
  101. self.table.append([])
  102. for s in self.table[self.current + chunk_pos]:
  103. x = s.consume_terminal(chunk_item)
  104. if x:
  105. self.table[self.current + chunk_pos + 1].append(x)
  106. added_rules = set()
  107. self.advance_to(self.current + chunk_pos + 1, added_rules)
  108. if not self.table[-1]:
  109. raise Exception('Error at line {t.line}:{t.column}'.format(t=chunk[chunk_pos]))
  110. self.current += chunk_pos
  111. return list(self.finish())
  112. def finish(self):
  113. for t in self.table[-1]:
  114. if (t.rule.name == self.start
  115. and t.expect == len(t.rule.symbols)
  116. and t.reference == 0
  117. and t.data != MatchFailed):
  118. yield t.data