This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.
 
 

271 líneas
9.0 KiB

  1. "This module implements an Earley Parser"
  2. # The parser uses a parse-forest to keep track of derivations and ambiguities.
  3. # When the parse ends successfully, a disambiguation stage resolves all ambiguity
  4. # (right now ambiguity resolution is not developed beyond the needs of lark)
  5. # Afterwards the parse tree is reduced (transformed) according to user callbacks.
  6. # I use the no-recursion version of Transformer and Visitor, because the tree might be
  7. # deeper than Python's recursion limit (a bit absurd, but that's life)
  8. #
  9. # The algorithm keeps track of each state set, using a corresponding Column instance.
  10. # Column keeps track of new items using NewsList instances.
  11. #
  12. # Author: Erez Shinan (2017)
  13. # Email : erezshin@gmail.com
  14. from ..common import ParseError, UnexpectedToken, Terminal
  15. from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse
  16. from .grammar_analysis import GrammarAnalyzer
class EndToken:
    "Token-like marker object whose ``type`` attribute is the string '$end'."
    # Class-level attribute, so every instance reports the same type.
    type = '$end'
  19. class Derivation(Tree):
  20. _hash = None
  21. def __init__(self, rule, items=None):
  22. Tree.__init__(self, 'drv', items or [])
  23. self.rule = rule
  24. def _pretty_label(self): # Nicer pretty for debugging the parser
  25. return self.rule.origin if self.rule else self.data
  26. def __hash__(self):
  27. if self._hash is None:
  28. self._hash = Tree.__hash__(self)
  29. return self._hash
# Module-level EndToken instance (not referenced elsewhere in the visible part of this module).
END_TOKEN = EndToken()
  31. class Item(object):
  32. "An Earley Item, the atom of the algorithm."
  33. def __init__(self, rule, ptr, start, tree):
  34. self.rule = rule
  35. self.ptr = ptr
  36. self.start = start
  37. self.tree = tree if tree is not None else Derivation(self.rule)
  38. @property
  39. def expect(self):
  40. return self.rule.expansion[self.ptr]
  41. @property
  42. def is_complete(self):
  43. return self.ptr == len(self.rule.expansion)
  44. def advance(self, tree):
  45. assert self.tree.data == 'drv'
  46. new_tree = Derivation(self.rule, self.tree.children + [tree])
  47. return self.__class__(self.rule, self.ptr+1, self.start, new_tree)
  48. def similar(self, other):
  49. return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule
  50. def __eq__(self, other):
  51. return self.similar(other) and (self.tree == other.tree)
  52. def __hash__(self):
  53. return hash((self.rule, self.ptr, id(self.start), self.tree)) # Always runs Derivation.__hash__
  54. def __repr__(self):
  55. before = list(map(str, self.rule.expansion[:self.ptr]))
  56. after = list(map(str, self.rule.expansion[self.ptr:]))
  57. return '<(%d) %s : %s * %s>' % (id(self.start), self.rule.origin, ' '.join(before), ' '.join(after))
  58. class Item_JoinDerivations(Item):
  59. __eq__ = Item.similar
  60. def __hash__(self):
  61. return hash((self.rule, self.ptr, id(self.start))) # Always runs Derivation.__hash__
  62. class NewsList(list):
  63. "Keeps track of newly added items (append-only)"
  64. def __init__(self, initial=None):
  65. list.__init__(self, initial or [])
  66. self.last_iter = 0
  67. def get_news(self):
  68. i = self.last_iter
  69. self.last_iter = len(self)
  70. return self[i:]
  71. class Column:
  72. "An entry in the table, aka Earley Chart. Contains lists of items."
  73. def __init__(self, i):
  74. self.i = i
  75. self.to_reduce = NewsList()
  76. self.to_predict = NewsList()
  77. self.to_scan = NewsList()
  78. self.item_count = 0
  79. self.added = set()
  80. self.completed = {}
  81. def add(self, items):
  82. """Sort items into scan/predict/reduce newslists
  83. Makes sure only unique items are added.
  84. """
  85. for item in items:
  86. if item.is_complete:
  87. # XXX Potential bug: What happens if there's ambiguity in an empty rule?
  88. if item.rule.expansion and item in self.completed:
  89. old_tree = self.completed[item].tree
  90. if old_tree.data != '_ambig':
  91. new_tree = old_tree.copy()
  92. new_tree.rule = old_tree.rule
  93. old_tree.set('_ambig', [new_tree])
  94. old_tree.rule = None # No longer a 'drv' node
  95. if item.tree.children[0] is old_tree: # XXX a little hacky!
  96. raise ParseError("Infinite recursion in grammar! (Rule %s)" % item.rule)
  97. old_tree.children.append(item.tree)
  98. else:
  99. self.completed[item] = item
  100. self.to_reduce.append(item)
  101. else:
  102. if item not in self.added:
  103. self.added.add(item)
  104. if isinstance(item.expect, Terminal):
  105. self.to_scan.append(item)
  106. else:
  107. self.to_predict.append(item)
  108. self.item_count += 1 # Only count if actually added
  109. def __nonzero__(self):
  110. return bool(self.item_count)
  111. class Parser:
  112. def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None, all_derivations=True):
  113. """
  114. all_derivations:
  115. True = Try every rule combination, and every possible derivation of each rule. (default)
  116. False = Try every rule combination, but not every derivation of the same rule.
  117. """
  118. self.analysis = GrammarAnalyzer(rules, start_symbol)
  119. self.start_symbol = start_symbol
  120. self.resolve_ambiguity = resolve_ambiguity
  121. self.all_derivations = all_derivations
  122. self.postprocess = {}
  123. self.predictions = {}
  124. for rule in self.analysis.rules:
  125. if rule.origin != '$root': # XXX kinda ugly
  126. a = rule.alias
  127. self.postprocess[rule] = a if callable(a) else (a and getattr(callback, a))
  128. self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]
  129. def parse(self, stream, start_symbol=None):
  130. # Define parser functions
  131. start_symbol = start_symbol or self.start_symbol
  132. _Item = Item if self.all_derivations else Item_JoinDerivations
  133. def predict(nonterm, column):
  134. assert not isinstance(nonterm, Terminal), nonterm
  135. return [_Item(rule, 0, column, None) for rule in self.predictions[nonterm]]
  136. def complete(item):
  137. name = item.rule.origin
  138. return [i.advance(item.tree) for i in item.start.to_predict if i.expect == name]
  139. def predict_and_complete(column):
  140. while True:
  141. to_predict = {x.expect for x in column.to_predict.get_news()
  142. if x.ptr} # if not part of an already predicted batch
  143. to_reduce = column.to_reduce.get_news()
  144. if not (to_predict or to_reduce):
  145. break
  146. for nonterm in to_predict:
  147. column.add( predict(nonterm, column) )
  148. for item in to_reduce:
  149. new_items = list(complete(item))
  150. for new_item in new_items:
  151. if new_item.similar(item):
  152. raise ParseError('Infinite recursion detected! (rule %s)' % new_item.rule)
  153. column.add(new_items)
  154. def scan(i, token, column):
  155. to_scan = column.to_scan.get_news()
  156. next_set = Column(i)
  157. next_set.add(item.advance(token) for item in to_scan if item.expect.match(token))
  158. if not next_set:
  159. expect = {i.expect for i in column.to_scan}
  160. raise UnexpectedToken(token, expect, stream, i)
  161. return next_set
  162. # Main loop starts
  163. column0 = Column(0)
  164. column0.add(predict(start_symbol, column0))
  165. column = column0
  166. for i, token in enumerate(stream):
  167. predict_and_complete(column)
  168. column = scan(i, token, column)
  169. predict_and_complete(column)
  170. # Parse ended. Now build a parse tree
  171. solutions = [n.tree for n in column.to_reduce
  172. if n.rule.origin==start_symbol and n.start is column0]
  173. if not solutions:
  174. raise ParseError('Incomplete parse: Could not find a solution to input')
  175. elif len(solutions) == 1:
  176. tree = solutions[0]
  177. else:
  178. tree = Tree('_ambig', solutions)
  179. if self.resolve_ambiguity:
  180. tree = self.resolve_ambiguity(tree)
  181. return ApplyCallbacks(self.postprocess).transform(tree)
  182. class ApplyCallbacks(Transformer_NoRecurse):
  183. def __init__(self, postprocess):
  184. self.postprocess = postprocess
  185. def drv(self, tree):
  186. children = tree.children
  187. callback = self.postprocess[tree.rule]
  188. if callback:
  189. return callback(children)
  190. else:
  191. return Tree(rule.origin, children)
  192. # RULES = [
  193. # ('a', ['d']),
  194. # ('d', ['b']),
  195. # ('b', ['C']),
  196. # ('b', ['b', 'C']),
  197. # ('b', ['C', 'b']),
  198. # ]
  199. # p = Parser(RULES, 'a')
  200. # for x in p.parse('CC'):
  201. # print x.pretty()
  202. #---------------
  203. # RULES = [
  204. # ('s', ['a', 'a']),
  205. # ('a', ['b', 'b']),
  206. # ('b', ['C'], lambda (x,): x),
  207. # ('b', ['b', 'C']),
  208. # ]
  209. # p = Parser(RULES, 's', {})
  210. # print p.parse('CCCCC').pretty()