This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

238 rindas
8.4 KiB

  1. "This module implements an Earley Parser"
  2. # The parser uses a parse-forest to keep track of derivations and ambiguations.
  3. # When the parse ends successfully, a disambiguation stage resolves all ambiguity
  4. # (right now ambiguity resolution is not developed beyond the needs of lark)
  5. # Afterwards the parse tree is reduced (transformed) according to user callbacks.
  6. # I use the no-recursion version of Transformer, because the tree might be
  7. # deeper than Python's recursion limit (a bit absurd, but that's life)
  8. #
  9. # The algorithm keeps track of each state set, using a corresponding Column instance.
  10. # Column keeps track of new items using NewsList instances.
  11. #
  12. # Author: Erez Shinan (2017)
  13. # Email : erezshin@gmail.com
  14. from ..common import ParseError, UnexpectedToken, is_terminal
  15. from ..tree import Tree, Transformer_NoRecurse
  16. from .grammar_analysis import GrammarAnalyzer
  17. class Derivation(Tree):
  18. _hash = None
  19. def __init__(self, rule, items=None):
  20. Tree.__init__(self, 'drv', items or [])
  21. self.rule = rule
  22. def _pretty_label(self): # Nicer pretty for debugging the parser
  23. return self.rule.origin if self.rule else self.data
  24. def __hash__(self):
  25. if self._hash is None:
  26. self._hash = Tree.__hash__(self)
  27. return self._hash
  28. class Item(object):
  29. "An Earley Item, the atom of the algorithm."
  30. def __init__(self, rule, ptr, start, tree):
  31. self.rule = rule
  32. self.ptr = ptr
  33. self.start = start
  34. self.tree = tree if tree is not None else Derivation(self.rule)
  35. @property
  36. def expect(self):
  37. return self.rule.expansion[self.ptr]
  38. @property
  39. def is_complete(self):
  40. return self.ptr == len(self.rule.expansion)
  41. def advance(self, tree):
  42. assert self.tree.data == 'drv'
  43. new_tree = Derivation(self.rule, self.tree.children + [tree])
  44. return self.__class__(self.rule, self.ptr+1, self.start, new_tree)
  45. def __eq__(self, other):
  46. return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule
  47. def __hash__(self):
  48. return hash((self.rule, self.ptr, id(self.start))) # Always runs Derivation.__hash__
  49. def __repr__(self):
  50. before = list(map(str, self.rule.expansion[:self.ptr]))
  51. after = list(map(str, self.rule.expansion[self.ptr:]))
  52. return '<(%d) %s : %s * %s>' % (id(self.start), self.rule.origin, ' '.join(before), ' '.join(after))
  53. class NewsList(list):
  54. "Keeps track of newly added items (append-only)"
  55. def __init__(self, initial=None):
  56. list.__init__(self, initial or [])
  57. self.last_iter = 0
  58. def get_news(self):
  59. i = self.last_iter
  60. self.last_iter = len(self)
  61. return self[i:]
  62. class Column:
  63. "An entry in the table, aka Earley Chart. Contains lists of items."
  64. def __init__(self, i, FIRST, predict_all=False):
  65. self.i = i
  66. self.to_reduce = NewsList()
  67. self.to_predict = NewsList()
  68. self.to_scan = []
  69. self.item_count = 0
  70. self.FIRST = FIRST
  71. self.predicted = set()
  72. self.completed = {}
  73. self.predict_all = predict_all
  74. def add(self, items):
  75. """Sort items into scan/predict/reduce newslists
  76. Makes sure only unique items are added.
  77. """
  78. for item in items:
  79. item_key = item, item.tree # Elsewhere, tree is not part of the comparison
  80. if item.is_complete:
  81. # XXX Potential bug: What happens if there's ambiguity in an empty rule?
  82. if item.rule.expansion and item_key in self.completed:
  83. old_tree = self.completed[item_key].tree
  84. if old_tree == item.tree:
  85. is_empty = not self.FIRST[item.rule.origin]
  86. if not is_empty:
  87. continue
  88. if old_tree.data != '_ambig':
  89. new_tree = old_tree.copy()
  90. new_tree.rule = old_tree.rule
  91. old_tree.set('_ambig', [new_tree])
  92. old_tree.rule = None # No longer a 'drv' node
  93. if item.tree.children[0] is old_tree: # XXX a little hacky!
  94. raise ParseError("Infinite recursion in grammar! (Rule %s)" % item.rule)
  95. if item.tree not in old_tree.children:
  96. old_tree.children.append(item.tree)
  97. # old_tree.children.append(item.tree)
  98. else:
  99. self.completed[item_key] = item
  100. self.to_reduce.append(item)
  101. else:
  102. if is_terminal(item.expect):
  103. self.to_scan.append(item)
  104. else:
  105. k = item_key if self.predict_all else item
  106. if k in self.predicted:
  107. continue
  108. self.predicted.add(k)
  109. self.to_predict.append(item)
  110. self.item_count += 1 # Only count if actually added
  111. def __bool__(self):
  112. return bool(self.item_count)
  113. __nonzero__ = __bool__ # Py2 backwards-compatibility
  114. class Parser:
  115. def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None):
  116. analysis = GrammarAnalyzer(parser_conf)
  117. self.parser_conf = parser_conf
  118. self.resolve_ambiguity = resolve_ambiguity
  119. self.FIRST = analysis.FIRST
  120. self.postprocess = {}
  121. self.predictions = {}
  122. for rule in parser_conf.rules:
  123. self.postprocess[rule] = rule.alias if callable(rule.alias) else getattr(parser_conf.callback, rule.alias)
  124. self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]
  125. self.term_matcher = term_matcher
  126. def parse(self, stream, start_symbol=None):
  127. # Define parser functions
  128. start_symbol = start_symbol or self.parser_conf.start
  129. _Item = Item
  130. match = self.term_matcher
  131. def predict(nonterm, column):
  132. assert not is_terminal(nonterm), nonterm
  133. return [_Item(rule, 0, column, None) for rule in self.predictions[nonterm]]
  134. def complete(item):
  135. name = item.rule.origin
  136. return [i.advance(item.tree) for i in item.start.to_predict if i.expect == name]
  137. def predict_and_complete(column):
  138. while True:
  139. to_predict = {x.expect for x in column.to_predict.get_news()
  140. if x.ptr} # if not part of an already predicted batch
  141. to_reduce = set(column.to_reduce.get_news())
  142. if not (to_predict or to_reduce):
  143. break
  144. for nonterm in to_predict:
  145. column.add( predict(nonterm, column) )
  146. for item in to_reduce:
  147. new_items = list(complete(item))
  148. if item in new_items:
  149. raise ParseError('Infinite recursion detected! (rule %s)' % item.rule)
  150. column.add(new_items)
  151. def scan(i, token, column):
  152. next_set = Column(i, self.FIRST)
  153. next_set.add(item.advance(token) for item in column.to_scan if match(item.expect, token))
  154. if not next_set:
  155. expect = {i.expect for i in column.to_scan}
  156. raise UnexpectedToken(token, expect, stream, set(column.to_scan))
  157. return next_set
  158. # Main loop starts
  159. column0 = Column(0, self.FIRST)
  160. column0.add(predict(start_symbol, column0))
  161. column = column0
  162. for i, token in enumerate(stream):
  163. predict_and_complete(column)
  164. column = scan(i, token, column)
  165. predict_and_complete(column)
  166. # Parse ended. Now build a parse tree
  167. solutions = [n.tree for n in column.to_reduce
  168. if n.rule.origin==start_symbol and n.start is column0]
  169. if not solutions:
  170. raise ParseError('Incomplete parse: Could not find a solution to input')
  171. elif len(solutions) == 1:
  172. tree = solutions[0]
  173. else:
  174. tree = Tree('_ambig', solutions)
  175. if self.resolve_ambiguity:
  176. tree = self.resolve_ambiguity(tree)
  177. return ApplyCallbacks(self.postprocess).transform(tree)
  178. class ApplyCallbacks(Transformer_NoRecurse):
  179. def __init__(self, postprocess):
  180. self.postprocess = postprocess
  181. def drv(self, tree):
  182. return self.postprocess[tree.rule](tree.children)