This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

465 lines
18 KiB

"""This module implements an SPPF (Shared Packed Parse Forest).

The SPPF is used as the primary output mechanism for the Earley parser
in order to store complex ambiguities.

A full reference and more details are available here:
http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
"""
  7. from random import randint
  8. from ..tree import Tree
  9. from ..exceptions import ParseError
  10. from ..lexer import Token
  11. from ..utils import Str
  12. from ..grammar import NonTerminal, Terminal, Symbol
  13. from collections import deque
  14. from importlib import import_module
  15. class ForestNode(object):
  16. pass
  17. class SymbolNode(ForestNode):
  18. """
  19. A Symbol Node represents a symbol (or Intermediate LR0).
  20. Symbol nodes are keyed by the symbol (s). For intermediate nodes
  21. s will be an LR0, stored as a tuple of (rule, ptr). For completed symbol
  22. nodes, s will be a string representing the non-terminal origin (i.e.
  23. the left hand side of the rule).
  24. The children of a Symbol or Intermediate Node will always be Packed Nodes;
  25. with each Packed Node child representing a single derivation of a production.
  26. Hence a Symbol Node with a single child is unambiguous.
  27. """
  28. __slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate', '_hash')
  29. def __init__(self, s, start, end):
  30. self.s = s
  31. self.start = start
  32. self.end = end
  33. self._children = set()
  34. self.paths = set()
  35. self.paths_loaded = False
  36. self.priority = None
  37. self.is_intermediate = isinstance(s, tuple)
  38. self._hash = hash((self.s, self.start, self.end))
  39. def add_family(self, lr0, rule, start, left, right):
  40. self._children.add(PackedNode(self, lr0, rule, start, left, right))
  41. def add_path(self, transitive, node):
  42. self.paths.add((transitive, node))
  43. def load_paths(self):
  44. for transitive, node in self.paths:
  45. if transitive.next_titem is not None:
  46. vn = SymbolNode(transitive.next_titem.s, transitive.next_titem.start, self.end)
  47. vn.add_path(transitive.next_titem, node)
  48. self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, vn)
  49. else:
  50. self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, node)
  51. self.paths_loaded = True
  52. @property
  53. def is_ambiguous(self):
  54. return len(self.children) > 1
  55. @property
  56. def children(self):
  57. if not self.paths_loaded:
  58. self.load_paths()
  59. return self._children
  60. def __iter__(self):
  61. return iter(self._children)
  62. def __eq__(self, other):
  63. if not isinstance(other, SymbolNode):
  64. return False
  65. return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.end is other.end)
  66. def __hash__(self):
  67. return self._hash
  68. def __repr__(self):
  69. if self.is_intermediate:
  70. rule = self.s[0]
  71. ptr = self.s[1]
  72. before = ( expansion.name for expansion in rule.expansion[:ptr] )
  73. after = ( expansion.name for expansion in rule.expansion[ptr:] )
  74. symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
  75. else:
  76. symbol = self.s.name
  77. return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority if self.priority is not None else 0)
  78. class PackedNode(ForestNode):
  79. """
  80. A Packed Node represents a single derivation in a symbol node.
  81. """
  82. __slots__ = ('parent', 's', 'rule', 'start', 'left', 'right', 'priority', '_hash')
  83. def __init__(self, parent, s, rule, start, left, right):
  84. self.parent = parent
  85. self.s = s
  86. self.start = start
  87. self.rule = rule
  88. self.left = left
  89. self.right = right
  90. self.priority = None
  91. self._hash = hash((self.s, self.start, self.left, self.right))
  92. @property
  93. def is_empty(self):
  94. return self.left is None and self.right is None
  95. def __iter__(self):
  96. return iter([self.left, self.right])
  97. def __lt__(self, other):
  98. if self.is_empty and not other.is_empty: return True
  99. if self.priority < other.priority: return True
  100. return False
  101. def __gt__(self, other):
  102. if self.is_empty and not other.is_empty: return True
  103. if self.priority > other.priority: return True
  104. return False
  105. def __eq__(self, other):
  106. if not isinstance(other, PackedNode):
  107. return False
  108. return self is other or (self.s == other.s and self.start == other.start and self.left == other.left and self.right == other.right)
  109. def __hash__(self):
  110. return self._hash
  111. def __repr__(self):
  112. if isinstance(self.s, tuple):
  113. rule = self.s[0]
  114. ptr = self.s[1]
  115. before = ( expansion.name for expansion in rule.expansion[:ptr] )
  116. after = ( expansion.name for expansion in rule.expansion[ptr:] )
  117. symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
  118. else:
  119. symbol = self.s.name
  120. return "({}, {}, {})".format(symbol, self.start, self.priority)
  121. class ForestVisitor(object):
  122. """
  123. An abstract base class for building forest visitors.
  124. Use this as a base when you need to walk the forest.
  125. """
  126. __slots__ = ['result']
  127. def visit_token_node(self, node): pass
  128. def visit_symbol_node_in(self, node): pass
  129. def visit_symbol_node_out(self, node): pass
  130. def visit_packed_node_in(self, node): pass
  131. def visit_packed_node_out(self, node): pass
  132. def go(self, root):
  133. self.result = None
  134. # Visiting is a list of IDs of all symbol/intermediate nodes currently in
  135. # the stack. It serves two purposes: to detect when we 'recurse' in and out
  136. # of a symbol/intermediate so that we can process both up and down. Also,
  137. # since the SPPF can have cycles it allows us to detect if we're trying
  138. # to recurse into a node that's already on the stack (infinite recursion).
  139. visiting = set()
  140. # We do not use recursion here to walk the Forest due to the limited
  141. # stack size in python. Therefore input_stack is essentially our stack.
  142. input_stack = deque([root])
  143. # It is much faster to cache these as locals since they are called
  144. # many times in large parses.
  145. vpno = getattr(self, 'visit_packed_node_out')
  146. vpni = getattr(self, 'visit_packed_node_in')
  147. vsno = getattr(self, 'visit_symbol_node_out')
  148. vsni = getattr(self, 'visit_symbol_node_in')
  149. vtn = getattr(self, 'visit_token_node')
  150. while input_stack:
  151. current = next(reversed(input_stack))
  152. try:
  153. next_node = next(current)
  154. except StopIteration:
  155. input_stack.pop()
  156. continue
  157. except TypeError:
  158. ### If the current object is not an iterator, pass through to Token/SymbolNode
  159. pass
  160. else:
  161. if next_node is None:
  162. continue
  163. if id(next_node) in visiting:
  164. raise ParseError("Infinite recursion in grammar!")
  165. input_stack.append(next_node)
  166. continue
  167. if not isinstance(current, ForestNode):
  168. vtn(current)
  169. input_stack.pop()
  170. continue
  171. current_id = id(current)
  172. if current_id in visiting:
  173. if isinstance(current, PackedNode): vpno(current)
  174. else: vsno(current)
  175. input_stack.pop()
  176. visiting.remove(current_id)
  177. continue
  178. else:
  179. visiting.add(current_id)
  180. if isinstance(current, PackedNode): next_node = vpni(current)
  181. else: next_node = vsni(current)
  182. if next_node is None:
  183. continue
  184. if id(next_node) in visiting:
  185. raise ParseError("Infinite recursion in grammar!")
  186. input_stack.append(next_node)
  187. continue
  188. return self.result
  189. class ForestSumVisitor(ForestVisitor):
  190. """
  191. A visitor for prioritizing ambiguous parts of the Forest.
  192. This visitor is the default when resolving ambiguity. It pushes the priorities
  193. from the rules into the SPPF nodes; and then sorts the packed node children
  194. of ambiguous symbol or intermediate node according to the priorities.
  195. This relies on the custom sort function provided in PackedNode.__lt__; which
  196. uses these properties (and other factors) to sort the ambiguous packed nodes.
  197. """
  198. def visit_packed_node_in(self, node):
  199. return iter([node.left, node.right])
  200. def visit_symbol_node_in(self, node):
  201. return iter(node.children)
  202. def visit_packed_node_out(self, node):
  203. node.priority = 0
  204. if node.rule.options and node.rule.options.priority: node.priority += node.rule.options.priority
  205. if node.right is not None and hasattr(node.right, 'priority'): node.priority += node.right.priority
  206. if node.left is not None and hasattr(node.left, 'priority'): node.priority += node.left.priority
  207. def visit_symbol_node_out(self, node):
  208. node.priority = max(child.priority for child in node.children)
  209. node._children = sorted(node.children, reverse = True)
  210. class ForestAntiscoreSumVisitor(ForestSumVisitor):
  211. """
  212. A visitor for prioritizing ambiguous parts of the Forest.
  213. This visitor is used when resolve_ambiguity == 'resolve__antiscore_sum'.
  214. It pushes the priorities from the rules into the SPPF nodes, and implements
  215. a 'least cost' mechanism for resolving ambiguity (reverse of the default
  216. priority mechanism). It uses a custom __lt__ comparator key for sorting
  217. the packed node children.
  218. """
  219. def visit_symbol_node_out(self, node):
  220. node.priority = min(child.priority for child in node.children)
  221. node._children = sorted(node.children, key=AntiscoreSumComparator, reverse = True)
  222. class AntiscoreSumComparator(object):
  223. """
  224. An antiscore-sum comparator for PackedNode objects.
  225. This allows 'sorting' an iterable of PackedNode objects so that they
  226. are arranged lowest priority first.
  227. """
  228. __slots__ = ['obj']
  229. def __init__(self, obj, *args):
  230. self.obj = obj
  231. def __lt__(self, other):
  232. if self.obj.is_empty and not other.obj.is_empty: return True
  233. if self.obj.priority > other.obj.priority: return True
  234. return False
  235. def __gt__(self, other):
  236. if self.obj.is_empty and not other.obj.is_empty: return True
  237. if self.obj.priority < other.obj.priority: return True
  238. return False
  239. class ForestToTreeVisitor(ForestVisitor):
  240. """
  241. A Forest visitor which converts an SPPF forest to an unambiguous AST.
  242. The implementation in this visitor walks only the first ambiguous child
  243. of each symbol node. When it finds an ambiguous symbol node it first
  244. calls the forest_sum_visitor implementation to sort the children
  245. into preference order using the algorithms defined there; so the first
  246. child should always be the highest preference. The forest_sum_visitor
  247. implementation should be another ForestVisitor which sorts the children
  248. according to some priority mechanism.
  249. """
  250. __slots__ = ['forest_sum_visitor', 'output_stack', 'callbacks']
  251. def __init__(self, forest_sum_visitor = ForestSumVisitor, callbacks = None):
  252. self.forest_sum_visitor = forest_sum_visitor()
  253. self.callbacks = callbacks
  254. def go(self, root):
  255. self.output_stack = deque()
  256. return super(ForestToTreeVisitor, self).go(root)
  257. def visit_token_node(self, node):
  258. self.output_stack[-1].append(node)
  259. def visit_symbol_node_in(self, node):
  260. if node.is_ambiguous and node.priority is None:
  261. self.forest_sum_visitor.go(node)
  262. return next(iter(node.children))
  263. def visit_packed_node_in(self, node):
  264. if not node.parent.is_intermediate:
  265. self.output_stack.append([])
  266. return iter([node.left, node.right])
  267. def visit_packed_node_out(self, node):
  268. if not node.parent.is_intermediate:
  269. result = self.callbacks[node.rule](self.output_stack.pop())
  270. if self.output_stack:
  271. self.output_stack[-1].append(result)
  272. else:
  273. self.result = result
  274. class ForestToAmbiguousTreeVisitor(ForestVisitor):
  275. """
  276. A Forest visitor which converts an SPPF forest to an ambiguous AST.
  277. Because of the fundamental disparity between what can be stored in
  278. an SPPF and what can be stored in a Tree; this implementation is not
  279. complete. It correctly deals with ambiguities that occur on symbol nodes only,
  280. and cannot deal with ambiguities that occur on intermediate nodes.
  281. Usually, most parsers can be rewritten to avoid intermediate node
  282. ambiguities. Also, this implementation could be fixed, however
  283. the code to handle intermediate node ambiguities is messy and
  284. would not be performant. It is much better not to use this and
  285. instead to correctly disambiguate the forest and only store unambiguous
  286. parses in Trees. It is here just to provide some parity with the
  287. old ambiguity='explicit'.
  288. This is mainly used by the test framework, to make it simpler to write
  289. tests ensuring the SPPF contains the right results.
  290. """
  291. __slots__ = ['output_stack', 'callbacks']
  292. def __init__(self, callbacks):
  293. self.callbacks = callbacks
  294. def go(self, root):
  295. self.output_stack = deque([])
  296. return super(ForestToAmbiguousTreeVisitor, self).go(root)
  297. def visit_token_node(self, node):
  298. self.output_stack[-1].children.append(node)
  299. def visit_symbol_node_in(self, node):
  300. if not node.is_intermediate and node.is_ambiguous:
  301. self.output_stack.append(Tree('_ambig', []))
  302. return iter(node.children)
  303. def visit_symbol_node_out(self, node):
  304. if not node.is_intermediate and node.is_ambiguous:
  305. result = self.output_stack.pop()
  306. if self.output_stack:
  307. self.output_stack[-1].children.append(result)
  308. else:
  309. self.result = result
  310. def visit_packed_node_in(self, node):
  311. #### NOTE:
  312. ## When an intermediate node (node.parent.s == tuple) has ambiguous children this
  313. ## forest visitor will break.
  314. if not node.parent.is_intermediate:
  315. self.output_stack.append(Tree('drv', []))
  316. return iter([node.left, node.right])
  317. def visit_packed_node_out(self, node):
  318. if not node.parent.is_intermediate:
  319. result = self.callbacks[node.rule](self.output_stack.pop().children)
  320. if self.output_stack:
  321. self.output_stack[-1].children.append(result)
  322. else:
  323. self.result = result
  324. class ForestToPyDotVisitor(ForestVisitor):
  325. """
  326. A Forest visitor which writes the SPPF to a PNG.
  327. The SPPF can get really large, really quickly because
  328. of the amount of meta-data it stores, so this is probably
  329. only useful for trivial trees and learning how the SPPF
  330. is structured.
  331. """
  332. def __init__(self, rankdir="TB"):
  333. self.pydot = import_module('pydot')
  334. self.graph = self.pydot.Dot(graph_type='digraph', rankdir=rankdir)
  335. def go(self, root, filename):
  336. super(ForestToPyDotVisitor, self).go(root)
  337. self.graph.write_png(filename)
  338. def visit_token_node(self, node):
  339. graph_node_id = str(id(node))
  340. graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"'))
  341. graph_node_color = 0x808080
  342. graph_node_style = "\"filled,rounded\""
  343. graph_node_shape = "diamond"
  344. graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
  345. self.graph.add_node(graph_node)
  346. def visit_packed_node_in(self, node):
  347. graph_node_id = str(id(node))
  348. graph_node_label = repr(node)
  349. graph_node_color = 0x808080
  350. graph_node_style = "filled"
  351. graph_node_shape = "diamond"
  352. graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
  353. self.graph.add_node(graph_node)
  354. return iter([node.left, node.right])
  355. def visit_packed_node_out(self, node):
  356. graph_node_id = str(id(node))
  357. graph_node = self.graph.get_node(graph_node_id)[0]
  358. for child in [node.left, node.right]:
  359. if child is not None:
  360. child_graph_node_id = str(id(child))
  361. child_graph_node = self.graph.get_node(child_graph_node_id)[0]
  362. self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
  363. else:
  364. #### Try and be above the Python object ID range; probably impl. specific, but maybe this is okay.
  365. child_graph_node_id = str(randint(100000000000000000000000000000,123456789012345678901234567890))
  366. child_graph_node_style = "invis"
  367. child_graph_node = self.pydot.Node(child_graph_node_id, style=child_graph_node_style, label="None")
  368. child_edge_style = "invis"
  369. self.graph.add_node(child_graph_node)
  370. self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node, style=child_edge_style))
  371. def visit_symbol_node_in(self, node):
  372. graph_node_id = str(id(node))
  373. graph_node_label = repr(node)
  374. graph_node_color = 0x808080
  375. graph_node_style = "\"filled\""
  376. if node.is_intermediate:
  377. graph_node_shape = "ellipse"
  378. else:
  379. graph_node_shape = "rectangle"
  380. graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
  381. self.graph.add_node(graph_node)
  382. return iter(node.children)
  383. def visit_symbol_node_out(self, node):
  384. graph_node_id = str(id(node))
  385. graph_node = self.graph.get_node(graph_node_id)[0]
  386. for child in node.children:
  387. child_graph_node_id = str(id(child))
  388. child_graph_node = self.graph.get_node(child_graph_node_id)[0]
  389. self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))