You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

506 lines
19 KiB

  1. #! /usr/bin/env python
  2. """Compatibility module, imported by ZSI if you don't have PyXML 0.7.
  3. No copyright violations -- we're only using parts of PyXML that we
  4. wrote.
  5. """
  6. from ZSI import _attrs, _children, _copyright
  7. _copyright += "\n\nPortions are also: "
  8. _copyright += '''Copyright 2001, Zolera Systems Inc. All Rights Reserved.
  9. Copyright 2001, MIT. All Rights Reserved.
  10. Distributed under the terms of:
  11. Python 2.0 License or later.
  12. http://www.python.org/2.0.1/license.html
  13. or
  14. W3C Software License
  15. http://www.w3.org/Consortium/Legal/copyright-software-19980720
  16. '''
  17. from xml.dom import Node
  18. from Namespaces import XMLNS
  19. import cStringIO as StringIO
# Use PyXML's c14n module when it is installed; otherwise Canonicalize()
# sees _implementation2 is None and uses the local _implementation class.
try:
    from xml.dom.ext import c14n
except ImportError, ex:
    _implementation2 = None
else:
    class _implementation2(c14n._implementation):
        """Patch for exclusive c14n.

        Subclasses PyXML's c14n._implementation and installs the
        _do_element method below as the handler for element nodes.
        """
        def __init__(self, node, write, **kw):
            """Record exclusive-c14n context, then run the base implementation.

            node  -- DOM node to canonicalize
            write -- callable receiving each piece of output text
            kw    -- options forwarded to c14n._implementation.__init__;
                     'unsuppressedPrefixes' is also read here
            """
            # Stored before c14n._inclusive(self) is called below.
            # NOTE(review): presumably _inclusive() reads this attribute --
            # confirm against PyXML's c14n module.
            self.unsuppressedPrefixes = kw.get('unsuppressedPrefixes')
            # Only filled in for an element canonicalized in exclusive mode;
            # _do_element iterates over it in that mode.
            self._exclusive = None
            if node.nodeType == Node.ELEMENT_NODE:
                if not c14n._inclusive(self):
                    self._exclusive = self._inherit_context(node)
            c14n._implementation.__init__(self, node, write, **kw)

        def _do_element(self, node, initial_other_attrs = []):
            """Patch for the xml.dom.ext.c14n implementation _do_element method.
            This fixes a problem with sorting of namespaces.

            node                -- element node to write (with its children)
            initial_other_attrs -- extra attribute nodes to treat as if they
                                   were on this element; the list is copied,
                                   never modified

            Raises RuntimeError in exclusive mode when a namespace needed by
            the element or one of its attributes cannot be found.
            """
            # Get state (from the stack) make local copies.
            #   ns_parent -- NS declarations in parent
            #   ns_rendered -- NS nodes rendered by ancestors
            #   ns_local -- NS declarations relevant to this element
            #   xml_attrs -- Attributes in XML namespace from parent
            #   xml_attrs_local -- Local attributes in XML namespace.
            ns_parent, ns_rendered, xml_attrs = \
                self.state[0], self.state[1].copy(), self.state[2].copy() #0422
            ns_local = ns_parent.copy()
            xml_attrs_local = {}

            # Divide attributes into NS, XML, and others.
            #other_attrs = initial_other_attrs[:]
            other_attrs = []
            sort_these_attrs = initial_other_attrs[:]
            in_subset = c14n._in_subset(self.subset, node)
            #for a in _attrs(node):
            sort_these_attrs +=c14n._attrs(node)
            for a in sort_these_attrs:
                if a.namespaceURI == c14n.XMLNS.BASE:
                    n = a.nodeName
                    if n == "xmlns:": n = "xmlns" # DOM bug workaround
                    ns_local[n] = a.nodeValue
                elif a.namespaceURI == c14n.XMLNS.XML:
                    if c14n._inclusive(self) or (in_subset and c14n._in_subset(self.subset, a)): #020925 Test to see if attribute node in subset
                        xml_attrs_local[a.nodeName] = a #0426
                else:
                    if c14n._in_subset(self.subset, a): #020925 Test to see if attribute node in subset
                        other_attrs.append(a)
            #add local xml:foo attributes to ancestor's xml:foo attributes
            xml_attrs.update(xml_attrs_local)

            # Render the node
            # name stays None when the element is outside the subset, which
            # also suppresses the end tag at the bottom of this method.
            W, name = self.write, None
            if in_subset:
                name = node.nodeName
                W('<')
                W(name)

                # Create list of NS attributes to render.
                ns_to_render = []
                for n,v in ns_local.items():
                    # If default namespace is XMLNS.BASE or empty,
                    # and if an ancestor was the same
                    if n == "xmlns" and v in [ c14n.XMLNS.BASE, '' ] \
                    and ns_rendered.get('xmlns') in [ c14n.XMLNS.BASE, '', None ]:
                        continue

                    # "omit namespace node with local name xml, which defines
                    # the xml prefix, if its string value is
                    # http://www.w3.org/XML/1998/namespace."
                    if n in ["xmlns:xml", "xml"] \
                    and v in [ 'http://www.w3.org/XML/1998/namespace' ]:
                        continue

                    # If not previously rendered
                    # and it's inclusive or utilized
                    if (n,v) not in ns_rendered.items() \
                    and (c14n._inclusive(self) or \
                    c14n._utilized(n, node, other_attrs, self.unsuppressedPrefixes)):
                        ns_to_render.append((n, v))

                #####################################
                # JRB
                # Exclusive mode only: make sure the namespace of the element
                # and of each prefixed attribute is going to be declared.
                #####################################
                if not c14n._inclusive(self):
                    # look_for holds (declaration name, namespace URI) pairs.
                    if node.prefix is None:
                        look_for = [('xmlns', node.namespaceURI),]
                    else:
                        look_for = [('xmlns:%s' %node.prefix, node.namespaceURI),]
                    for a in c14n._attrs(node):
                        if a.namespaceURI != XMLNS.BASE:
                            #print "ATTRIBUTE: ", (a.namespaceURI, a.prefix)
                            if a.prefix:
                                #print "APREFIX: ", a.prefix
                                look_for.append(('xmlns:%s' %a.prefix, a.namespaceURI))

                    for key,namespaceURI in look_for:
                        if ns_rendered.has_key(key):
                            if ns_rendered[key] == namespaceURI:
                                # Dont write out
                                pass
                            else:
                                #ns_to_render += [(key, namespaceURI)]
                                pass
                        elif (key,namespaceURI) in ns_to_render:
                            # Dont write out
                            pass
                        else:
                            # Unique write out, rewrite to render
                            ns_local[key] = namespaceURI
                            # NOTE(review): self._exclusive is None unless the
                            # node passed to __init__ was an element; iterating
                            # it would then fail -- confirm this is unreachable.
                            for a in self._exclusive:
                                if a.nodeName == key:
                                    #self._do_attr(a.nodeName, a.value)
                                    #ns_rendered[key] = namespaceURI
                                    #break
                                    ns_to_render += [(a.nodeName, a.value)]
                                    break
                                # NOTE(review): every key built above is a
                                # string, so "key is None" looks unreachable.
                                elif key is None and a.nodeName == 'xmlns':
                                    #print "DEFAULT: ", (a.nodeName, a.value)
                                    ns_to_render += [(a.nodeName, a.value)]
                                    break
                                #print "KEY: ", key
                            else:
                                # for/else: no matching declaration was found.
                                #print "Look for: ", look_for
                                #print "NS_TO_RENDER: ", ns_to_render
                                #print "EXCLUSIVE NS: ", map(lambda f: (f.nodeName,f.value),self._exclusive)
                                raise RuntimeError, \
                                    'can not find namespace (%s="%s") for exclusive canonicalization'\
                                    %(key, namespaceURI)
                #####################################

                # Sort and render the ns, marking what was rendered.
                ns_to_render.sort(c14n._sorter_ns)
                for n,v in ns_to_render:
                    #XXX JRB, getting 'xmlns,None' here when xmlns=''
                    # A false value (such as None) is written as ''.
                    if v: self._do_attr(n, v)
                    else:
                        v = ''
                        self._do_attr(n, v)
                    ns_rendered[n]=v #0417

                # If exclusive or the parent is in the subset, add the local xml attributes
                # Else, add all local and ancestor xml attributes
                # Sort and render the attributes.
                if not c14n._inclusive(self) or c14n._in_subset(self.subset,node.parentNode): #0426
                    other_attrs.extend(xml_attrs_local.values())
                else:
                    other_attrs.extend(xml_attrs.values())
                #print "OTHER: ", other_attrs
                other_attrs.sort(c14n._sorter)
                for a in other_attrs:
                    self._do_attr(a.nodeName, a.value)
                W('>')

            # Push state, recurse, pop state.
            state, self.state = self.state, (ns_local, ns_rendered, xml_attrs)
            for c in c14n._children(node):
                c14n._implementation.handlers[c.nodeType](self, c)
            self.state = state

            if name: W('</%s>' % name)
        # Install the patched method in the base class's handler table, which
        # is the table the recursion above dispatches through.
        c14n._implementation.handlers[c14n.Node.ELEMENT_NODE] = _do_element
  171. _IN_XML_NS = lambda n: n.namespaceURI == XMLNS.XML
  172. # Does a document/PI has lesser/greater document order than the
  173. # first element?
  174. _LesserElement, _Element, _GreaterElement = range(3)
  175. def _sorter(n1,n2):
  176. '''_sorter(n1,n2) -> int
  177. Sorting predicate for non-NS attributes.'''
  178. i = cmp(n1.namespaceURI, n2.namespaceURI)
  179. if i: return i
  180. return cmp(n1.localName, n2.localName)
  181. def _sorter_ns(n1,n2):
  182. '''_sorter_ns((n,v),(n,v)) -> int
  183. "(an empty namespace URI is lexicographically least)."'''
  184. if n1[0] == 'xmlns': return -1
  185. if n2[0] == 'xmlns': return 1
  186. return cmp(n1[0], n2[0])
  187. def _utilized(n, node, other_attrs, unsuppressedPrefixes):
  188. '''_utilized(n, node, other_attrs, unsuppressedPrefixes) -> boolean
  189. Return true if that nodespace is utilized within the node'''
  190. if n.startswith('xmlns:'):
  191. n = n[6:]
  192. elif n.startswith('xmlns'):
  193. n = n[5:]
  194. if n == node.prefix or n in unsuppressedPrefixes: return 1
  195. for attr in other_attrs:
  196. if n == attr.prefix: return 1
  197. return 0
  198. _in_subset = lambda subset, node: not subset or node in subset
  199. #
  200. # JRB. Currently there is a bug in do_element, but since the underlying
  201. # Data Structures in c14n have changed I can't just apply the
  202. # _implementation2 patch above. But this will work OK for most uses,
  203. # just not XML Signatures.
  204. #
  205. class _implementation:
  206. '''Implementation class for C14N. This accompanies a node during it's
  207. processing and includes the parameters and processing state.'''
  208. # Handler for each node type; populated during module instantiation.
  209. handlers = {}
  210. def __init__(self, node, write, **kw):
  211. '''Create and run the implementation.'''
  212. self.write = write
  213. self.subset = kw.get('subset')
  214. if self.subset:
  215. self.comments = kw.get('comments', 1)
  216. else:
  217. self.comments = kw.get('comments', 0)
  218. self.unsuppressedPrefixes = kw.get('unsuppressedPrefixes')
  219. nsdict = kw.get('nsdict', { 'xml': XMLNS.XML, 'xmlns': XMLNS.BASE })
  220. # Processing state.
  221. self.state = (nsdict, ['xml'], [])
  222. if node.nodeType == Node.DOCUMENT_NODE:
  223. self._do_document(node)
  224. elif node.nodeType == Node.ELEMENT_NODE:
  225. self.documentOrder = _Element # At document element
  226. if self.unsuppressedPrefixes is not None:
  227. self._do_element(node)
  228. else:
  229. inherited = self._inherit_context(node)
  230. self._do_element(node, inherited)
  231. elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
  232. pass
  233. else:
  234. raise TypeError, str(node)
  235. def _inherit_context(self, node):
  236. '''_inherit_context(self, node) -> list
  237. Scan ancestors of attribute and namespace context. Used only
  238. for single element node canonicalization, not for subset
  239. canonicalization.'''
  240. # Collect the initial list of xml:foo attributes.
  241. xmlattrs = filter(_IN_XML_NS, _attrs(node))
  242. # Walk up and get all xml:XXX attributes we inherit.
  243. inherited, parent = [], node.parentNode
  244. while parent and parent.nodeType == Node.ELEMENT_NODE:
  245. for a in filter(_IN_XML_NS, _attrs(parent)):
  246. n = a.localName
  247. if n not in xmlattrs:
  248. xmlattrs.append(n)
  249. inherited.append(a)
  250. parent = parent.parentNode
  251. return inherited
  252. def _do_document(self, node):
  253. '''_do_document(self, node) -> None
  254. Process a document node. documentOrder holds whether the document
  255. element has been encountered such that PIs/comments can be written
  256. as specified.'''
  257. self.documentOrder = _LesserElement
  258. for child in node.childNodes:
  259. if child.nodeType == Node.ELEMENT_NODE:
  260. self.documentOrder = _Element # At document element
  261. self._do_element(child)
  262. self.documentOrder = _GreaterElement # After document element
  263. elif child.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
  264. self._do_pi(child)
  265. elif child.nodeType == Node.COMMENT_NODE:
  266. self._do_comment(child)
  267. elif child.nodeType == Node.DOCUMENT_TYPE_NODE:
  268. pass
  269. else:
  270. raise TypeError, str(child)
  271. handlers[Node.DOCUMENT_NODE] = _do_document
  272. def _do_text(self, node):
  273. '''_do_text(self, node) -> None
  274. Process a text or CDATA node. Render various special characters
  275. as their C14N entity representations.'''
  276. if not _in_subset(self.subset, node): return
  277. s = node.data \
  278. .replace("&", "&amp;") \
  279. .replace("<", "&lt;") \
  280. .replace(">", "&gt;") \
  281. .replace("\015", "&#xD;")
  282. if s: self.write(s)
  283. handlers[Node.TEXT_NODE] = _do_text
  284. handlers[Node.CDATA_SECTION_NODE] = _do_text
  285. def _do_pi(self, node):
  286. '''_do_pi(self, node) -> None
  287. Process a PI node. Render a leading or trailing #xA if the
  288. document order of the PI is greater or lesser (respectively)
  289. than the document element.
  290. '''
  291. if not _in_subset(self.subset, node): return
  292. W = self.write
  293. if self.documentOrder == _GreaterElement: W('\n')
  294. W('<?')
  295. W(node.nodeName)
  296. s = node.data
  297. if s:
  298. W(' ')
  299. W(s)
  300. W('?>')
  301. if self.documentOrder == _LesserElement: W('\n')
  302. handlers[Node.PROCESSING_INSTRUCTION_NODE] = _do_pi
  303. def _do_comment(self, node):
  304. '''_do_comment(self, node) -> None
  305. Process a comment node. Render a leading or trailing #xA if the
  306. document order of the comment is greater or lesser (respectively)
  307. than the document element.
  308. '''
  309. if not _in_subset(self.subset, node): return
  310. if self.comments:
  311. W = self.write
  312. if self.documentOrder == _GreaterElement: W('\n')
  313. W('<!--')
  314. W(node.data)
  315. W('-->')
  316. if self.documentOrder == _LesserElement: W('\n')
  317. handlers[Node.COMMENT_NODE] = _do_comment
  318. def _do_attr(self, n, value):
  319. ''''_do_attr(self, node) -> None
  320. Process an attribute.'''
  321. W = self.write
  322. W(' ')
  323. W(n)
  324. W('="')
  325. s = value \
  326. .replace("&", "&amp;") \
  327. .replace("<", "&lt;") \
  328. .replace('"', '&quot;') \
  329. .replace('\011', '&#x9') \
  330. .replace('\012', '&#xA') \
  331. .replace('\015', '&#xD')
  332. W(s)
  333. W('"')
  334. def _do_element(self, node, initial_other_attrs = []):
  335. '''_do_element(self, node, initial_other_attrs = []) -> None
  336. Process an element (and its children).'''
  337. # Get state (from the stack) make local copies.
  338. # ns_parent -- NS declarations in parent
  339. # ns_rendered -- NS nodes rendered by ancestors
  340. # xml_attrs -- Attributes in XML namespace from parent
  341. # ns_local -- NS declarations relevant to this element
  342. ns_parent, ns_rendered, xml_attrs = \
  343. self.state[0], self.state[1][:], self.state[2][:]
  344. ns_local = ns_parent.copy()
  345. # Divide attributes into NS, XML, and others.
  346. other_attrs = initial_other_attrs[:]
  347. in_subset = _in_subset(self.subset, node)
  348. for a in _attrs(node):
  349. if a.namespaceURI == XMLNS.BASE:
  350. n = a.nodeName
  351. if n == "xmlns:": n = "xmlns" # DOM bug workaround
  352. ns_local[n] = a.nodeValue
  353. elif a.namespaceURI == XMLNS.XML:
  354. if self.unsuppressedPrefixes is None or in_subset:
  355. xml_attrs.append(a)
  356. else:
  357. other_attrs.append(a)
  358. # Render the node
  359. W, name = self.write, None
  360. if in_subset:
  361. name = node.nodeName
  362. W('<')
  363. W(name)
  364. # Create list of NS attributes to render.
  365. ns_to_render = []
  366. for n,v in ns_local.items():
  367. pval = ns_parent.get(n)
  368. # If default namespace is XMLNS.BASE or empty, skip
  369. if n == "xmlns" \
  370. and v in [ XMLNS.BASE, '' ] and pval in [ XMLNS.BASE, '' ]:
  371. continue
  372. # "omit namespace node with local name xml, which defines
  373. # the xml prefix, if its string value is
  374. # http://www.w3.org/XML/1998/namespace."
  375. if n == "xmlns:xml" \
  376. and v in [ 'http://www.w3.org/XML/1998/namespace' ]:
  377. continue
  378. # If different from parent, or parent didn't render
  379. # and if not exclusive, or this prefix is needed or
  380. # not suppressed
  381. if (v != pval or n not in ns_rendered) \
  382. and (self.unsuppressedPrefixes is None or \
  383. _utilized(n, node, other_attrs, self.unsuppressedPrefixes)):
  384. ns_to_render.append((n, v))
  385. # Sort and render the ns, marking what was rendered.
  386. ns_to_render.sort(_sorter_ns)
  387. for n,v in ns_to_render:
  388. self._do_attr(n, v)
  389. ns_rendered.append(n)
  390. # Add in the XML attributes (don't pass to children, since
  391. # we're rendering them), sort, and render.
  392. other_attrs.extend(xml_attrs)
  393. xml_attrs = []
  394. other_attrs.sort(_sorter)
  395. for a in other_attrs:
  396. self._do_attr(a.nodeName, a.value)
  397. W('>')
  398. # Push state, recurse, pop state.
  399. state, self.state = self.state, (ns_local, ns_rendered, xml_attrs)
  400. for c in _children(node):
  401. _implementation.handlers[c.nodeType](self, c)
  402. self.state = state
  403. if name: W('</%s>' % name)
  404. handlers[Node.ELEMENT_NODE] = _do_element
  405. def Canonicalize(node, output=None, **kw):
  406. '''Canonicalize(node, output=None, **kw) -> UTF-8
  407. Canonicalize a DOM document/element node and all descendents.
  408. Return the text; if output is specified then output.write will
  409. be called to output the text and None will be returned
  410. Keyword parameters:
  411. nsdict: a dictionary of prefix:uri namespace entries
  412. assumed to exist in the surrounding context
  413. comments: keep comments if non-zero (default is 0)
  414. subset: Canonical XML subsetting resulting from XPath
  415. (default is [])
  416. unsuppressedPrefixes: do exclusive C14N, and this specifies the
  417. prefixes that should be inherited.
  418. '''
  419. if output:
  420. if _implementation2 is None:
  421. _implementation(node, output.write, **kw)
  422. else:
  423. apply(_implementation2, (node, output.write), kw)
  424. else:
  425. s = c14n.StringIO.StringIO()
  426. if _implementation2 is None:
  427. _implementation(node, s.write, **kw)
  428. else:
  429. apply(_implementation2, (node, s.write), kw)
  430. return s.getvalue()
  431. if __name__ == '__main__': print _copyright