MetaData Sharing
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

218 lines
5.5 KiB

  1. #!/usr/bin/env python
  2. import hashlib
  3. import pasn1
  4. import os.path
  5. import shutil
  6. import string
  7. import tempfile
  8. import unittest
  9. import uuid
  10. _validhashes = set([ 'sha256', 'sha512' ])
  11. _hashlengths = { len(getattr(hashlib, x)().hexdigest()): x for x in _validhashes }
  12. # XXX - add validation
  13. class ObjWrap(object):
  14. '''This is a simple wrapper that turns a JSON object into a pythonesc
  15. object where attribute accesses work.'''
  16. def __init__(self, obj):
  17. self._obj = obj
  18. def __getattr__(self, k):
  19. return self._obj[k]
  20. def __getitem__(self, k):
  21. return self._obj[k]
  22. def __to_dict__(self):
  23. return self._obj
  24. def __eq__(self, o):
  25. return cmp(self._obj, o) == 0
  26. def _trytodict(o):
  27. try:
  28. return 'dict', o.__to_dict__()
  29. except Exception:
  30. raise TypeError('unable to find __to_dict__ on %s' % type(o))
  31. _asn1coder = pasn1.ASN1DictCoder(coerce=_trytodict)
  32. class ObjectStore(object):
  33. '''A container to store for the various Metadata objects.'''
  34. def __init__(self):
  35. self._uuids = {}
  36. self._hashes = {}
  37. @staticmethod
  38. def makehash(hashstr, strict=True):
  39. '''Take a hash string, and return a valid hash string from it.
  40. This makes sure that it is of the correct type and length.
  41. If strict is False, the function will detect the length and
  42. return a valid hash if one can be found.'''
  43. try:
  44. hash, value = hashstr.split(':')
  45. except ValueError:
  46. if strict:
  47. raise
  48. hash = _hashlengths[len(hashstr)]
  49. value = hashstr
  50. if strict and len(str(value).translate(None, string.hexdigits.lower())) != 0:
  51. raise ValueError('value has invalid hex digits (must be lower case)', value)
  52. if hash in _validhashes:
  53. return ':'.join((hash, value))
  54. raise ValueError
  55. def __len__(self):
  56. return len(self._uuids)
  57. def store(self, fname):
  58. '''Write out the objects in the store to the file named
  59. fname.'''
  60. with open(fname, 'w') as fp:
  61. fp.write(_asn1coder.dumps(self._uuids.values()))
  62. def loadobj(self, obj):
  63. '''Load obj into the data store.'''
  64. if not isinstance(obj, ObjWrap):
  65. obj = ObjWrap(obj)
  66. id = uuid.UUID(obj.uuid)
  67. self._uuids[id] = obj
  68. for j in obj.hashes:
  69. h = self.makehash(j)
  70. self._hashes.setdefault(h, []).append(obj)
  71. def load(self, fname):
  72. '''Load objects from the provided file name.
  73. Basic validation will be done on the objects in the file.
  74. The objects will be accessible via other methods.'''
  75. with open(fname) as fp:
  76. objs = _asn1coder.loads(fp.read())
  77. for i in objs:
  78. self.loadobj(i)
  79. def by_id(self, id):
  80. '''Look up an object by it's UUID.'''
  81. uid = uuid.UUID(id)
  82. return self._uuids[uid]
  83. def by_hash(self, hash):
  84. '''Look up an object by it's hash value.'''
  85. h = self.makehash(hash, strict=False)
  86. return self._hashes[h]
  87. class FileObject(object):
  88. def __init__(self, _dir, filename):
  89. self._dir = _dir
  90. self._fname = filename
  91. @property
  92. def filename(self):
  93. '''The name of the file.'''
  94. return self._fname
  95. @property
  96. def dir(self):
  97. '''The directory of the file.'''
  98. return self._dir
  99. @property
  100. def id(self):
  101. '''The UUID of the path to this file.'''
  102. # XXX make sure this is correct
  103. return uuid.uuid5(uuid.NAMESPACE_URL, 'someurl' + '/'.join(os.path.split(self._dir) + ( self._fname, )))
  104. def enumeratedir(_dir):
  105. '''Enumerate all the files and directories (not recursive) in _dir.
  106. Returned is a list of FileObjects.'''
  107. return map(lambda x: FileObject(_dir, x), os.listdir(_dir))
  108. class _TestCases(unittest.TestCase):
  109. def setUp(self):
  110. d = tempfile.mkdtemp()
  111. self.basetempdir = d
  112. self.tempdir = os.path.join(d, 'subdir')
  113. shutil.copytree(os.path.join('fixtures', 'testfiles'),
  114. self.tempdir)
  115. def tearDown(self):
  116. shutil.rmtree(self.basetempdir)
  117. self.tempdir = None
  118. def test_makehash(self):
  119. self.assertRaises(ValueError, ObjectStore.makehash, 'slkj')
  120. self.assertRaises(ValueError, ObjectStore.makehash, 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ADA')
  121. self.assertEqual(ObjectStore.makehash('cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e', strict=False), 'sha512:cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e')
  122. self.assertEqual(ObjectStore.makehash('e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855', strict=False), 'sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855')
  123. def test_enumeratedir(self):
  124. files = enumeratedir(self.tempdir)
  125. ftest = files[0]
  126. fname = 'test.txt'
  127. self.assertEqual(ftest.filename, fname)
  128. self.assertEqual(ftest.dir, self.tempdir)
  129. self.assertEqual(ftest.id, uuid.uuid5(uuid.NAMESPACE_URL,
  130. 'someurl' + '/'.join(os.path.split(self.tempdir) +
  131. ( fname, ))))
  132. def test_objectstore(self):
  133. objst = ObjectStore()
  134. objst.load(os.path.join('fixtures', 'sample.data.pasn1'))
  135. objst.loadobj({
  136. 'type': 'metadata',
  137. 'uuid': 'c9a1d1e2-3109-4efd-8948-577dc15e44e7',
  138. 'hashes': [ 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada' ],
  139. 'lang': 'en',
  140. })
  141. lst = objst.by_hash('91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada')
  142. self.assertEqual(len(lst), 2)
  143. byid = objst.by_id('3e466e06-45de-4ecc-84ba-2d2a3d970e96')
  144. self.assertIn(byid, lst)
  145. r = byid
  146. self.assertEqual(r.uuid, '3e466e06-45de-4ecc-84ba-2d2a3d970e96')
  147. self.assertEqual(r['dc:author'], 'John-Mark Gurney')
  148. objst.store('testfile.pasn1')
  149. with open('testfile.pasn1') as fp:
  150. objs = _asn1coder.loads(fp.read())
  151. self.assertEqual(len(objs), len(objst))
  152. for i in objs:
  153. self.assertEqual(objst.by_id(i['uuid']), i)