|
- #!/usr/bin/env python
-
- #import pdb, sys; mypdb = pdb.Pdb(stdout=sys.stderr); mypdb.set_trace()
-
- import copy
- import datetime
- import hashlib
- import mock
- import os.path
- import pasn1
- import shutil
- import string
- import tempfile
- import unittest
- import uuid
-
- from contextlib import nested
-
# The UUID for the namespace representing the path to a file
_NAMESPACE_MEDASHARE_PATH = uuid.UUID('f6f36b62-3770-4a68-bc3d-dc3e31e429e6')

# Hash algorithm used when creating new hashes.
_defaulthash = 'sha512'
# Hash algorithms accepted when validating/normalizing hash strings.
_validhashes = set([ 'sha256', 'sha512' ])
# Map from hex-digest length to algorithm name; used to detect the
# algorithm of a bare (unprefixed) hash string.
_hashlengths = { len(getattr(hashlib, x)().hexdigest()): x for x in _validhashes }
-
- def _iterdictlist(obj):
- itms = obj.items()
- itms.sort()
- for k, v in itms:
- if isinstance(v, list):
- v = v[:]
- v.sort()
- for i in v:
- yield k, i
- else:
- yield k, v
-
# XXX - add validation
class MDBase(object):
    '''This is a simple wrapper that turns a JSON object into a pythonesc
    object where attribute accesses work.'''

    # Properties generated automatically when absent from the input.
    _generated_properties = {
        'uuid': uuid.uuid4,
        'modified': datetime.datetime.utcnow,
    }
    # Properties every object must carry.
    _common_properties = [ 'type', 'created_by_ref' ] # XXX - add lang?
    _common_optional = [ 'overlay_refs' ]
    # list() so the concatenation also works where dict.keys() is a
    # view rather than a list.
    _common_names = set(_common_properties + list(_generated_properties.keys()))

    def __init__(self, obj):
        '''Wrap the dict obj.  A deep copy is taken, so the caller's
        dict is never modified.  Missing generated properties (uuid,
        modified) are filled in; a missing common property raises
        ValueError.'''

        obj = copy.deepcopy(obj)
        if 'type' not in obj:
            obj['type'] = self._type

        for x in self._common_properties:
            if x not in obj:
                raise ValueError('common property %s not present' % repr(x))

        for x, fun in self._generated_properties.items():
            if x not in obj:
                obj[x] = fun()

        self._obj = obj

    @classmethod
    def create_obj(cls, obj):
        '''Using obj as a base, create an instance of MDBase of the
        correct type.

        If the correct type is not found, a ValueError is raised.'''

        if isinstance(obj, cls):
            obj = obj._obj

        ty = obj['type']

        for i in MDBase.__subclasses__():
            if i._type == ty:
                return i(obj)
        else:
            raise ValueError('Unable to find class for type %s' %
                repr(ty))

    def new_version(self, *args):
        '''For each (k, v) pair passed, add the property k as an
        additional one (or a new one if not yet present), with the
        value v.

        Returns a new object of the same type; the modified
        timestamp is regenerated.'''

        obj = copy.deepcopy(self._obj)

        for k, v in args:
            obj.setdefault(k, []).append(v)

        # drop the old timestamp so __init__ generates a fresh one
        del obj['modified']

        return self.create_obj(obj)

    def __getattr__(self, k):
        return self._obj[k]

    def __getitem__(self, k):
        return self._obj[k]

    def __to_dict__(self):
        return self._obj

    def __eq__(self, o):
        # Compare against either a raw dict or another MDBase.
        # (Replaces the Python 2-only cmp() call.)
        if isinstance(o, MDBase):
            o = o._obj
        return self._obj == o

    def __ne__(self, o):
        # Explicit __ne__ so != stays consistent with __eq__ on
        # Python 2, where it is not derived automatically.
        return not self.__eq__(o)

    def __contains__(self, k):
        return k in self._obj

    def items(self, skipcommon=True):
        '''Return the object's properties as (key, value) pairs.

        When skipcommon is True (the default), the common/generated
        properties (type, created_by_ref, uuid, modified) are
        omitted.  (The flag was previously accepted but ignored.)'''

        if skipcommon:
            return [ (k, v) for k, v in self._obj.items() if k not in
                self._common_names ]

        return list(self._obj.items())
-
class MetaData(MDBase):
    '''Concrete MDBase subclass for generic metadata objects
    (type "metadata").'''

    _type = 'metadata'
-
- def _trytodict(o):
- if isinstance(o, uuid.UUID):
- return 'unicode', str(o)
- try:
- return 'dict', o.__to_dict__()
- except Exception: # pragma: no cover
- raise TypeError('unable to find __to_dict__ on %s: %s' % (type(o), `o`))
-
# Shared ASN.1 coder for (de)serializing the object store; _trytodict
# handles the types pasn1 does not natively support (UUID, MDBase).
_asn1coder = pasn1.ASN1DictCoder(coerce=_trytodict)
-
class ObjectStore(object):
    '''A container to store for the various Metadata objects.'''

    # The _uuids property contains both the UUIDv4 for objects, and
    # looking up the UUIDv5 for FileObjects.

    def __init__(self, created_by_ref):
        self._created_by_ref = created_by_ref
        self._uuids = {}    # uuid.UUID -> object
        self._hashes = {}   # 'alg:hexdigest' -> [ objects ]

    @staticmethod
    def makehash(hashstr, strict=True):
        '''Take a hash string, and return a valid hash string from it.

        This makes sure that it is of the correct type and length.

        If strict is False, the function will detect the length and
        return a valid hash if one can be found.

        Raises ValueError when the string cannot be normalized.'''

        try:
            hash, value = hashstr.split(':')
        except ValueError:
            if strict:
                raise

            # no 'alg:' prefix; detect the algorithm from the length
            hash = _hashlengths[len(hashstr)]
            value = hashstr

        # Only lower-case hex digits are acceptable.  (Set comparison
        # replaces the Python 2-only str.translate(None, ...) idiom.)
        if strict and not set(str(value)) <= set(string.hexdigits.lower()):
            raise ValueError('value has invalid hex digits (must be lower case)', value)

        if hash in _validhashes:
            return ':'.join((hash, value))

        raise ValueError('unknown hash type', hash)

    def __len__(self):
        return len(self._uuids)

    def store(self, fname):
        '''Write out the objects in the store to the file named
        fname.'''

        # binary mode: the ASN.1 encoding is binary data
        with open(fname, 'wb') as fp:
            obj = {
                'created_by_ref': self._created_by_ref,
                'objects': list(self._uuids.values()),
            }
            fp.write(_asn1coder.dumps(obj))

    def loadobj(self, obj):
        '''Load obj into the data store.

        obj may be a dict or an MDBase; it is normalized via
        MDBase.create_obj and indexed by its uuid and by each of
        its hashes.'''

        obj = MDBase.create_obj(obj)

        # normalize the uuid to a UUID instance for use as the key
        if not isinstance(obj.uuid, uuid.UUID):
            objid = uuid.UUID(obj.uuid)
        else:
            objid = obj.uuid

        self._uuids[objid] = obj
        for j in obj.hashes:
            h = self.makehash(j)
            self._hashes.setdefault(h, []).append(obj)

    @classmethod
    def load(cls, fname):
        '''Load objects from the provided file name.

        Basic validation will be done on the objects in the file.

        The objects will be accessible via other methods.'''

        # binary mode: the ASN.1 encoding is binary data
        with open(fname, 'rb') as fp:
            objs = _asn1coder.loads(fp.read())

        obj = cls(objs['created_by_ref'])
        for i in objs['objects']:
            obj.loadobj(i)

        return obj

    def by_id(self, id):
        '''Look up an object by its UUID (a uuid.UUID or its string
        form).  Raises KeyError when not present.'''

        if not isinstance(id, uuid.UUID):
            uid = uuid.UUID(id)
        else:
            uid = id

        return self._uuids[uid]

    def by_hash(self, hash):
        '''Look up the objects carrying the given hash value.  The
        hash may be bare or "alg:"-prefixed.  Raises KeyError when
        not present.'''

        h = self.makehash(hash, strict=False)
        return self._hashes[h]

    def by_file(self, fname):
        '''Return a metadata object for the file named fname.

        If the file is not yet indexed, a FileObject for it is
        created and added to the store.  Raises KeyError when no
        metadata is found.'''

        fid = FileObject.make_id(fname)
        try:
            fobj = self.by_id(fid)
        except KeyError:
            # unable to find it; index the file now
            fobj = FileObject.from_file(fname, self._created_by_ref)
            self.loadobj(fobj)

        for i in fobj.hashes:
            j = self.by_hash(i)

            # Filter out non-metadata objects
            j = [ x for x in j if x.type == 'metadata' ]
            if j:
                return j
            else:
                # NOTE(review): this gives up after the first hash
                # with no metadata instead of trying the remaining
                # hashes -- confirm the short-circuit is intended.
                raise KeyError('unable to find metadata for file')
-
def _hashfile(fname):
    '''Hash the contents of the file named fname with the default
    algorithm, returning an "alg:hexdigest" string.'''

    hash = getattr(hashlib, _defaulthash)()
    # binary mode so hashing is byte-exact regardless of platform;
    # read in chunks to avoid pulling a large file into memory
    with open(fname, 'rb') as fp:
        for chunk in iter(lambda: fp.read(65536), b''):
            hash.update(chunk)

    return '%s:%s' % (_defaulthash, hash.hexdigest())
-
class FileObject(MDBase):
    '''An MDBase object (type "file") describing a local file.'''

    _type = 'file'

    @staticmethod
    def make_id(fname):
        '''Take a local file name, and make the id for it.  Note that
        this converts from the local path separator to a forward slash
        so that it will be the same between Windows and Unix systems.'''

        fname = os.path.realpath(fname)
        # Replace every local separator -- the old os.path.split
        # based code only converted the last one, which broke the
        # cross-platform guarantee above.  Identical output on POSIX.
        return uuid.uuid5(_NAMESPACE_MEDASHARE_PATH,
            fname.replace(os.sep, '/'))

    @classmethod
    def from_file(cls, filename, created_by_ref):
        '''Create a FileObject for filename, recording its directory,
        base name, mtime (UTC), size and content hash.'''

        s = os.stat(filename)
        obj = {
            'dir': os.path.dirname(filename),
            'created_by_ref': created_by_ref,
            'filename': os.path.basename(filename),
            'id': cls.make_id(filename),
            'mtime': datetime.datetime.utcfromtimestamp(s.st_mtime),
            'size': s.st_size,
            'hashes': [ _hashfile(filename), ],
        }

        return cls(obj)
-
def enumeratedir(_dir, created_by_ref):
    '''Enumerate all the files and directories (not recursive) in _dir.

    Returned is a list of FileObjects, in sorted name order so the
    result is deterministic.'''

    # list comprehension instead of map() so a real, indexable list
    # is returned on both Python 2 and 3
    return [ FileObject.from_file(os.path.join(_dir, x), created_by_ref)
        for x in sorted(os.listdir(_dir)) ]
-
def main():
    '''Command-line driver: add (-a), delete (-d) or list (-l)
    metadata for the files given as arguments.  The object store
    lives at ~/.medashare_store.pasn1 and is rewritten on exit.'''

    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option('-a', action='append', dest='add',
        default=[], help='add the arg as metadata for files, tag=value')
    parser.add_option('-d', action='append', dest='delete',
        default=[], help='delete the arg as metadata from files. Either specify tag, and all tags are removed, or specify tag=value and that specific tag will be removed.')
    parser.add_option('-l', action='store_true', dest='list',
        default=False, help='list metadata')

    options, args = parser.parse_args()

    storefname = os.path.expanduser('~/.medashare_store.pasn1')
    objstr = ObjectStore.load(storefname)

    if options.list:
        for i in args:
            for j in objstr.by_file(i):
                # single-argument print() behaves identically on
                # Python 2 and 3
                for k, v in _iterdictlist(j):
                    print('%s:\t%s' % (k, v))
    elif options.add:
        # each -a argument is 'tag=value'; split on the first = only,
        # so values may themselves contain '='
        addprops = [ x.split('=', 1) for x in options.add ]
        for i in args:
            for j in objstr.by_file(i):
                nobj = j.new_version(*addprops)
                objstr.loadobj(nobj)
    elif options.delete:
        for i in args:
            for j in objstr.by_file(i):
                obj = j.__to_dict__()
                for k in options.delete:
                    try:
                        # 'tag=value' removes that single value ...
                        key, v = k.split('=', 1)
                        obj[key].remove(v)
                    except ValueError:
                        # ... while a bare 'tag' (split fails to
                        # unpack) removes the whole tag
                        del obj[k]
                nobj = MDBase(obj)
                objstr.loadobj(nobj)
    else: # pragma: no cover
        raise NotImplementedError

    objstr.store(storefname)
-
if __name__ == '__main__': # pragma: no cover
    # Only run the CLI when executed directly; importing the module
    # (e.g. from the test suite) must not touch the store.
    main()
-
class _TestCases(unittest.TestCase):
    '''Unit tests for the metadata store module.

    NOTE(review): the fixture paths ('fixtures/testfiles',
    'fixtures/sample.data.pasn1') are relative, so the suite appears
    to require being run from the project root -- confirm.'''

    # Fixed creator reference shared by every object the tests make.
    created_by_ref = '867c7563-79ae-435c-a265-9d8509cefac5'

    def setUp(self):
        # Work in a throw-away copy of the fixture tree so tests can
        # freely create and modify files.
        d = os.path.realpath(tempfile.mkdtemp())
        self.basetempdir = d
        self.tempdir = os.path.join(d, 'subdir')

        shutil.copytree(os.path.join('fixtures', 'testfiles'),
            self.tempdir)

    def tearDown(self):
        shutil.rmtree(self.basetempdir)
        self.tempdir = None

    def test_mdbase(self):
        # unknown type, and the abstract base type itself, are rejected
        self.assertRaises(ValueError, MDBase.create_obj, { 'type': 'unknosldkfj' })
        self.assertRaises(ValueError, MDBase.create_obj, { 'type': 'metadata' })

        baseobj = {
            'type': 'metadata',
            'created_by_ref': self.created_by_ref,
        }
        origbase = copy.deepcopy(baseobj)

        # that when an MDBase object is created
        md = MDBase.create_obj(baseobj)

        # it doesn't modify the passed in object (when adding
        # generated properties)
        self.assertEqual(baseobj, origbase)

        # and it has the generted properties
        # Note: cannot mock the functions as they are already
        # referenced at creation time
        self.assertIn('uuid', md)
        self.assertIn('modified', md)

        # That you can create a new version using new_version
        md2 = md.new_version(('dc:creator', 'Jim Bob',))

        # that they are different
        self.assertNotEqual(md, md2)

        # and that the new modified time is different from the old
        self.assertNotEqual(md.modified, md2.modified)

        # and that the modification is present
        self.assertEqual(md2['dc:creator'], [ 'Jim Bob' ])

    def test_makehash(self):
        # no prefix + strict, upper-case digits, and a bogus
        # algorithm must all be rejected
        self.assertRaises(ValueError, ObjectStore.makehash, 'slkj')
        self.assertRaises(ValueError, ObjectStore.makehash, 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ADA')
        self.assertRaises(ValueError, ObjectStore.makehash, 'bogushash:9e0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ADA', strict=False)

        # non-strict mode detects the algorithm from the digest length
        self.assertEqual(ObjectStore.makehash('cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e', strict=False), 'sha512:cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e')
        self.assertEqual(ObjectStore.makehash('e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855', strict=False), 'sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855')

    def test_enumeratedir(self):
        files = enumeratedir(self.tempdir, self.created_by_ref)
        ftest = files[0]
        fname = 'test.txt'

        # make sure that they are of type MDBase
        self.assertIsInstance(ftest, MDBase)

        oldid = ftest.id
        self.assertEqual(ftest.filename, fname)
        self.assertEqual(ftest.dir, self.tempdir)
        # XXX - do we add host information?
        self.assertEqual(ftest.id, uuid.uuid5(_NAMESPACE_MEDASHARE_PATH,
            '/'.join(os.path.split(self.tempdir) +
            ( fname, ))))
        self.assertEqual(ftest.mtime, datetime.datetime(2019, 5, 20, 21, 47, 36))
        self.assertEqual(ftest.size, 15)
        self.assertIn('sha512:7d5768d47b6bc27dc4fa7e9732cfa2de506ca262a2749cb108923e5dddffde842bbfee6cb8d692fb43aca0f12946c521cce2633887914ca1f96898478d10ad3f', ftest.hashes)

        # XXX - make sure works w/ relative dirs
        files = enumeratedir(os.path.relpath(self.tempdir),
            self.created_by_ref)
        self.assertEqual(oldid, files[0].id)

    def test_mdbaseoverlay(self):
        objst = ObjectStore(self.created_by_ref)

        # that a base object
        bid = uuid.uuid4()
        objst.loadobj({
            'type': 'metadata',
            'uuid': bid,
            'modified': datetime.datetime(2019, 6, 10, 14, 3, 10),
            'created_by_ref': self.created_by_ref,
            'hashes': [ 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada' ],
            'someprop': [ 'somevalue' ],
            'lang': 'en',
        })

        # can have an overlay object
        oid = uuid.uuid4()
        dhash = 'sha256:a7c96262c21db9a06fd49e307d694fd95f624569f9b35bb3ffacd880440f9787'
        objst.loadobj({
            'type': 'metadata',
            'uuid': oid,
            'modified': datetime.datetime(2019, 6, 10, 18, 3, 10),
            'created_by_ref': self.created_by_ref,
            'hashes': [ dhash ],
            'overlay_refs': [ bid ],
            'lang': 'en',
        })

        # and that when you get it's properties
        oobj = objst.by_id(oid)
        odict = dict(oobj.items())

        # that is has the overlays property
        self.assertEqual(odict['overlay_refs'], [ bid ])

    def test_objectstore(self):
        objst = ObjectStore.load(os.path.join('fixtures', 'sample.data.pasn1'))

        objst.loadobj({
            'type': 'metadata',
            'uuid': 'c9a1d1e2-3109-4efd-8948-577dc15e44e7',
            'modified': datetime.datetime(2019, 5, 31, 14, 3, 10),
            'created_by_ref': self.created_by_ref,
            'hashes': [ 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada' ],
            'lang': 'en',
        })

        # both the fixture object and the one just loaded share the hash
        lst = objst.by_hash('91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada')
        self.assertEqual(len(lst), 2)

        byid = objst.by_id('3e466e06-45de-4ecc-84ba-2d2a3d970e96')

        self.assertIsInstance(byid, MetaData)
        self.assertIn(byid, lst)

        r = byid

        self.assertEqual(r.uuid, '3e466e06-45de-4ecc-84ba-2d2a3d970e96')
        self.assertEqual(r['dc:creator'], [ u'John-Mark Gurney' ])

        # round-trip the store through a file
        fname = 'testfile.pasn1'
        objst.store(fname)

        with open(fname) as fp:
            objs = _asn1coder.loads(fp.read())

        os.unlink(fname)

        self.assertEqual(len(objs), len(objst))

        self.assertEqual(objs['created_by_ref'], self.created_by_ref)

        for i in objs['objects']:
            self.assertEqual(objst.by_id(i['uuid']), i)

        # by_file creates the FileObject on first call; the second
        # call exercises the already-indexed path
        testfname = os.path.join(self.tempdir, 'test.txt')
        self.assertEqual(objst.by_file(testfname), [ byid ])
        self.assertEqual(objst.by_file(testfname), [ byid ])

        self.assertRaises(KeyError, objst.by_file, '/dev/null')

        # XXX make sure that object store contains fileobject

        # Tests to add:
        # Non-duplicates when same metadata is located by multiple hashes.

    def test_main(self):
        # Test the main runner, this is only testing things that are
        # specific to running the program, like where the store is
        # created.

        # setup object store
        storefname = os.path.join(self.tempdir, 'storefname')
        shutil.copy(os.path.join('fixtures', 'sample.data.pasn1'), storefname)

        # setup test fname
        testfname = os.path.join(self.tempdir, 'test.txt')

        import sys
        import StringIO
        import itertools

        # redirect the store path into the scratch directory for the
        # whole scenario below
        with mock.patch('os.path.expanduser', side_effect=itertools.repeat(storefname)) \
            as eu:
            # list the initial metadata
            with nested(mock.patch('sys.stdout',
                StringIO.StringIO()), mock.patch('sys.argv',
                [ 'progname', '-l', testfname ])) as (stdout, argv):
                main()
                self.assertEqual(stdout.getvalue(),
                    'dc:creator:\tJohn-Mark Gurney\nhashes:\tsha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada\nhashes:\tsha512:7d5768d47b6bc27dc4fa7e9732cfa2de506ca262a2749cb108923e5dddffde842bbfee6cb8d692fb43aca0f12946c521cce2633887914ca1f96898478d10ad3f\nlang:\ten\n')
            eu.assert_called_with('~/.medashare_store.pasn1')

            # add two tags, one whose value itself contains '='
            with nested(mock.patch('sys.stdout',
                StringIO.StringIO()), mock.patch('sys.argv',
                [ 'progname', '-a', 'dc:creator=Another user', '-a', 'foo=bar=baz', testfname ])) as (stdout, argv):
                main()

            with nested(mock.patch('sys.stdout',
                StringIO.StringIO()), mock.patch('sys.argv',
                [ 'progname', '-l', testfname ])) as (stdout, argv):
                main()
                self.assertEqual(stdout.getvalue(),
                    'dc:creator:\tAnother user\ndc:creator:\tJohn-Mark Gurney\nfoo:\tbar=baz\nhashes:\tsha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada\nhashes:\tsha512:7d5768d47b6bc27dc4fa7e9732cfa2de506ca262a2749cb108923e5dddffde842bbfee6cb8d692fb43aca0f12946c521cce2633887914ca1f96898478d10ad3f\nlang:\ten\n')

            # bare tag deletes every value of that tag
            with nested(mock.patch('sys.stdout',
                StringIO.StringIO()), mock.patch('sys.argv',
                [ 'progname', '-d', 'dc:creator', testfname ])) as (stdout, argv):
                main()

            with nested(mock.patch('sys.stdout',
                StringIO.StringIO()), mock.patch('sys.argv',
                [ 'progname', '-l', testfname ])) as (stdout, argv):
                main()
                self.assertEqual(stdout.getvalue(),
                    'foo:\tbar=baz\nhashes:\tsha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada\nhashes:\tsha512:7d5768d47b6bc27dc4fa7e9732cfa2de506ca262a2749cb108923e5dddffde842bbfee6cb8d692fb43aca0f12946c521cce2633887914ca1f96898478d10ad3f\nlang:\ten\n')

            # a tag may hold several values
            with nested(mock.patch('sys.stdout',
                StringIO.StringIO()), mock.patch('sys.argv',
                [ 'progname', '-a', 'foo=bleh', testfname ])) as (stdout, argv):
                main()

            with nested(mock.patch('sys.stdout',
                StringIO.StringIO()), mock.patch('sys.argv',
                [ 'progname', '-l', testfname ])) as (stdout, argv):
                main()
                self.assertEqual(stdout.getvalue(),
                    'foo:\tbar=baz\nfoo:\tbleh\nhashes:\tsha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada\nhashes:\tsha512:7d5768d47b6bc27dc4fa7e9732cfa2de506ca262a2749cb108923e5dddffde842bbfee6cb8d692fb43aca0f12946c521cce2633887914ca1f96898478d10ad3f\nlang:\ten\n')

            # tag=value deletes only that one value
            with nested(mock.patch('sys.stdout',
                StringIO.StringIO()), mock.patch('sys.argv',
                [ 'progname', '-d', 'foo=bar=baz', testfname ])) as (stdout, argv):
                main()

            with nested(mock.patch('sys.stdout',
                StringIO.StringIO()), mock.patch('sys.argv',
                [ 'progname', '-l', testfname ])) as (stdout, argv):
                main()
                self.assertEqual(stdout.getvalue(),
                    'foo:\tbleh\nhashes:\tsha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada\nhashes:\tsha512:7d5768d47b6bc27dc4fa7e9732cfa2de506ca262a2749cb108923e5dddffde842bbfee6cb8d692fb43aca0f12946c521cce2633887914ca1f96898478d10ad3f\nlang:\ten\n')
|