|
@@ -3,14 +3,17 @@ |
|
|
import copy |
|
|
import copy |
|
|
import datetime |
|
|
import datetime |
|
|
import hashlib |
|
|
import hashlib |
|
|
import pasn1 |
|
|
|
|
|
|
|
|
import mock |
|
|
import os.path |
|
|
import os.path |
|
|
|
|
|
import pasn1 |
|
|
import shutil |
|
|
import shutil |
|
|
import string |
|
|
import string |
|
|
import tempfile |
|
|
import tempfile |
|
|
import unittest |
|
|
import unittest |
|
|
import uuid |
|
|
import uuid |
|
|
|
|
|
|
|
|
|
|
|
from contextlib import nested |
|
|
|
|
|
|
|
|
# The UUID for the namespace representing the path to a file |
|
|
# The UUID for the namespace representing the path to a file |
|
|
_NAMESPACE_MEDASHARE_PATH = uuid.UUID('f6f36b62-3770-4a68-bc3d-dc3e31e429e6') |
|
|
_NAMESPACE_MEDASHARE_PATH = uuid.UUID('f6f36b62-3770-4a68-bc3d-dc3e31e429e6') |
|
|
|
|
|
|
|
@@ -18,6 +21,16 @@ _defaulthash = 'sha512' |
|
|
_validhashes = set([ 'sha256', 'sha512' ]) |
|
|
_validhashes = set([ 'sha256', 'sha512' ]) |
|
|
_hashlengths = { len(getattr(hashlib, x)().hexdigest()): x for x in _validhashes } |
|
|
_hashlengths = { len(getattr(hashlib, x)().hexdigest()): x for x in _validhashes } |
|
|
|
|
|
|
|
|
|
|
|
def _iterdictlist(obj): |
|
|
|
|
|
itms = obj.items() |
|
|
|
|
|
itms.sort() |
|
|
|
|
|
for k, v in itms: |
|
|
|
|
|
if isinstance(v, list): |
|
|
|
|
|
for i in v: |
|
|
|
|
|
yield k, i |
|
|
|
|
|
else: |
|
|
|
|
|
yield k, v |
|
|
|
|
|
|
|
|
# XXX - add validation |
|
|
# XXX - add validation |
|
|
class MDBase(object): |
|
|
class MDBase(object): |
|
|
'''This is a simple wrapper that turns a JSON object into a pythonesc |
|
|
'''This is a simple wrapper that turns a JSON object into a pythonesc |
|
@@ -27,7 +40,8 @@ class MDBase(object): |
|
|
'uuid': uuid.uuid4, |
|
|
'uuid': uuid.uuid4, |
|
|
'modified': datetime.datetime.utcnow |
|
|
'modified': datetime.datetime.utcnow |
|
|
} |
|
|
} |
|
|
_common_properties = [ 'created_by_ref' ] |
|
|
|
|
|
|
|
|
_common_properties = [ 'type', 'created_by_ref' ] # XXX - add lang? |
|
|
|
|
|
_common_names = set(_common_properties + _generated_properties.keys()) |
|
|
|
|
|
|
|
|
def __init__(self, obj): |
|
|
def __init__(self, obj): |
|
|
obj = copy.deepcopy(obj) |
|
|
obj = copy.deepcopy(obj) |
|
@@ -51,13 +65,18 @@ class MDBase(object): |
|
|
|
|
|
|
|
|
If the correct type is not found, a ValueError is raised.''' |
|
|
If the correct type is not found, a ValueError is raised.''' |
|
|
|
|
|
|
|
|
|
|
|
if isinstance(obj, cls): |
|
|
|
|
|
# XXX - copy? |
|
|
|
|
|
return obj |
|
|
|
|
|
|
|
|
ty = obj['type'] |
|
|
ty = obj['type'] |
|
|
|
|
|
|
|
|
for i in cls.__subclasses__(): |
|
|
for i in cls.__subclasses__(): |
|
|
if i._type == ty: |
|
|
if i._type == ty: |
|
|
return i(obj) |
|
|
return i(obj) |
|
|
else: |
|
|
else: |
|
|
raise ValueError('Unable to find class for type %s' % `ty`) |
|
|
|
|
|
|
|
|
raise ValueError('Unable to find class for type %s' % |
|
|
|
|
|
`ty`) |
|
|
|
|
|
|
|
|
def __getattr__(self, k): |
|
|
def __getattr__(self, k): |
|
|
return self._obj[k] |
|
|
return self._obj[k] |
|
@@ -71,6 +90,10 @@ class MDBase(object): |
|
|
def __eq__(self, o): |
|
|
def __eq__(self, o): |
|
|
return cmp(self._obj, o) == 0 |
|
|
return cmp(self._obj, o) == 0 |
|
|
|
|
|
|
|
|
|
|
|
def items(self, skipcommon=True):
    '''Return the object's properties as a list of (key, value) pairs.

    When skipcommon is true (the default), properties whose names are
    in self._common_names are left out.  When it is false, every
    property is returned.
    '''

    if not skipcommon:
        return list(self._obj.items())

    return [ (k, v) for k, v in self._obj.items() if k not in
        self._common_names ]
|
|
|
|
|
|
|
|
class MetaData(MDBase): |
|
|
class MetaData(MDBase): |
|
|
_type = 'metadata' |
|
|
_type = 'metadata' |
|
|
|
|
|
|
|
@@ -85,7 +108,8 @@ _asn1coder = pasn1.ASN1DictCoder(coerce=_trytodict) |
|
|
class ObjectStore(object): |
|
|
class ObjectStore(object): |
|
|
'''A container to store for the various Metadata objects.''' |
|
|
'''A container to store for the various Metadata objects.''' |
|
|
|
|
|
|
|
|
def __init__(self): |
|
|
|
|
|
|
|
|
def __init__(self, created_by_ref): |
|
|
|
|
|
self._created_by_ref = created_by_ref |
|
|
self._uuids = {} |
|
|
self._uuids = {} |
|
|
self._hashes = {} |
|
|
self._hashes = {} |
|
|
|
|
|
|
|
@@ -123,20 +147,29 @@ class ObjectStore(object): |
|
|
fname.''' |
|
|
fname.''' |
|
|
|
|
|
|
|
|
with open(fname, 'w') as fp: |
|
|
with open(fname, 'w') as fp: |
|
|
fp.write(_asn1coder.dumps(self._uuids.values())) |
|
|
|
|
|
|
|
|
obj = { |
|
|
|
|
|
'created_by_ref': self._created_by_ref, |
|
|
|
|
|
'objects': self._uuids.values(), |
|
|
|
|
|
} |
|
|
|
|
|
fp.write(_asn1coder.dumps(obj)) |
|
|
|
|
|
|
|
|
def loadobj(self, obj): |
|
|
def loadobj(self, obj): |
|
|
'''Load obj into the data store.''' |
|
|
'''Load obj into the data store.''' |
|
|
|
|
|
|
|
|
obj = MDBase.create_obj(obj) |
|
|
obj = MDBase.create_obj(obj) |
|
|
|
|
|
|
|
|
id = uuid.UUID(obj.uuid) |
|
|
|
|
|
|
|
|
if not isinstance(obj.uuid, uuid.UUID): |
|
|
|
|
|
id = uuid.UUID(obj.uuid) |
|
|
|
|
|
else: |
|
|
|
|
|
id = obj.uuid |
|
|
|
|
|
|
|
|
self._uuids[id] = obj |
|
|
self._uuids[id] = obj |
|
|
for j in obj.hashes: |
|
|
for j in obj.hashes: |
|
|
h = self.makehash(j) |
|
|
h = self.makehash(j) |
|
|
self._hashes.setdefault(h, []).append(obj) |
|
|
self._hashes.setdefault(h, []).append(obj) |
|
|
|
|
|
|
|
|
def load(self, fname): |
|
|
|
|
|
|
|
|
@classmethod |
|
|
|
|
|
def load(cls, fname): |
|
|
'''Load objects from the provided file name. |
|
|
'''Load objects from the provided file name. |
|
|
|
|
|
|
|
|
Basic validation will be done on the objects in the file. |
|
|
Basic validation will be done on the objects in the file. |
|
@@ -146,13 +179,20 @@ class ObjectStore(object): |
|
|
with open(fname) as fp: |
|
|
with open(fname) as fp: |
|
|
objs = _asn1coder.loads(fp.read()) |
|
|
objs = _asn1coder.loads(fp.read()) |
|
|
|
|
|
|
|
|
for i in objs: |
|
|
|
|
|
self.loadobj(i) |
|
|
|
|
|
|
|
|
obj = cls(objs['created_by_ref']) |
|
|
|
|
|
for i in objs['objects']: |
|
|
|
|
|
obj.loadobj(i) |
|
|
|
|
|
|
|
|
|
|
|
return obj |
|
|
|
|
|
|
|
|
def by_id(self, id): |
|
|
def by_id(self, id): |
|
|
'''Look up an object by it's UUID.''' |
|
|
'''Look up an object by it's UUID.''' |
|
|
|
|
|
|
|
|
uid = uuid.UUID(id) |
|
|
|
|
|
|
|
|
if not isinstance(id, uuid.UUID): |
|
|
|
|
|
uid = uuid.UUID(id) |
|
|
|
|
|
else: |
|
|
|
|
|
uid = id |
|
|
|
|
|
|
|
|
return self._uuids[uid] |
|
|
return self._uuids[uid] |
|
|
|
|
|
|
|
|
def by_hash(self, hash): |
|
|
def by_hash(self, hash): |
|
@@ -161,6 +201,27 @@ class ObjectStore(object): |
|
|
h = self.makehash(hash, strict=False) |
|
|
h = self.makehash(hash, strict=False) |
|
|
return self._hashes[h] |
|
|
return self._hashes[h] |
|
|
|
|
|
|
|
|
|
|
|
def by_file(self, fname):
    '''Look up the metadata objects that describe the file fname.

    The file's FileObject is fetched from the store by its id; when
    the store does not have one, it is built from the file and loaded
    into the store.  The file's hashes are then tried in turn, and the
    list of metadata objects for the first hash that has any is
    returned.  KeyError is raised when none of the hashes has any.
    '''

    fileid = FileObject.make_id(fname)
    try:
        fileobj = self.by_id(fileid)
    except KeyError:
        # not in the store yet, build it from the file itself
        fileobj = FileObject.from_file(fname, self._created_by_ref)
        self.loadobj(fileobj)

    for hashval in fileobj.hashes:
        # only objects of type metadata are of interest
        mdobjs = [ o for o in self.by_hash(hashval) if o.type == 'metadata' ]
        if mdobjs:
            return mdobjs

    raise KeyError('unable to find metadata for file')
|
|
|
|
|
|
|
|
def _hashfile(fname): |
|
|
def _hashfile(fname): |
|
|
hash = getattr(hashlib, _defaulthash)() |
|
|
hash = getattr(hashlib, _defaulthash)() |
|
|
with open(fname) as fp: |
|
|
with open(fname) as fp: |
|
@@ -172,20 +233,27 @@ def _hashfile(fname): |
|
|
class FileObject(MDBase): |
|
|
class FileObject(MDBase): |
|
|
_type = 'file' |
|
|
_type = 'file' |
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
def make_id(fname):
    '''Compute the id (a UUID) for the local file name fname.

    The name is resolved with os.path.realpath, its directory part and
    base name are joined with a forward slash, and the result is turned
    into a uuid5 in the medashare path namespace.
    '''

    # NOTE(review): only the separator between the directory part and
    # the base name becomes a forward slash; separators inside the
    # directory part are passed through as they are.
    head, tail = os.path.split(os.path.realpath(fname))
    return uuid.uuid5(_NAMESPACE_MEDASHARE_PATH, '/'.join((head, tail)))
|
|
|
|
|
|
|
|
@classmethod |
|
|
@classmethod |
|
|
def from_file(cls, _dir, filename, created_by_ref): |
|
|
|
|
|
_dir = os.path.realpath(_dir) |
|
|
|
|
|
fname = os.path.join(_dir, filename) |
|
|
|
|
|
s = os.stat(fname) |
|
|
|
|
|
|
|
|
def from_file(cls, filename, created_by_ref): |
|
|
|
|
|
s = os.stat(filename) |
|
|
obj = { |
|
|
obj = { |
|
|
'dir': _dir, |
|
|
|
|
|
|
|
|
'dir': os.path.dirname(filename), |
|
|
'created_by_ref': created_by_ref, |
|
|
'created_by_ref': created_by_ref, |
|
|
'filename': filename, |
|
|
|
|
|
'id': uuid.uuid5(_NAMESPACE_MEDASHARE_PATH, |
|
|
|
|
|
'/'.join(os.path.split(fname))), |
|
|
|
|
|
|
|
|
'filename': os.path.basename(filename), |
|
|
|
|
|
'id': cls.make_id(filename), |
|
|
'mtime': datetime.datetime.utcfromtimestamp(s.st_mtime), |
|
|
'mtime': datetime.datetime.utcfromtimestamp(s.st_mtime), |
|
|
'size': s.st_size, |
|
|
'size': s.st_size, |
|
|
'hashes': ( _hashfile(fname), ), |
|
|
|
|
|
|
|
|
'hashes': ( _hashfile(filename), ), |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -196,10 +264,35 @@ def enumeratedir(_dir, created_by_ref): |
|
|
|
|
|
|
|
|
Returned is a list of FileObjects.''' |
|
|
Returned is a list of FileObjects.''' |
|
|
|
|
|
|
|
|
return map(lambda x: FileObject.from_file(_dir, x, created_by_ref), |
|
|
|
|
|
|
|
|
return map(lambda x: FileObject.from_file(os.path.join(_dir, x), created_by_ref), |
|
|
os.listdir(_dir)) |
|
|
os.listdir(_dir)) |
|
|
|
|
|
|
|
|
|
|
|
def main():
    '''Command line entry point: print the metadata of each file argument.

    The object store is loaded from ~/.medashare_store.pasn1.  For every
    file named on the command line, each metadata object found for it
    is printed as "key:<TAB>value" lines, one line per value.
    '''

    from optparse import OptionParser

    parser = OptionParser()
    # NOTE: -l is accepted, but its value is not consulted below;
    # listing is the only action performed.
    parser.add_option('-l', action='store_true', dest='list',
        default=False, help='list metadata')

    options, args = parser.parse_args()

    storefname = os.path.expanduser('~/.medashare_store.pasn1')
    objstr = ObjectStore.load(storefname)

    for i in args:
        for j in objstr.by_file(i):
            for k, v in _iterdictlist(j):
                # a single parenthesized argument prints the same with
                # the print statement and the print function
                print('%s:\t%s' % (k, v))

    # XXX - the store is not written back here, so any FileObject that
    # by_file loaded into it is not persisted.
    #objstr.store()
|
|
|
|
|
|
|
|
|
|
|
# Run the command line tool when this file is executed as a script.
if __name__ == '__main__': # pragma: no cover
    main()
|
|
|
|
|
|
|
|
class _TestCases(unittest.TestCase): |
|
|
class _TestCases(unittest.TestCase): |
|
|
|
|
|
created_by_ref = '867c7563-79ae-435c-a265-9d8509cefac5' |
|
|
def setUp(self): |
|
|
def setUp(self): |
|
|
d = os.path.realpath(tempfile.mkdtemp()) |
|
|
d = os.path.realpath(tempfile.mkdtemp()) |
|
|
self.basetempdir = d |
|
|
self.basetempdir = d |
|
@@ -218,7 +311,7 @@ class _TestCases(unittest.TestCase): |
|
|
|
|
|
|
|
|
baseobj = { |
|
|
baseobj = { |
|
|
'type': 'metadata', |
|
|
'type': 'metadata', |
|
|
'created_by_ref': '867c7563-79ae-435c-a265-9d8509cefac5', |
|
|
|
|
|
|
|
|
'created_by_ref': self.created_by_ref, |
|
|
} |
|
|
} |
|
|
origbase = copy.deepcopy(baseobj) |
|
|
origbase = copy.deepcopy(baseobj) |
|
|
md = MDBase.create_obj(baseobj) |
|
|
md = MDBase.create_obj(baseobj) |
|
@@ -234,7 +327,7 @@ class _TestCases(unittest.TestCase): |
|
|
self.assertEqual(ObjectStore.makehash('e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855', strict=False), 'sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855') |
|
|
self.assertEqual(ObjectStore.makehash('e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855', strict=False), 'sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855') |
|
|
|
|
|
|
|
|
def test_enumeratedir(self): |
|
|
def test_enumeratedir(self): |
|
|
files = enumeratedir(self.tempdir, '867c7563-79ae-435c-a265-9d8509cefac5') |
|
|
|
|
|
|
|
|
files = enumeratedir(self.tempdir, self.created_by_ref) |
|
|
ftest = files[0] |
|
|
ftest = files[0] |
|
|
fname = 'test.txt' |
|
|
fname = 'test.txt' |
|
|
|
|
|
|
|
@@ -254,19 +347,17 @@ class _TestCases(unittest.TestCase): |
|
|
|
|
|
|
|
|
# XXX - make sure works w/ relative dirs |
|
|
# XXX - make sure works w/ relative dirs |
|
|
files = enumeratedir(os.path.relpath(self.tempdir), |
|
|
files = enumeratedir(os.path.relpath(self.tempdir), |
|
|
'867c7563-79ae-435c-a265-9d8509cefac5') |
|
|
|
|
|
|
|
|
self.created_by_ref) |
|
|
self.assertEqual(oldid, files[0].id) |
|
|
self.assertEqual(oldid, files[0].id) |
|
|
|
|
|
|
|
|
def test_objectstore(self): |
|
|
def test_objectstore(self): |
|
|
objst = ObjectStore() |
|
|
|
|
|
|
|
|
|
|
|
objst.load(os.path.join('fixtures', 'sample.data.pasn1')) |
|
|
|
|
|
|
|
|
objst = ObjectStore.load(os.path.join('fixtures', 'sample.data.pasn1')) |
|
|
|
|
|
|
|
|
objst.loadobj({ |
|
|
objst.loadobj({ |
|
|
'type': 'metadata', |
|
|
'type': 'metadata', |
|
|
'uuid': 'c9a1d1e2-3109-4efd-8948-577dc15e44e7', |
|
|
'uuid': 'c9a1d1e2-3109-4efd-8948-577dc15e44e7', |
|
|
'modified': datetime.datetime(2019, 5, 31, 14, 3, 10), |
|
|
'modified': datetime.datetime(2019, 5, 31, 14, 3, 10), |
|
|
'created_by_ref': '867c7563-79ae-435c-a265-9d8509cefac5', |
|
|
|
|
|
|
|
|
'created_by_ref': self.created_by_ref, |
|
|
'hashes': [ 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada' ], |
|
|
'hashes': [ 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada' ], |
|
|
'lang': 'en', |
|
|
'lang': 'en', |
|
|
}) |
|
|
}) |
|
@@ -294,5 +385,48 @@ class _TestCases(unittest.TestCase): |
|
|
|
|
|
|
|
|
self.assertEqual(len(objs), len(objst)) |
|
|
self.assertEqual(len(objs), len(objst)) |
|
|
|
|
|
|
|
|
for i in objs: |
|
|
|
|
|
|
|
|
self.assertEqual(objs['created_by_ref'], self.created_by_ref) |
|
|
|
|
|
|
|
|
|
|
|
for i in objs['objects']: |
|
|
self.assertEqual(objst.by_id(i['uuid']), i) |
|
|
self.assertEqual(objst.by_id(i['uuid']), i) |
|
|
|
|
|
|
|
|
|
|
|
testfname = os.path.join(self.tempdir, 'test.txt') |
|
|
|
|
|
self.assertEqual(objst.by_file(testfname), [ byid ]) |
|
|
|
|
|
|
|
|
|
|
|
# XXX make sure that object store contains fileobject |
|
|
|
|
|
|
|
|
|
|
|
def test_main(self):
    '''Test the parts that are specific to running the program.

    This covers where the store file name comes from and what main()
    prints for a file given on the command line.
    '''

    # setup object store
    storefname = os.path.join(self.tempdir, 'storefname')
    shutil.copy(os.path.join('fixtures', 'sample.data.pasn1'), storefname)

    # setup test fname
    testfname = os.path.join(self.tempdir, 'test.txt')

    import StringIO

    # A single with statement takes all three patches;
    # contextlib.nested is deprecated in favour of this form.
    with mock.patch('os.path.expanduser', side_effect=(storefname, )) \
        as eu, mock.patch('sys.stdout', StringIO.StringIO()) as stdout, \
        mock.patch('sys.argv', [ 'progname', '-l', testfname ]):
        main()

    self.assertEqual(stdout.getvalue(),
        'dc:author:\tJohn-Mark Gurney\nhashes:\tsha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada\nhashes:\tsha512:7d5768d47b6bc27dc4fa7e9732cfa2de506ca262a2749cb108923e5dddffde842bbfee6cb8d692fb43aca0f12946c521cce2633887914ca1f96898478d10ad3f\nlang:\ten\n')
    eu.assert_called_with('~/.medashare_store.pasn1')

    if False: # pragma: no cover
        # Example how to force proper output
        with mock.patch('sys.stdout', StringIO.StringIO()) as ssw:
            print('foobar')
            self.assertEqual(ssw.getvalue(), 'foobar\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# XXX - how to do created_by for object store? |
|
|
|
|
|
# store it in the loaded object? |
|
|
|
|
|
# if so, have to restructure how we handle loading |