@@ -30,6 +30,9 @@ import uuid
# The UUID for the namespace representing the path to a file
_NAMESPACE_MEDASHARE_PATH = uuid.UUID('f6f36b62-3770-4a68-bc3d-dc3e31e429e6')
# useful for debugging when stderr is redirected/captured
_real_stderr = sys.stderr
_defaulthash = 'sha512'
_validhashes = set([ 'sha256', 'sha512' ])
_hashlengths = { len(getattr(hashlib, x)().hexdigest()): x for x in
@@ -70,7 +73,8 @@ class MDBase(object):
_generated_properties = {
'uuid': uuid.uuid4,
'modified': datetime.datetime.utcnow
'modified': lambda: datetime.datetime.now(
tz=datetime.timezone.utc),
}
# When decoding, the decoded value should be passed to this function
@@ -368,7 +372,7 @@ class Persona(object):
return True
def by_file(self, fname):
'''Return a metadata object for the file named fname.'''
'''Return a file object for the file named fname.'''
fobj = FileObject.from_file(fname, self._created_by_ref)
@@ -421,14 +425,20 @@ class ObjectStore(object):
def __len__(self):
'''Return the number of entries in the internal uuid index.

Note: file objects are indexed under more than one uuid key
(see loadobj), so this counts index entries, not necessarily
distinct objects.'''
return len(self._uuids)
def __iter__(self):
'''Iterate over the values of the internal uuid index.

Objects stored under multiple uuid keys (file objects) may be
yielded more than once.'''
return iter(self._uuids.values())
def store(self, fname):
    '''Write out the objects in the store to the file named
    fname, encoded with _asn1coder.

    The payload is a dict with the store's created_by_ref and the
    list of stored objects.'''

    # eliminate objs stored by multiple uuids (FileObjects are
    # indexed under both their uuid and their path-derived id, so
    # dedupe by object identity before serializing)
    objs = { id(x): x for x in self._uuids.values() }
    with open(fname, 'wb') as fp:
        obj = {
            'created_by_ref': self._created_by_ref,
            'objects': list(objs.values()),
        }
        fp.write(_asn1coder.dumps(obj))
@@ -438,6 +448,10 @@ class ObjectStore(object):
obj = MDBase.create_obj(obj)
self._uuids[obj.uuid] = obj
if obj.type == 'file':
self._uuids[_makeuuid(obj.id)] = obj
for j in obj.hashes:
h = self.makehash(j)
self._hashes.setdefault(h, []).append(obj)
@@ -479,14 +493,10 @@ class ObjectStore(object):
'''Return a metadata object for the file named fname.'''
fid = FileObject.make_id(fname)
try:
fobj = self.by_id(fid)
except KeyError:
# unable to find it
fobj = FileObject.from_file(fname, self._created_by_ref)
self.loadobj(fobj)
# XXX - does not verify
fobj = self.by_id(fid)
fobj.verify()
for i in fobj.hashes:
j = self.by_hash(i)
@@ -536,14 +546,27 @@ class FileObject(MDBase):
'created_by_ref': created_by_ref,
'filename': os.path.basename(filename),
'id': cls.make_id(filename),
'mtime': datetime.datetime.utcfromtimestamp(s.st_mtime),
'mtime': datetime.datetime.fromtimestamp(s.st_mtime,
tz=datetime.timezone.utc),
'size': s.st_size,
'hashes': [ _hashfile(filename), ],
}
return cls(obj)
def verify(self, complete=False):
    '''Check that this FileObject still matches the file on disk.

    By default only the modification time and the size are
    compared; a mismatch raises ValueError.'''

    st = os.stat(os.path.join(self.dir, self.filename))
    # normalize the on-disk mtime through an aware UTC datetime so
    # it compares against the stored (tz-aware) mtime
    ondisk_mtime = datetime.datetime.fromtimestamp(st.st_mtime,
        tz=datetime.timezone.utc).timestamp()
    changed = self.mtime.timestamp() != ondisk_mtime
    changed = changed or self.size != st.st_size
    if changed:
        raise ValueError('file %s has changed' %
            repr(self.filename))
def enumeratedir(_dir, created_by_ref):
'''Enumerate all the files and directories (not recursive) in _dir.
@@ -671,21 +694,40 @@ def cmd_modify(options):
write_objstore(options, objstr)
def cmd_dump(options):
    '''Print the repr of every object in the object store, one per
    line.'''

    persona, objstr = get_objstore(options)
    for obj in objstr:
        print(repr(obj))
def cmd_list(options):
    '''List the metadata for each file in options.files.

    If a file is not yet in the object store, a FileObject is
    created for it via the persona and loaded into the store, then
    its metadata is listed.  Exits with status 1 if a named file
    does not exist.'''

    persona, objstr = get_objstore(options)

    for i in options.files:
        try:
            objs = objstr.by_file(i)
        except (ValueError, KeyError):
            # create the file, it may have the same hash
            # as something else
            try:
                fobj = persona.by_file(i)
                objstr.loadobj(fobj)
                objs = objstr.by_file(i)
            except FileNotFoundError:
                # XXX - tell the difference?
                print('ERROR: file not found: %s' % repr(i),
                    file=sys.stderr)
                sys.exit(1)

        for j in objstr.by_file(i):
            #print >>sys.stderr, `j._obj`
            for k, v in _iterdictlist(j):
                print('%s:\t%s' % (k, v))
def main():
import argparse
@@ -723,6 +765,9 @@ def main():
help='files to modify')
parser_list.set_defaults(func=cmd_list)
parser_dump = subparsers.add_parser('dump', help='dump all the objects')
parser_dump.set_defaults(func=cmd_dump)
options = parser.parse_args()
fun = options.func
@@ -875,7 +920,8 @@ class _TestCases(unittest.TestCase):
self.assertEqual(ftest.id, uuid.uuid5(_NAMESPACE_MEDASHARE_PATH,
'/'.join(os.path.split(self.tempdir) +
( fname, ))))
self.assertEqual(ftest.mtime, datetime.datetime(2019, 5, 20, 21, 47, 36))
self.assertEqual(ftest.mtime, datetime.datetime(2019, 5, 20,
21, 47, 36, tzinfo=datetime.timezone.utc))
self.assertEqual(ftest.size, 15)
self.assertIn('sha512:7d5768d47b6bc27dc4fa7e9732cfa2de506ca262a2749cb108923e5dddffde842bbfee6cb8d692fb43aca0f12946c521cce2633887914ca1f96898478d10ad3f', ftest.hashes)
@@ -972,6 +1018,8 @@ class _TestCases(unittest.TestCase):
# has the correct created_by_ref
self.assertEqual(testobj.created_by_ref, idobj.uuid)
self.assertEqual(testobj.type, 'file')
# and has a signature
self.assertIn('sig', testobj)
@@ -1026,17 +1074,15 @@ class _TestCases(unittest.TestCase):
# and that it can be verified
persona.verify(mdobj)
# that when round tripped through pasn1.
a = mdobj.encode()
b = MDBase.decode(a)
def test_objectstore(self):
persona = Persona.load(os.path.join('fixtures', 'sample.persona.pasn1'))
objst = ObjectStore.load(os.path.join('fixtures', 'sample.data.pasn1'))
objst.loadobj({
'type': 'metadata',
'uuid': uuid.UUID('c9a1d1e2-3109-4efd-8948-577dc15e44e7'),
'modified': datetime.datetime(2019, 5, 31, 14, 3, 10),
'modified': datetime.datetime(2019, 5, 31, 14, 3, 10,
tzinfo=datetime.timezone.utc),
'created_by_ref': self.created_by_ref,
'hashes': [ 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada' ],
'lang': 'en',
@@ -1055,6 +1101,7 @@ class _TestCases(unittest.TestCase):
self.assertEqual(r.uuid, uuid.UUID('3e466e06-45de-4ecc-84ba-2d2a3d970e96'))
self.assertEqual(r['dc:creator'], [ 'John-Mark Gurney' ])
# test storing the object store
fname = 'testfile.pasn1'
objst.store(fname)
@@ -1067,12 +1114,19 @@ class _TestCases(unittest.TestCase):
self.assertEqual(objs['created_by_ref'], self.created_by_ref.bytes)
# make sure that the read back data matches
for i in objs['objects']:
i['created_by_ref'] = uuid.UUID(bytes=i['created_by_ref'])
i['uuid'] = uuid.UUID(bytes=i['uuid'])
self.assertEqual(objst.by_id(i['uuid']), i)
# that a file
testfname = os.path.join(self.tempdir, 'test.txt')
# when registered
objst.loadobj(persona.by_file(testfname))
# can be found
self.assertEqual(objst.by_file(testfname), [ byid ])
self.assertEqual(objst.by_file(testfname), [ byid ])
@@ -1113,6 +1167,13 @@ class _TestCases(unittest.TestCase):
elif special == 'change newfile.txt':
with open(newtestfname, 'w') as fp:
fp.write('some new contents')
elif special == 'verify store object cnt':
with open(storefname, 'rb') as fp:
pasn1obj = pasn1.loads(fp.read())
objcnt = len(pasn1obj['objects'])
self.assertEqual(objcnt, cmd['count'])
else: # pragma: no cover
raise ValueError('unhandled special: %s' % repr(special))
continue
@@ -1172,8 +1233,6 @@ class _TestCases(unittest.TestCase):
import itertools
real_stderr = sys.stderr
with mock.patch('os.path.expanduser', side_effect=expandusermock) \
as eu, mock.patch('medashare.cli.open') as op:
# that when opening the store and identity fails