From 6815ebdf92fc7b17f5006dbdb14f3e49236b93ec Mon Sep 17 00:00:00 2001 From: John-Mark Gurney Date: Sat, 1 Jun 2019 23:34:36 -0700 Subject: [PATCH] add default hash, add base object + common property enforcement, FileObject now has hashes and other properties, use realpath for the dir to get common path --- ui/cli.py | 104 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 92 insertions(+), 12 deletions(-) diff --git a/ui/cli.py b/ui/cli.py index 0e1c50d..7605e8f 100644 --- a/ui/cli.py +++ b/ui/cli.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +import datetime import hashlib import pasn1 import os.path @@ -9,17 +10,42 @@ import tempfile import unittest import uuid +# The UUID for the namespace representing the path to a file +_NAMESPACE_MEDASHARE_PATH = uuid.UUID('f6f36b62-3770-4a68-bc3d-dc3e31e429e6') + +_defaulthash = 'sha512' _validhashes = set([ 'sha256', 'sha512' ]) _hashlengths = { len(getattr(hashlib, x)().hexdigest()): x for x in _validhashes } # XXX - add validation -class ObjWrap(object): +class MDBase(object): '''This is a simple wrapper that turns a JSON object into a pythonesc object where attribute accesses work.''' + _common_properties = [ 'uuid', 'type', 'modified' ] + def __init__(self, obj): + for x in self._common_properties: + if x not in obj: + raise ValueError('common property %s not present' % `x`) + self._obj = obj + @classmethod + def create_obj(cls, obj): + '''Using obj as a base, create an instance of MDBase of the + correct type. 
+ + If the correct type is not found, a ValueError is raised.''' + + ty = obj['type'] + + for i in cls.__subclasses__(): + if i._type == ty: + return i(obj) + else: + raise ValueError('Unable to find class for type %s' % `ty`) + def __getattr__(self, k): return self._obj[k] @@ -32,6 +58,9 @@ class ObjWrap(object): def __eq__(self, o): return cmp(self._obj, o) == 0 +class MetaData(MDBase): + _type = 'metadata' + def _trytodict(o): try: return 'dict', o.__to_dict__() @@ -86,8 +115,7 @@ class ObjectStore(object): def loadobj(self, obj): '''Load obj into the data store.''' - if not isinstance(obj, ObjWrap): - obj = ObjWrap(obj) + obj = MDBase.create_obj(obj) id = uuid.UUID(obj.uuid) self._uuids[id] = obj @@ -120,11 +148,46 @@ class ObjectStore(object): h = self.makehash(hash, strict=False) return self._hashes[h] +def _hashfile(fname): + hash = getattr(hashlib, _defaulthash)() + with open(fname) as fp: + r = fp.read() + hash.update(r) + + return '%s:%s' % (_defaulthash, hash.hexdigest()) + class FileObject(object): def __init__(self, _dir, filename): - self._dir = _dir + self._dir = os.path.realpath(_dir) self._fname = filename + # XXX make sure this is correct + self._id = uuid.uuid5(_NAMESPACE_MEDASHARE_PATH, + '/'.join(os.path.split(self._dir) + ( self._fname, ))) + fname = os.path.join(_dir, filename) + s = os.stat(fname) + self._mtime = datetime.datetime.utcfromtimestamp(s.st_mtime) + self._size = s.st_size + self._hashes = ( _hashfile(fname), ) + + @property + def hashes(self): + '''The hashes for this file.''' + + # XXX - should return a frozen dict + return self._hashes + + @property + def mtime(self): + '''The last modified date of the file.''' + + return self._mtime + + @property + def size(self): + '''The length of the file in bytes.''' + + return self._size @property def filename(self): '''The name of the file.''' @@ -141,10 +204,9 @@ class FileObject(object): def id(self): '''The UUID of the path to this file.''' - # XXX make sure this is correct - return 
uuid.uuid5(uuid.NAMESPACE_URL, 'someurl' + '/'.join(os.path.split(self._dir) + ( self._fname, ))) + return self._id -def enumeratedir(_dir): +def enumeratedir(_dir='.'): '''Enumerate all the files and directories (not recursive) in _dir. Returned is a list of FileObjects.''' @@ -153,7 +215,7 @@ def enumeratedir(_dir): class _TestCases(unittest.TestCase): def setUp(self): - d = tempfile.mkdtemp() + d = os.path.realpath(tempfile.mkdtemp()) self.basetempdir = d self.tempdir = os.path.join(d, 'subdir') @@ -164,6 +226,10 @@ class _TestCases(unittest.TestCase): shutil.rmtree(self.basetempdir) self.tempdir = None + def test_mdbase(self): + self.assertRaises(ValueError, MDBase.create_obj, { 'type': 'unknosldkfj' }) + self.assertRaises(ValueError, MDBase.create_obj, { 'type': 'metadata' }) + def test_makehash(self): self.assertRaises(ValueError, ObjectStore.makehash, 'slkj') self.assertRaises(ValueError, ObjectStore.makehash, 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ADA') @@ -176,11 +242,20 @@ class _TestCases(unittest.TestCase): ftest = files[0] fname = 'test.txt' + oldid = ftest.id self.assertEqual(ftest.filename, fname) self.assertEqual(ftest.dir, self.tempdir) - self.assertEqual(ftest.id, uuid.uuid5(uuid.NAMESPACE_URL, - 'someurl' + '/'.join(os.path.split(self.tempdir) + + # XXX - do we add host information? 
+ self.assertEqual(ftest.id, uuid.uuid5(_NAMESPACE_MEDASHARE_PATH, + '/'.join(os.path.split(self.tempdir) + ( fname, )))) + self.assertEqual(ftest.mtime, datetime.datetime(2019, 5, 20, 21, 47, 36)) + self.assertEqual(ftest.size, 15) + self.assertIn('sha512:7d5768d47b6bc27dc4fa7e9732cfa2de506ca262a2749cb108923e5dddffde842bbfee6cb8d692fb43aca0f12946c521cce2633887914ca1f96898478d10ad3f', ftest.hashes) + + # XXX - make sure works w/ relative dirs + files = enumeratedir(os.path.relpath(self.tempdir)) + self.assertEqual(oldid, files[0].id) def test_objectstore(self): objst = ObjectStore() @@ -190,6 +265,7 @@ class _TestCases(unittest.TestCase): objst.loadobj({ 'type': 'metadata', 'uuid': 'c9a1d1e2-3109-4efd-8948-577dc15e44e7', + 'modified': datetime.datetime(2019, 5, 31, 14, 3, 10), 'hashes': [ 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada' ], 'lang': 'en', }) @@ -199,6 +275,7 @@ class _TestCases(unittest.TestCase): byid = objst.by_id('3e466e06-45de-4ecc-84ba-2d2a3d970e96') + self.assertIsInstance(byid, MetaData) self.assertIn(byid, lst) r = byid @@ -206,11 +283,14 @@ class _TestCases(unittest.TestCase): self.assertEqual(r.uuid, '3e466e06-45de-4ecc-84ba-2d2a3d970e96') self.assertEqual(r['dc:author'], 'John-Mark Gurney') - objst.store('testfile.pasn1') + fname = 'testfile.pasn1' + objst.store(fname) - with open('testfile.pasn1') as fp: + with open(fname) as fp: objs = _asn1coder.loads(fp.read()) + os.unlink(fname) + self.assertEqual(len(objs), len(objst)) for i in objs: