Browse Source

add default hash, add base object + common property enforcement,

FileObject now has hashes and other properties, use realpath
for the dir to get common path
main
John-Mark Gurney 6 years ago
parent
commit
6815ebdf92
1 changed files with 92 additions and 12 deletions
  1. +92
    -12
      ui/cli.py

+ 92
- 12
ui/cli.py View File

@@ -1,5 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python


import datetime
import hashlib import hashlib
import pasn1 import pasn1
import os.path import os.path
@@ -9,17 +10,42 @@ import tempfile
import unittest import unittest
import uuid import uuid


# The UUID for the namespace representing the path to a file
_NAMESPACE_MEDASHARE_PATH = uuid.UUID('f6f36b62-3770-4a68-bc3d-dc3e31e429e6')

_defaulthash = 'sha512'
_validhashes = set([ 'sha256', 'sha512' ]) _validhashes = set([ 'sha256', 'sha512' ])
_hashlengths = { len(getattr(hashlib, x)().hexdigest()): x for x in _validhashes } _hashlengths = { len(getattr(hashlib, x)().hexdigest()): x for x in _validhashes }


# XXX - add validation # XXX - add validation
class ObjWrap(object):
class MDBase(object):
'''This is a simple wrapper that turns a JSON object into a pythonesc '''This is a simple wrapper that turns a JSON object into a pythonesc
object where attribute accesses work.''' object where attribute accesses work.'''


_common_properties = [ 'uuid', 'type', 'modified' ]

def __init__(self, obj): def __init__(self, obj):
for x in self._common_properties:
if x not in obj:
raise ValueError('common property %s not present' % `x`)

self._obj = obj self._obj = obj


@classmethod
def create_obj(cls, obj):
    '''Using obj as a base, create an instance of the subclass of
    cls that matches its type.

    obj is indexed with the key 'type'; the direct subclasses of cls
    (as returned by cls.__subclasses__()) are searched in order for
    one whose _type attribute equals that value, and the first match
    is instantiated with obj and returned.

    If the correct type is not found, a ValueError is raised.  If
    obj has no 'type' entry, the lookup error from obj['type'] (a
    KeyError for a dict) propagates unchanged.'''

    ty = obj['type']

    for i in cls.__subclasses__():
        if i._type == ty:
            return i(obj)

    # No subclass claimed this type.  repr() is used instead of the
    # backtick syntax, which is a syntax error on Python 3; the
    # resulting message is identical.
    raise ValueError('Unable to find class for type %s' % repr(ty))

def __getattr__(self, k): def __getattr__(self, k):
return self._obj[k] return self._obj[k]


@@ -32,6 +58,9 @@ class ObjWrap(object):
def __eq__(self, o): def __eq__(self, o):
return cmp(self._obj, o) == 0 return cmp(self._obj, o) == 0


# MDBase subclass for objects whose 'type' is 'metadata'.  The _type
# attribute is the value MDBase.create_obj compares against obj['type']
# when choosing which subclass to instantiate.
class MetaData(MDBase):
_type = 'metadata'

def _trytodict(o): def _trytodict(o):
try: try:
return 'dict', o.__to_dict__() return 'dict', o.__to_dict__()
@@ -86,8 +115,7 @@ class ObjectStore(object):
def loadobj(self, obj): def loadobj(self, obj):
'''Load obj into the data store.''' '''Load obj into the data store.'''


if not isinstance(obj, ObjWrap):
obj = ObjWrap(obj)
obj = MDBase.create_obj(obj)


id = uuid.UUID(obj.uuid) id = uuid.UUID(obj.uuid)
self._uuids[id] = obj self._uuids[id] = obj
@@ -120,11 +148,46 @@ class ObjectStore(object):
h = self.makehash(hash, strict=False) h = self.makehash(hash, strict=False)
return self._hashes[h] return self._hashes[h]


def _hashfile(fname):
    '''Return the hash of the contents of the file named fname.

    The module-level _defaulthash names the hashlib algorithm used.
    The result is a string of the form '<hashname>:<hexdigest>'.

    Any error from opening or reading the file propagates to the
    caller.'''

    h = getattr(hashlib, _defaulthash)()

    # Binary mode: hash exactly the bytes on disk (no newline
    # translation or decoding), and hand update() a bytes object.
    with open(fname, 'rb') as fp:
        # Read in fixed-size chunks so that a large file is never
        # held in memory all at once.
        for chunk in iter(lambda: fp.read(64 * 1024), b''):
            h.update(chunk)

    return '%s:%s' % (_defaulthash, h.hexdigest())

class FileObject(object): class FileObject(object):
def __init__(self, _dir, filename): def __init__(self, _dir, filename):
self._dir = _dir
self._dir = os.path.realpath(_dir)
self._fname = filename self._fname = filename


# XXX make sure this is correct
self._id = uuid.uuid5(_NAMESPACE_MEDASHARE_PATH,
'/'.join(os.path.split(self._dir) + ( self._fname, )))
fname = os.path.join(_dir, filename)
s = os.stat(fname)
self._mtime = datetime.datetime.utcfromtimestamp(s.st_mtime)
self._size = s.st_size
self._hashes = ( _hashfile(fname), )

@property
def hashes(self):
'''The hashes for this file.'''

# XXX - should return a frozen dict
return self._hashes

@property
def mtime(self):
'''The last modified date of the file.'''

return self._mtime

@property
def size(self):
'''The length of the file in bytes.'''

return self._size
@property @property
def filename(self): def filename(self):
'''The name of the file.''' '''The name of the file.'''
@@ -141,10 +204,9 @@ class FileObject(object):
def id(self): def id(self):
'''The UUID of the path to this file.''' '''The UUID of the path to this file.'''


# XXX make sure this is correct
return uuid.uuid5(uuid.NAMESPACE_URL, 'someurl' + '/'.join(os.path.split(self._dir) + ( self._fname, )))
return self._id


def enumeratedir(_dir):
def enumeratedir(_dir='.'):
'''Enumerate all the files and directories (not recursive) in _dir. '''Enumerate all the files and directories (not recursive) in _dir.


Returned is a list of FileObjects.''' Returned is a list of FileObjects.'''
@@ -153,7 +215,7 @@ def enumeratedir(_dir):


class _TestCases(unittest.TestCase): class _TestCases(unittest.TestCase):
def setUp(self): def setUp(self):
d = tempfile.mkdtemp()
d = os.path.realpath(tempfile.mkdtemp())
self.basetempdir = d self.basetempdir = d
self.tempdir = os.path.join(d, 'subdir') self.tempdir = os.path.join(d, 'subdir')


@@ -164,6 +226,10 @@ class _TestCases(unittest.TestCase):
shutil.rmtree(self.basetempdir) shutil.rmtree(self.basetempdir)
self.tempdir = None self.tempdir = None


def test_mdbase(self):
self.assertRaises(ValueError, MDBase.create_obj, { 'type': 'unknosldkfj' })
self.assertRaises(ValueError, MDBase.create_obj, { 'type': 'metadata' })

def test_makehash(self): def test_makehash(self):
self.assertRaises(ValueError, ObjectStore.makehash, 'slkj') self.assertRaises(ValueError, ObjectStore.makehash, 'slkj')
self.assertRaises(ValueError, ObjectStore.makehash, 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ADA') self.assertRaises(ValueError, ObjectStore.makehash, 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ADA')
@@ -176,11 +242,20 @@ class _TestCases(unittest.TestCase):
ftest = files[0] ftest = files[0]
fname = 'test.txt' fname = 'test.txt'


oldid = ftest.id
self.assertEqual(ftest.filename, fname) self.assertEqual(ftest.filename, fname)
self.assertEqual(ftest.dir, self.tempdir) self.assertEqual(ftest.dir, self.tempdir)
self.assertEqual(ftest.id, uuid.uuid5(uuid.NAMESPACE_URL,
'someurl' + '/'.join(os.path.split(self.tempdir) +
# XXX - do we add host information?
self.assertEqual(ftest.id, uuid.uuid5(_NAMESPACE_MEDASHARE_PATH,
'/'.join(os.path.split(self.tempdir) +
( fname, )))) ( fname, ))))
self.assertEqual(ftest.mtime, datetime.datetime(2019, 5, 20, 21, 47, 36))
self.assertEqual(ftest.size, 15)
self.assertIn('sha512:7d5768d47b6bc27dc4fa7e9732cfa2de506ca262a2749cb108923e5dddffde842bbfee6cb8d692fb43aca0f12946c521cce2633887914ca1f96898478d10ad3f', ftest.hashes)

# XXX - make sure works w/ relative dirs
files = enumeratedir(os.path.relpath(self.tempdir))
self.assertEqual(oldid, files[0].id)


def test_objectstore(self): def test_objectstore(self):
objst = ObjectStore() objst = ObjectStore()
@@ -190,6 +265,7 @@ class _TestCases(unittest.TestCase):
objst.loadobj({ objst.loadobj({
'type': 'metadata', 'type': 'metadata',
'uuid': 'c9a1d1e2-3109-4efd-8948-577dc15e44e7', 'uuid': 'c9a1d1e2-3109-4efd-8948-577dc15e44e7',
'modified': datetime.datetime(2019, 5, 31, 14, 3, 10),
'hashes': [ 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada' ], 'hashes': [ 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada' ],
'lang': 'en', 'lang': 'en',
}) })
@@ -199,6 +275,7 @@ class _TestCases(unittest.TestCase):


byid = objst.by_id('3e466e06-45de-4ecc-84ba-2d2a3d970e96') byid = objst.by_id('3e466e06-45de-4ecc-84ba-2d2a3d970e96')


self.assertIsInstance(byid, MetaData)
self.assertIn(byid, lst) self.assertIn(byid, lst)


r = byid r = byid
@@ -206,11 +283,14 @@ class _TestCases(unittest.TestCase):
self.assertEqual(r.uuid, '3e466e06-45de-4ecc-84ba-2d2a3d970e96') self.assertEqual(r.uuid, '3e466e06-45de-4ecc-84ba-2d2a3d970e96')
self.assertEqual(r['dc:author'], 'John-Mark Gurney') self.assertEqual(r['dc:author'], 'John-Mark Gurney')


objst.store('testfile.pasn1')
fname = 'testfile.pasn1'
objst.store(fname)


with open('testfile.pasn1') as fp:
with open(fname) as fp:
objs = _asn1coder.loads(fp.read()) objs = _asn1coder.loads(fp.read())


os.unlink(fname)

self.assertEqual(len(objs), len(objst)) self.assertEqual(len(objs), len(objst))


for i in objs: for i in objs:


Loading…
Cancel
Save