Browse Source

add default hash, add base object + common property enforcement,

FileObject now has hashes and other properties, use realpath
for the dir to get common path
main
John-Mark Gurney 5 years ago
parent
commit
6815ebdf92
1 changed files with 92 additions and 12 deletions
  1. +92
    -12
      ui/cli.py

+ 92
- 12
ui/cli.py View File

@@ -1,5 +1,6 @@
#!/usr/bin/env python

import datetime
import hashlib
import pasn1
import os.path
@@ -9,17 +10,42 @@ import tempfile
import unittest
import uuid

# The UUID for the namespace representing the path to a file
_NAMESPACE_MEDASHARE_PATH = uuid.UUID('f6f36b62-3770-4a68-bc3d-dc3e31e429e6')

_defaulthash = 'sha512'
_validhashes = set([ 'sha256', 'sha512' ])
_hashlengths = { len(getattr(hashlib, x)().hexdigest()): x for x in _validhashes }

# XXX - add validation
class ObjWrap(object):
class MDBase(object):
'''This is a simple wrapper that turns a JSON object into a pythonesque
object where attribute accesses work.'''

_common_properties = [ 'uuid', 'type', 'modified' ]

def __init__(self, obj):
for x in self._common_properties:
if x not in obj:
raise ValueError('common property %s not present' % `x`)

self._obj = obj

@classmethod
def create_obj(cls, obj):
'''Using obj as a base, create an instead of MDBase of the
correct type.

If the correct type is not found, a ValueError is raised.'''

ty = obj['type']

for i in cls.__subclasses__():
if i._type == ty:
return i(obj)
else:
raise ValueError('Unable to find class for type %s' % `ty`)

def __getattr__(self, k):
# Python calls this only after normal attribute lookup has failed;
# the attribute name is then used as a key into the wrapped object,
# so obj.uuid reads self._obj['uuid'].
# NOTE(review): a missing key propagates whatever self._obj[k] raises
# (KeyError if the wrapped object is a dict) instead of
# AttributeError -- confirm no caller relies on hasattr() or a
# getattr() default working on these objects.
return self._obj[k]

@@ -32,6 +58,9 @@ class ObjWrap(object):
def __eq__(self, o):
# Equal when cmp() of the wrapped object and o returns 0.  o is
# handed to cmp() as passed; it is not unwrapped here.
# NOTE(review): cmp() is a Python 2 builtin, so this line has to
# change if the module is ever run under Python 3.
return cmp(self._obj, o) == 0

class MetaData(MDBase):
# Value of an object's 'type' property that MDBase.create_obj compares
# against to select this class.
_type = 'metadata'

def _trytodict(o):
try:
return 'dict', o.__to_dict__()
@@ -86,8 +115,7 @@ class ObjectStore(object):
def loadobj(self, obj):
'''Load obj into the data store.'''

if not isinstance(obj, ObjWrap):
obj = ObjWrap(obj)
obj = MDBase.create_obj(obj)

id = uuid.UUID(obj.uuid)
self._uuids[id] = obj
@@ -120,11 +148,46 @@ class ObjectStore(object):
h = self.makehash(hash, strict=False)
return self._hashes[h]

def _hashfile(fname):
    '''Return the hash of the contents of the file named fname.

    The algorithm is the one named by _defaulthash.  The file is opened
    in binary mode so the digest is computed over the exact bytes on
    disk (no newline translation), and it is read in fixed size chunks
    so that large files do not have to fit in memory.

    Returned is a string of the form '<algorithm>:<hexdigest>'.

    Any error from open or read (IOError/OSError) is propagated.'''

    hasher = getattr(hashlib, _defaulthash)()
    with open(fname, 'rb') as fp:
        # iter() with a sentinel keeps calling read until it returns
        # the empty byte string at end of file.
        for chunk in iter(lambda: fp.read(64 * 1024), b''):
            hasher.update(chunk)

    return '%s:%s' % (_defaulthash, hasher.hexdigest())

class FileObject(object):
def __init__(self, _dir, filename):
self._dir = _dir
self._dir = os.path.realpath(_dir)
self._fname = filename

# XXX make sure this is correct
self._id = uuid.uuid5(_NAMESPACE_MEDASHARE_PATH,
'/'.join(os.path.split(self._dir) + ( self._fname, )))
fname = os.path.join(_dir, filename)
s = os.stat(fname)
self._mtime = datetime.datetime.utcfromtimestamp(s.st_mtime)
self._size = s.st_size
self._hashes = ( _hashfile(fname), )

@property
def hashes(self):
'''The hashes for this file.'''

# Set once in __init__ as a one-element tuple holding the
# '<algorithm>:<hexdigest>' string produced by _hashfile.
# XXX - should return a frozen dict
return self._hashes

@property
def mtime(self):
'''The last modified date of the file.'''

# Naive datetime built in __init__ with utcfromtimestamp() from the
# st_mtime seen at construction; it is not refreshed afterwards.
return self._mtime

@property
def size(self):
'''The length of the file in bytes.'''

# st_size from the os.stat() call made in __init__; it is not
# refreshed if the file changes later.
return self._size
@property
def filename(self):
'''The name of the file.'''
@@ -141,10 +204,9 @@ class FileObject(object):
def id(self):
'''The UUID of the path to this file.'''

# XXX make sure this is correct
return uuid.uuid5(uuid.NAMESPACE_URL, 'someurl' + '/'.join(os.path.split(self._dir) + ( self._fname, )))
return self._id

def enumeratedir(_dir):
def enumeratedir(_dir='.'):
'''Enumerate all the files and directories (not recursive) in _dir.

Returned is a list of FileObjects.'''
@@ -153,7 +215,7 @@ def enumeratedir(_dir):

class _TestCases(unittest.TestCase):
def setUp(self):
d = tempfile.mkdtemp()
d = os.path.realpath(tempfile.mkdtemp())
self.basetempdir = d
self.tempdir = os.path.join(d, 'subdir')

@@ -164,6 +226,10 @@ class _TestCases(unittest.TestCase):
shutil.rmtree(self.basetempdir)
self.tempdir = None

def test_mdbase(self):
self.assertRaises(ValueError, MDBase.create_obj, { 'type': 'unknosldkfj' })
self.assertRaises(ValueError, MDBase.create_obj, { 'type': 'metadata' })

def test_makehash(self):
self.assertRaises(ValueError, ObjectStore.makehash, 'slkj')
self.assertRaises(ValueError, ObjectStore.makehash, 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ADA')
@@ -176,11 +242,20 @@ class _TestCases(unittest.TestCase):
ftest = files[0]
fname = 'test.txt'

oldid = ftest.id
self.assertEqual(ftest.filename, fname)
self.assertEqual(ftest.dir, self.tempdir)
self.assertEqual(ftest.id, uuid.uuid5(uuid.NAMESPACE_URL,
'someurl' + '/'.join(os.path.split(self.tempdir) +
# XXX - do we add host information?
self.assertEqual(ftest.id, uuid.uuid5(_NAMESPACE_MEDASHARE_PATH,
'/'.join(os.path.split(self.tempdir) +
( fname, ))))
self.assertEqual(ftest.mtime, datetime.datetime(2019, 5, 20, 21, 47, 36))
self.assertEqual(ftest.size, 15)
self.assertIn('sha512:7d5768d47b6bc27dc4fa7e9732cfa2de506ca262a2749cb108923e5dddffde842bbfee6cb8d692fb43aca0f12946c521cce2633887914ca1f96898478d10ad3f', ftest.hashes)

# XXX - make sure works w/ relative dirs
files = enumeratedir(os.path.relpath(self.tempdir))
self.assertEqual(oldid, files[0].id)

def test_objectstore(self):
objst = ObjectStore()
@@ -190,6 +265,7 @@ class _TestCases(unittest.TestCase):
objst.loadobj({
'type': 'metadata',
'uuid': 'c9a1d1e2-3109-4efd-8948-577dc15e44e7',
'modified': datetime.datetime(2019, 5, 31, 14, 3, 10),
'hashes': [ 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada' ],
'lang': 'en',
})
@@ -199,6 +275,7 @@ class _TestCases(unittest.TestCase):

byid = objst.by_id('3e466e06-45de-4ecc-84ba-2d2a3d970e96')

self.assertIsInstance(byid, MetaData)
self.assertIn(byid, lst)

r = byid
@@ -206,11 +283,14 @@ class _TestCases(unittest.TestCase):
self.assertEqual(r.uuid, '3e466e06-45de-4ecc-84ba-2d2a3d970e96')
self.assertEqual(r['dc:author'], 'John-Mark Gurney')

objst.store('testfile.pasn1')
fname = 'testfile.pasn1'
objst.store(fname)

with open('testfile.pasn1') as fp:
with open(fname) as fp:
objs = _asn1coder.loads(fp.read())

os.unlink(fname)

self.assertEqual(len(objs), len(objst))

for i in objs:


Loading…
Cancel
Save