Browse Source

Add support for writing data to the cache. This involves a minor

cache reorganization: files are now stored by hash, in subdirectories.

Also import urlparse directly, since it is used a lot.
main
John-Mark Gurney 4 years ago
parent
commit
17256546c9
1 changed files with 65 additions and 22 deletions
  1. +65
    -22
      casimport/__init__.py

+ 65
- 22
casimport/__init__.py View File

@@ -39,6 +39,7 @@ import urllib.request


from importlib.abc import MetaPathFinder, Loader from importlib.abc import MetaPathFinder, Loader
from importlib.machinery import ModuleSpec from importlib.machinery import ModuleSpec
from urllib.parse import urlparse


__author__ = 'John-Mark Gurney' __author__ = 'John-Mark Gurney'
__copyright__ = 'Copyright 2020 John-Mark Gurney. All rights reserved.' __copyright__ = 'Copyright 2020 John-Mark Gurney. All rights reserved.'
@@ -180,10 +181,26 @@ class FileDirCAS(object):
'''Internal method to refresh the internal cache of '''Internal method to refresh the internal cache of
hashes.''' hashes.'''


for i in glob.glob(os.path.join(self._path, '*.py')):
for i in glob.glob(os.path.join(self._path, '?/*.py')):
_, hash = self.read_hash_file(i) _, hash = self.read_hash_file(i)
self._hashes[hash] = i self._hashes[hash] = i


def add_file(self, fname):
    '''Read the file fname and store its contents in the cache,
    keyed by the SHA-256 hex digest of those contents.

    Note: this is primarily a testing function.'''

    with open(fname, 'rb') as src:
        contents = src.read()

    # The digest is computed over the raw bytes and handed to
    # write_cache together with those same bytes.
    self.write_cache(hashlib.sha256(contents).hexdigest(), contents)

def write_cache(self, hash, data):
    '''Store data in the cache directory under the name hash.

    The file is written to <cache path>/<first character of
    hash>/<hash>.py, which is the '?/*.py' layout the hash
    refresh glob scans for.

    hash -- string naming the data (add_file passes the SHA-256
        hex digest of data)
    data -- bytes to write to the cache file
    '''

    subdir = self._path / hash[0]

    # Many hashes share a leading character, so after the first
    # write the subdirectory usually already exists; a bare mkdir()
    # would raise FileExistsError in that case.
    subdir.mkdir(exist_ok=True)

    with open(subdir / (hash + '.py'), 'wb') as fp:
        fp.write(data)

@staticmethod @staticmethod
def read_hash_file(fname): def read_hash_file(fname):
'''Helper function that will read the file at fname, and '''Helper function that will read the file at fname, and
@@ -261,7 +278,7 @@ class CASFinder(MetaPathFinder, Loader):
@staticmethod @staticmethod
def _makebasichashurl(url): def _makebasichashurl(url):
try: try:
hashurl = urllib.parse.urlparse(url)
hashurl = urlparse(url)
except AttributeError: except AttributeError:
hashurl = url hashurl = url
return urllib.parse.urlunparse(hashurl[:3] + ('', '', '')) return urllib.parse.urlunparse(hashurl[:3] + ('', '', ''))
@@ -324,7 +341,7 @@ class CASFinder(MetaPathFinder, Loader):
# make hash url: # make hash url:
hashurl = ('hash://sha256/%s' % hashurl = ('hash://sha256/%s' %
bytes.fromhex(arg).hex()) bytes.fromhex(arg).hex())
hashurl = urllib.parse.urlparse(hashurl)
hashurl = urlparse(hashurl)
for l in self._loaders: for l in self._loaders:
ispkg = l.is_package(hashurl) ispkg = l.is_package(hashurl)
break break
@@ -333,7 +350,7 @@ class CASFinder(MetaPathFinder, Loader):
else: else:
# an alias # an alias
for i in self._aliases[arg]: for i in self._aliases[arg]:
hashurl = urllib.parse.urlparse(i)
hashurl = urlparse(i)
if hashurl.scheme == 'hash': if hashurl.scheme == 'hash':
break break
else: else:
@@ -367,7 +384,7 @@ class CASFinder(MetaPathFinder, Loader):
else: else:
for url in self._aliases[ for url in self._aliases[
self._makebasichashurl(url)]: self._makebasichashurl(url)]:
url = urllib.parse.urlparse(url)
url = urlparse(url)
for load in self._loaders: for load in self._loaders:
try: try:
data = load.fetch_data(url) data = load.fetch_data(url)
@@ -511,6 +528,25 @@ class Test(unittest.TestCase):


self.assertTrue(cachedir.exists()) self.assertTrue(cachedir.exists())


with open(self.fixtures / 'hello.py', 'rb') as fp:
# that when hello.py's data
hellodata = fp.read()

# is hashed
hellohash = hashlib.sha256(hellodata).hexdigest()

# and written to the cache
fd.write_cache(hellohash, hellodata)

# that the file exists in the correct place
self.assertTrue((cachedir / hellohash[0] / (hellohash + '.py')).exists())

# and that when fetched
data = fd.fetch_data(urlparse('hash://sha256/%s' % hellohash))

# it matches what was passed
self.assertEqual(data, hellodata)

def test_filedircas_limit_refresh(self): def test_filedircas_limit_refresh(self):
# XXX - only refresh when the dir has changed, and each # XXX - only refresh when the dir has changed, and each
# file has changed # file has changed
@@ -520,12 +556,19 @@ class Test(unittest.TestCase):
# That a CASFinder # That a CASFinder
f = CASFinder() f = CASFinder()


cachedir = self.tempdir / 'cache'
cachedir.mkdir()

# make sure that we can't import anything at first # make sure that we can't import anything at first
with self.assertRaises(ImportError): with self.assertRaises(ImportError):
import cas.v1_f_2398472398 import cas.v1_f_2398472398


# when registering the fixtures directory
f.register(FileDirCAS(self.fixtures))
# when registering the cache directory
fdc = FileDirCAS(cachedir)
f.register(fdc)

# and adding the hello.py file
fdc.add_file(self.fixtures / 'hello.py')


# can import the function # can import the function
from cas.v1_f_330884aa2febb5e19fb7194ec6a69ed11dd3d77122f1a5175ee93e73cf0161c3 import hello from cas.v1_f_330884aa2febb5e19fb7194ec6a69ed11dd3d77122f1a5175ee93e73cf0161c3 import hello
@@ -578,7 +621,7 @@ class Test(unittest.TestCase):
ipfsconf = dict(gateway=gwurl) ipfsconf = dict(gateway=gwurl)
ipfsobj = IPFSCAS.fromconfig(ipfsconf) ipfsobj = IPFSCAS.fromconfig(ipfsconf)


self.assertEqual(ipfsobj.make_url(urllib.parse.urlparse('ipfs://someobj')),
self.assertEqual(ipfsobj.make_url(urlparse('ipfs://someobj')),
'https://www.example.com/somepath/someobj') 'https://www.example.com/somepath/someobj')


def test_defaultinit(self): def test_defaultinit(self):
@@ -600,8 +643,8 @@ class Test(unittest.TestCase):
# that the cache got created # that the cache got created
self.assertTrue(defcachedir.is_dir()) self.assertTrue(defcachedir.is_dir())


# and that when hello.py is copied to the cache
shutil.copy(self.fixtures / 'hello.py', defcachedir)
# and that when hello.py is added to the cache
f._loaders[0].add_file(self.fixtures / 'hello.py')


# it can be imported # it can be imported
from cas.v1_f_330884aa2febb5e19fb7194ec6a69ed11dd3d77122f1a5175ee93e73cf0161c3 import hello from cas.v1_f_330884aa2febb5e19fb7194ec6a69ed11dd3d77122f1a5175ee93e73cf0161c3 import hello
@@ -651,11 +694,11 @@ class Test(unittest.TestCase):
with CASFinder() as f, \ with CASFinder() as f, \
tempattrset(sys.modules[__name__], tempattrset(sys.modules[__name__],
'load_mod_aliases', f.load_mod_aliases): 'load_mod_aliases', f.load_mod_aliases):
f.register(FileDirCAS(cachedir))
fdc = FileDirCAS(cachedir)
f.register(fdc)


# and that hello.py is in the cache # and that hello.py is in the cache
shutil.copy(self.fixtures / 'hello.py',
cachedir)
fdc.add_file(self.fixtures / 'hello.py')


# and that the aliases are loaded # and that the aliases are loaded
with open(self.fixtures / 'randpkg' / 'cas_aliases.txt') as fp: with open(self.fixtures / 'randpkg' / 'cas_aliases.txt') as fp:
@@ -695,11 +738,11 @@ class Test(unittest.TestCase):
with CASFinder() as f, \ with CASFinder() as f, \
tempattrset(sys.modules[__name__], tempattrset(sys.modules[__name__],
'load_mod_aliases', f.load_mod_aliases): 'load_mod_aliases', f.load_mod_aliases):
f.register(FileDirCAS(cachedir))
fdc = FileDirCAS(cachedir)
f.register(fdc)


# and that hello.py is in the cache # and that hello.py is in the cache
shutil.copy(self.fixtures / 'hello.py',
cachedir)
fdc.add_file(self.fixtures / 'hello.py')


self.assertNotIn('randpkg', sys.modules) self.assertNotIn('randpkg', sys.modules)


@@ -768,7 +811,7 @@ class Test(unittest.TestCase):
uomock.return_value.__enter__.return_value.read.return_value = ipfsdata uomock.return_value.__enter__.return_value.read.return_value = ipfsdata


# that when called # that when called
hashurl = urllib.parse.urlparse('ipfs://bafkreibtbcckul7lwxqz7nyzj3dknhwrdxj5o4jc6gsroxxjhzz46albym')
hashurl = urlparse('ipfs://bafkreibtbcckul7lwxqz7nyzj3dknhwrdxj5o4jc6gsroxxjhzz46albym')
data = ipfs.fetch_data(hashurl) data = ipfs.fetch_data(hashurl)


# it opens the correct url # it opens the correct url
@@ -779,7 +822,7 @@ class Test(unittest.TestCase):


with self.assertRaises(ValueError): with self.assertRaises(ValueError):
# that a hash url fails # that a hash url fails
ipfs.fetch_data(urllib.parse.urlparse('hash://sha256/asldfkj'))
ipfs.fetch_data(urlparse('hash://sha256/asldfkj'))


# that when the request fails # that when the request fails
uomock.return_value.__enter__.return_value.status = 400 uomock.return_value.__enter__.return_value.status = 400
@@ -806,7 +849,7 @@ class Test(unittest.TestCase):
uomock.return_value.__enter__.return_value.read.return_value = httpsdata uomock.return_value.__enter__.return_value.read.return_value = httpsdata


# that when called # that when called
hashurl = urllib.parse.urlparse('https://www.funkthat.com/gitea/jmg/casimport/raw/commit/753e64f53c73d9d1afc4d8a617edb9d3542dcea2/fixtures/hello.py')
hashurl = urlparse('https://www.funkthat.com/gitea/jmg/casimport/raw/commit/753e64f53c73d9d1afc4d8a617edb9d3542dcea2/fixtures/hello.py')
data = httpsldr.fetch_data(hashurl) data = httpsldr.fetch_data(hashurl)


# it opens the correct url # it opens the correct url
@@ -817,7 +860,7 @@ class Test(unittest.TestCase):


with self.assertRaises(ValueError): with self.assertRaises(ValueError):
# that a hash url fails # that a hash url fails
httpsldr.fetch_data(urllib.parse.urlparse('hash://sha256/asldfkj'))
httpsldr.fetch_data(urlparse('hash://sha256/asldfkj'))


# that when the request fails # that when the request fails
uomock.return_value.__enter__.return_value.status = 400 uomock.return_value.__enter__.return_value.status = 400
@@ -856,8 +899,8 @@ class Test(unittest.TestCase):


# that when read raises an exception # that when read raises an exception
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
fdcas.fetch_data(urllib.parse.urlparse('hash://sha256/0000'))
fdcas.fetch_data(urlparse('hash://sha256/0000'))


# that when passed an invalid url # that when passed an invalid url
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
fdcas.fetch_data(urllib.parse.urlparse('https://sha256/0000'))
fdcas.fetch_data(urlparse('https://sha256/0000'))

Loading…
Cancel
Save