Browse Source

Add support for writing data to the cache. This involves a minor

cache reorg: files are now stored by hash, in subdirectories named after the first character of the hash.

Also import urlparse directly, as we use it a lot.
main
John-Mark Gurney 4 years ago
parent
commit
17256546c9
1 changed file with 65 additions and 22 deletions
  1. +65
    -22
      casimport/__init__.py

+ 65
- 22
casimport/__init__.py View File

@@ -39,6 +39,7 @@ import urllib.request

from importlib.abc import MetaPathFinder, Loader
from importlib.machinery import ModuleSpec
from urllib.parse import urlparse

__author__ = 'John-Mark Gurney'
__copyright__ = 'Copyright 2020 John-Mark Gurney. All rights reserved.'
@@ -180,10 +181,26 @@ class FileDirCAS(object):
'''Internal method to refresh the internal cache of
hashes.'''

for i in glob.glob(os.path.join(self._path, '*.py')):
for i in glob.glob(os.path.join(self._path, '?/*.py')):
_, hash = self.read_hash_file(i)
self._hashes[hash] = i

def add_file(self, fname):
    '''Read the file fname and store its contents in the cache,
    keyed by the SHA-256 hex digest of those contents.

    Note: this is primarily a testing function.'''

    with open(fname, 'rb') as src:
        contents = src.read()

    # the digest is computed over the raw bytes, exactly as read
    self.write_cache(hashlib.sha256(contents).hexdigest(), contents)

def write_cache(self, hash, data):
    '''Write data (bytes) into the cache under the name hash.

    The file is stored as <self._path>/<first character of
    hash>/<hash>.py, matching the '?/*.py' layout that the
    refresh glob looks for.

    hash is used as given; this method does not verify that it
    is the digest of data.'''

    d = self._path / hash[0]

    # Many hashes share the same first character, so the
    # subdirectory will usually already exist after the first
    # write; a bare mkdir() would raise FileExistsError then.
    d.mkdir(exist_ok=True)

    with open(d / (hash + '.py'), 'wb') as fp:
        fp.write(data)

@staticmethod
def read_hash_file(fname):
'''Helper function that will read the file at fname, and
@@ -261,7 +278,7 @@ class CASFinder(MetaPathFinder, Loader):
@staticmethod
def _makebasichashurl(url):
try:
hashurl = urllib.parse.urlparse(url)
hashurl = urlparse(url)
except AttributeError:
hashurl = url
return urllib.parse.urlunparse(hashurl[:3] + ('', '', ''))
@@ -324,7 +341,7 @@ class CASFinder(MetaPathFinder, Loader):
# make hash url:
hashurl = ('hash://sha256/%s' %
bytes.fromhex(arg).hex())
hashurl = urllib.parse.urlparse(hashurl)
hashurl = urlparse(hashurl)
for l in self._loaders:
ispkg = l.is_package(hashurl)
break
@@ -333,7 +350,7 @@ class CASFinder(MetaPathFinder, Loader):
else:
# an alias
for i in self._aliases[arg]:
hashurl = urllib.parse.urlparse(i)
hashurl = urlparse(i)
if hashurl.scheme == 'hash':
break
else:
@@ -367,7 +384,7 @@ class CASFinder(MetaPathFinder, Loader):
else:
for url in self._aliases[
self._makebasichashurl(url)]:
url = urllib.parse.urlparse(url)
url = urlparse(url)
for load in self._loaders:
try:
data = load.fetch_data(url)
@@ -511,6 +528,25 @@ class Test(unittest.TestCase):

self.assertTrue(cachedir.exists())

with open(self.fixtures / 'hello.py', 'rb') as fp:
# that when hello.py's data
hellodata = fp.read()

# is hashed
hellohash = hashlib.sha256(hellodata).hexdigest()

# and written to the cache
fd.write_cache(hellohash, hellodata)

# that the file exists in the correct place
self.assertTrue((cachedir / hellohash[0] / (hellohash + '.py')).exists())

# and that when fetched
data = fd.fetch_data(urlparse('hash://sha256/%s' % hellohash))

# it matches what was passed
self.assertEqual(data, hellodata)

def test_filedircas_limit_refresh(self):
# XXX - only refresh when the dir has changed, and each
# file has changed
@@ -520,12 +556,19 @@ class Test(unittest.TestCase):
# That a CASFinder
f = CASFinder()

cachedir = self.tempdir / 'cache'
cachedir.mkdir()

# make sure that we can't import anything at first
with self.assertRaises(ImportError):
import cas.v1_f_2398472398

# when registering the fixtures directory
f.register(FileDirCAS(self.fixtures))
# when registering the cache directory
fdc = FileDirCAS(cachedir)
f.register(fdc)

# and adding the hello.py file
fdc.add_file(self.fixtures / 'hello.py')

# can import the function
from cas.v1_f_330884aa2febb5e19fb7194ec6a69ed11dd3d77122f1a5175ee93e73cf0161c3 import hello
@@ -578,7 +621,7 @@ class Test(unittest.TestCase):
ipfsconf = dict(gateway=gwurl)
ipfsobj = IPFSCAS.fromconfig(ipfsconf)

self.assertEqual(ipfsobj.make_url(urllib.parse.urlparse('ipfs://someobj')),
self.assertEqual(ipfsobj.make_url(urlparse('ipfs://someobj')),
'https://www.example.com/somepath/someobj')

def test_defaultinit(self):
@@ -600,8 +643,8 @@ class Test(unittest.TestCase):
# that the cache got created
self.assertTrue(defcachedir.is_dir())

# and that when hello.py is copied to the cache
shutil.copy(self.fixtures / 'hello.py', defcachedir)
# and that when hello.py is added to the cache
f._loaders[0].add_file(self.fixtures / 'hello.py')

# it can be imported
from cas.v1_f_330884aa2febb5e19fb7194ec6a69ed11dd3d77122f1a5175ee93e73cf0161c3 import hello
@@ -651,11 +694,11 @@ class Test(unittest.TestCase):
with CASFinder() as f, \
tempattrset(sys.modules[__name__],
'load_mod_aliases', f.load_mod_aliases):
f.register(FileDirCAS(cachedir))
fdc = FileDirCAS(cachedir)
f.register(fdc)

# and that hello.py is in the cache
shutil.copy(self.fixtures / 'hello.py',
cachedir)
fdc.add_file(self.fixtures / 'hello.py')

# and that the aliases are loaded
with open(self.fixtures / 'randpkg' / 'cas_aliases.txt') as fp:
@@ -695,11 +738,11 @@ class Test(unittest.TestCase):
with CASFinder() as f, \
tempattrset(sys.modules[__name__],
'load_mod_aliases', f.load_mod_aliases):
f.register(FileDirCAS(cachedir))
fdc = FileDirCAS(cachedir)
f.register(fdc)

# and that hello.py is in the cache
shutil.copy(self.fixtures / 'hello.py',
cachedir)
fdc.add_file(self.fixtures / 'hello.py')

self.assertNotIn('randpkg', sys.modules)

@@ -768,7 +811,7 @@ class Test(unittest.TestCase):
uomock.return_value.__enter__.return_value.read.return_value = ipfsdata

# that when called
hashurl = urllib.parse.urlparse('ipfs://bafkreibtbcckul7lwxqz7nyzj3dknhwrdxj5o4jc6gsroxxjhzz46albym')
hashurl = urlparse('ipfs://bafkreibtbcckul7lwxqz7nyzj3dknhwrdxj5o4jc6gsroxxjhzz46albym')
data = ipfs.fetch_data(hashurl)

# it opens the correct url
@@ -779,7 +822,7 @@ class Test(unittest.TestCase):

with self.assertRaises(ValueError):
# that a hash url fails
ipfs.fetch_data(urllib.parse.urlparse('hash://sha256/asldfkj'))
ipfs.fetch_data(urlparse('hash://sha256/asldfkj'))

# that when the request fails
uomock.return_value.__enter__.return_value.status = 400
@@ -806,7 +849,7 @@ class Test(unittest.TestCase):
uomock.return_value.__enter__.return_value.read.return_value = httpsdata

# that when called
hashurl = urllib.parse.urlparse('https://www.funkthat.com/gitea/jmg/casimport/raw/commit/753e64f53c73d9d1afc4d8a617edb9d3542dcea2/fixtures/hello.py')
hashurl = urlparse('https://www.funkthat.com/gitea/jmg/casimport/raw/commit/753e64f53c73d9d1afc4d8a617edb9d3542dcea2/fixtures/hello.py')
data = httpsldr.fetch_data(hashurl)

# it opens the correct url
@@ -817,7 +860,7 @@ class Test(unittest.TestCase):

with self.assertRaises(ValueError):
# that a hash url fails
httpsldr.fetch_data(urllib.parse.urlparse('hash://sha256/asldfkj'))
httpsldr.fetch_data(urlparse('hash://sha256/asldfkj'))

# that when the request fails
uomock.return_value.__enter__.return_value.status = 400
@@ -856,8 +899,8 @@ class Test(unittest.TestCase):

# that when read raises an exception
with self.assertRaises(ValueError):
fdcas.fetch_data(urllib.parse.urlparse('hash://sha256/0000'))
fdcas.fetch_data(urlparse('hash://sha256/0000'))

# that when passed an invalid url
with self.assertRaises(ValueError):
fdcas.fetch_data(urllib.parse.urlparse('https://sha256/0000'))
fdcas.fetch_data(urlparse('https://sha256/0000'))

Loading…
Cancel
Save