From 17256546c9d4c822d093e89d18014d70d10234e0 Mon Sep 17 00:00:00 2001 From: John-Mark Gurney Date: Fri, 14 Feb 2020 16:51:40 -0800 Subject: [PATCH] add support for writing data to the cache.. this involves a minor cache reorg, and we now store files by hash, and in subdirs... also import urlparse as we use it a lot... --- casimport/__init__.py | 87 ++++++++++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 22 deletions(-) diff --git a/casimport/__init__.py b/casimport/__init__.py index 95dc93f..8387df1 100644 --- a/casimport/__init__.py +++ b/casimport/__init__.py @@ -39,6 +39,7 @@ import urllib.request from importlib.abc import MetaPathFinder, Loader from importlib.machinery import ModuleSpec +from urllib.parse import urlparse __author__ = 'John-Mark Gurney' __copyright__ = 'Copyright 2020 John-Mark Gurney. All rights reserved.' @@ -180,10 +181,26 @@ class FileDirCAS(object): '''Internal method to refresh the internal cache of hashes.''' - for i in glob.glob(os.path.join(self._path, '*.py')): + for i in glob.glob(os.path.join(self._path, '?/*.py')): _, hash = self.read_hash_file(i) self._hashes[hash] = i + def add_file(self, fname): + '''Note: this is primarily a testing function.''' + + with open(fname, 'rb') as fp: + data = fp.read() + + hash = hashlib.sha256(data).hexdigest() + + self.write_cache(hash, data) + + def write_cache(self, hash, data): + d = self._path / hash[0] + d.mkdir() + with open(d / (hash + '.py'), 'wb') as fp: + fp.write(data) + @staticmethod def read_hash_file(fname): '''Helper function that will read the file at fname, and @@ -261,7 +278,7 @@ class CASFinder(MetaPathFinder, Loader): @staticmethod def _makebasichashurl(url): try: - hashurl = urllib.parse.urlparse(url) + hashurl = urlparse(url) except AttributeError: hashurl = url return urllib.parse.urlunparse(hashurl[:3] + ('', '', '')) @@ -324,7 +341,7 @@ class CASFinder(MetaPathFinder, Loader): # make hash url: hashurl = ('hash://sha256/%s' % bytes.fromhex(arg).hex()) - hashurl = urllib.parse.urlparse(hashurl) + hashurl = urlparse(hashurl) for l in self._loaders: ispkg = l.is_package(hashurl) break @@ -333,7 +350,7 @@ class CASFinder(MetaPathFinder, Loader): else: # an alias for i in self._aliases[arg]: - hashurl = urllib.parse.urlparse(i) + hashurl = urlparse(i) if hashurl.scheme == 'hash': break else: @@ -367,7 +384,7 @@ class CASFinder(MetaPathFinder, Loader): else: for url in self._aliases[ self._makebasichashurl(url)]: - url = urllib.parse.urlparse(url) + url = urlparse(url) for load in self._loaders: try: data = load.fetch_data(url) @@ -511,6 +528,25 @@ class Test(unittest.TestCase): self.assertTrue(cachedir.exists()) + with open(self.fixtures / 'hello.py', 'rb') as fp: + # that when hello.py's data + hellodata = fp.read() + + # is hashed + hellohash = hashlib.sha256(hellodata).hexdigest() + + # and written to the cache + fd.write_cache(hellohash, hellodata) + + # that the file exists in the correct place + self.assertTrue((cachedir / hellohash[0] / (hellohash + '.py')).exists()) + + # and that when fetched + data = fd.fetch_data(urlparse('hash://sha256/%s' % hellohash)) + + # it matches what was passed + self.assertEqual(data, hellodata) + def test_filedircas_limit_refresh(self): # XXX - only refresh when the dir has changed, and each # file has changed @@ -520,12 +556,19 @@ class Test(unittest.TestCase): # That a CASFinder f = CASFinder() + cachedir = self.tempdir / 'cache' + cachedir.mkdir() + # make sure that we can't import anything at first with self.assertRaises(ImportError): import cas.v1_f_2398472398 - # when registering the fixtures directory - f.register(FileDirCAS(self.fixtures)) + # when registering the cache directory + fdc = FileDirCAS(cachedir) + f.register(fdc) + + # and adding the hello.py file + fdc.add_file(self.fixtures / 'hello.py') # can import the function from cas.v1_f_330884aa2febb5e19fb7194ec6a69ed11dd3d77122f1a5175ee93e73cf0161c3 import hello @@ -578,7 +621,7 @@ class Test(unittest.TestCase): ipfsconf = dict(gateway=gwurl) ipfsobj = IPFSCAS.fromconfig(ipfsconf) - self.assertEqual(ipfsobj.make_url(urllib.parse.urlparse('ipfs://someobj')), + self.assertEqual(ipfsobj.make_url(urlparse('ipfs://someobj')), 'https://www.example.com/somepath/someobj') def test_defaultinit(self): @@ -600,8 +643,8 @@ class Test(unittest.TestCase): # that the cache got created self.assertTrue(defcachedir.is_dir()) - # and that when hello.py is copied to the cache - shutil.copy(self.fixtures / 'hello.py', defcachedir) + # and that when hello.py is added to the cache + f._loaders[0].add_file(self.fixtures / 'hello.py') # it can be imported from cas.v1_f_330884aa2febb5e19fb7194ec6a69ed11dd3d77122f1a5175ee93e73cf0161c3 import hello @@ -651,11 +694,11 @@ class Test(unittest.TestCase): with CASFinder() as f, \ tempattrset(sys.modules[__name__], 'load_mod_aliases', f.load_mod_aliases): - f.register(FileDirCAS(cachedir)) + fdc = FileDirCAS(cachedir) + f.register(fdc) # and that hello.py is in the cache - shutil.copy(self.fixtures / 'hello.py', - cachedir) + fdc.add_file(self.fixtures / 'hello.py') # and that the aliases are loaded with open(self.fixtures / 'randpkg' / 'cas_aliases.txt') as fp: @@ -695,11 +738,11 @@ class Test(unittest.TestCase): with CASFinder() as f, \ tempattrset(sys.modules[__name__], 'load_mod_aliases', f.load_mod_aliases): - f.register(FileDirCAS(cachedir)) + fdc = FileDirCAS(cachedir) + f.register(fdc) # and that hello.py is in the cache - shutil.copy(self.fixtures / 'hello.py', - cachedir) + fdc.add_file(self.fixtures / 'hello.py') self.assertNotIn('randpkg', sys.modules) @@ -768,7 +811,7 @@ class Test(unittest.TestCase): uomock.return_value.__enter__.return_value.read.return_value = ipfsdata # that when called - hashurl = urllib.parse.urlparse('ipfs://bafkreibtbcckul7lwxqz7nyzj3dknhwrdxj5o4jc6gsroxxjhzz46albym') + hashurl = urlparse('ipfs://bafkreibtbcckul7lwxqz7nyzj3dknhwrdxj5o4jc6gsroxxjhzz46albym') data = ipfs.fetch_data(hashurl) # it opens the correct url @@ -779,7 +822,7 @@ class Test(unittest.TestCase): with self.assertRaises(ValueError): # that a hash url fails - ipfs.fetch_data(urllib.parse.urlparse('hash://sha256/asldfkj')) + ipfs.fetch_data(urlparse('hash://sha256/asldfkj')) # that when the request fails uomock.return_value.__enter__.return_value.status = 400 @@ -806,7 +849,7 @@ class Test(unittest.TestCase): uomock.return_value.__enter__.return_value.read.return_value = httpsdata # that when called - hashurl = urllib.parse.urlparse('https://www.funkthat.com/gitea/jmg/casimport/raw/commit/753e64f53c73d9d1afc4d8a617edb9d3542dcea2/fixtures/hello.py') + hashurl = urlparse('https://www.funkthat.com/gitea/jmg/casimport/raw/commit/753e64f53c73d9d1afc4d8a617edb9d3542dcea2/fixtures/hello.py') data = httpsldr.fetch_data(hashurl) # it opens the correct url @@ -817,7 +860,7 @@ class Test(unittest.TestCase): with self.assertRaises(ValueError): # that a hash url fails - httpsldr.fetch_data(urllib.parse.urlparse('hash://sha256/asldfkj')) + httpsldr.fetch_data(urlparse('hash://sha256/asldfkj')) # that when the request fails uomock.return_value.__enter__.return_value.status = 400 @@ -856,8 +899,8 @@ class Test(unittest.TestCase): # that when read raises an exception with self.assertRaises(ValueError): - fdcas.fetch_data(urllib.parse.urlparse('hash://sha256/0000')) + fdcas.fetch_data(urlparse('hash://sha256/0000')) # that when passed an invalid url with self.assertRaises(ValueError): - fdcas.fetch_data(urllib.parse.urlparse('https://sha256/0000')) + fdcas.fetch_data(urlparse('https://sha256/0000'))