@@ -0,0 +1,31 @@
+[
+  {
+    "title": "gen ident",
+    "cmd": [ "genident", "name=A Test User" ],
+    "exit": 0
+  },
+  {
+    "special": "setup tar file"
+  },
+  {
+    "title": "import tar.gz container",
+    "cmd": [ "container", "testfile.tar.gz" ]
+  },
+  {
+    "special": "verify store object cnt",
+    "comment": "should have one container and one file",
+    "count": 2
+  },
+  {
+    "title": "verify correct files imported",
+    "cmd": [ "dump" ],
+    "stdout_check": [
+      { "type": "identity" },
+      { "files": [ "testfiles/newfile.txt", "testfiles/test.txt" ],
+        "hashes": [ "sha512:90f8342520f0ac57fb5a779f5d331c2fa87aa40f8799940257f9ba619940951e67143a8d746535ed0284924b2b7bc1478f095198800ba96d01847d7b56ca465c", "sha512:7d5768d47b6bc27dc4fa7e9732cfa2de506ca262a2749cb108923e5dddffde842bbfee6cb8d692fb43aca0f12946c521cce2633887914ca1f96898478d10ad3f" ],
+        "type": "container",
+        "uri": "hash://sha512/79fab684ca73e25994c1b739dcf8f03acf27dff74d63b4b3affd9aa69fbb37d23794b723802cad131969225403846f8f8c470bc2432c32de34d39f044a360073" },
+      { "type": "file", "hashes": [ "sha512:79fab684ca73e25994c1b739dcf8f03acf27dff74d63b4b3affd9aa69fbb37d23794b723802cad131969225403846f8f8c470bc2432c32de34d39f044a360073" ] }
+    ]
+  }
+]
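Each entry in this new command file drives one step of the CLI test harness: `cmd` is the argument vector handed to the program, `exit` the expected status, `stdout_check` a list of objects the `dump` output must match, and `special` an out-of-band fixture action (handled in `run_command_file`; see the test hunk further down). A minimal sketch of how one entry could be interpreted; `run_main` and the subset-matching are illustrative, not the harness's actual API:

```python
# Hypothetical interpreter for a single command-file entry; run_main
# stands in for however the harness invokes the CLI in-process.
def run_entry(entry, run_main):
	if 'special' in entry:
		return	# fixture setup, handled out-of-band by the harness

	rc, objects = run_main(entry['cmd'])
	assert rc == entry.get('exit', 0)

	for want in entry.get('stdout_check', []):
		# each check must subset-match one object emitted by 'dump'
		obj = next(o for o in objects if o['type'] == want['type'])
		assert all(obj[k] == v for k, v in want.items())
```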
@@ -18,17 +18,7 @@ if False:
 	logging.getLogger('sqlalchemy').addHandler(_handler)
 	logging.getLogger('sqlalchemy.engine').setLevel(lvl)
 
-def _debprint(*args): # pragma: no cover
-	import traceback, sys, os.path
-
-	st = traceback.extract_stack(limit=2)[0]
-	sep = ''
-	if args:
-		sep = ':'
-	print('%s:%d%s' % (os.path.basename(st.filename), st.lineno, sep),
-	    *args, file=_real_stderr)
-	sys.stderr.flush()
+from .utils import _debprint
 
 #import pdb, sys; mypdb = pdb.Pdb(stdout=sys.stderr); mypdb.set_trace()
@@ -38,6 +28,7 @@ from unittest import mock
 from .hostid import hostuuid
 from .tags import TagCache
 from . import orm
+from .magic_wrap import detect_from_filename
 from .btv import _TestCases as bttestcase, validate_file
@@ -52,6 +43,7 @@ import importlib
 import io
 import itertools
 import json
+import libarchive
 import magic
 import os.path
 import pathlib
@@ -78,6 +70,11 @@ _validhashes = set([ 'sha256', 'sha512' ])
 _hashlengths = { len(getattr(hashlib, x)().hexdigest()): x for x in
     _validhashes }
 
+def _makehashuri(hashstr):
+	hash, value = ObjectStore.makehash(hashstr).split(':')
+	return f'hash://{hash}/{value}'
+
 def _keyordering(x):
 	k, v = x
 	try:
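`_makehashuri` converts a canonical `<alg>:<hexdigest>` hash string into the `hash://` URI form; the fixture above uses exactly this shape, its container's `uri` being the `hash://` rendering of the tarball's own sha512. A standalone sketch, assuming `ObjectStore.makehash` only canonicalizes its input to `alg:hexdigest`:

```python
def makehashuri(hashstr):
	# stand-in for _makehashuri; the real one canonicalizes the
	# input via ObjectStore.makehash() before splitting
	alg, value = hashstr.split(':', 1)
	return f'hash://{alg}/{value}'

assert makehashuri('sha256:abc123') == 'hash://sha256/abc123'
```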
@@ -651,16 +648,20 @@ class ObjectStore(object):
 def _readfp(fp):
 	while True:
 		r = fp.read(64*1024)
-		if r == b'':
+		# libarchive returns None on EOF
+		if r == b'' or r is None:
 			return
 		yield r
 
 def _hashfile(fname):
-	hash = getattr(hashlib, _defaulthash)()
 	with open(fname, 'rb') as fp:
-		for r in _readfp(fp):
-			hash.update(r)
+		return _hashfp(fp)
+
+def _hashfp(fp):
+	hash = getattr(hashlib, _defaulthash)()
+	for r in _readfp(fp):
+		hash.update(r)
 	return '%s:%s' % (_defaulthash, hash.hexdigest())
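This split matters because `handle_archive` below hashes `libarchive` read streams, which (per the new comment) signal EOF with `None` rather than `b''`. Factoring `_hashfp` out of `_hashfile` lets plain files and archive-member streams share one chunked hashing loop. A self-contained illustration with an in-memory stream:

```python
import hashlib
import io

def readfp(fp):
	while True:
		r = fp.read(64*1024)
		# accept both EOF conventions: b'' (files), None (libarchive)
		if r == b'' or r is None:
			return
		yield r

def hashfp(fp, alg='sha512'):
	h = hashlib.new(alg)
	for r in readfp(fp):
		h.update(r)
	return '%s:%s' % (alg, h.hexdigest())

# prints the sha512 of b'hello' in the alg:hexdigest form used above
print(hashfp(io.BytesIO(b'hello')))
```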
@@ -1219,7 +1220,7 @@ def cmd_dump(options, persona, objstr, cache):
 def cmd_auto(options):
 	for i in options.files:
-		mf = magic.detect_from_filename(i)
+		mf = detect_from_filename(i)
 		primary = mf[0].split('/', 1)[0]
 
 		mt = mf[0]
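`FileMagic` stays a namedtuple under the wrapper, so this positional access survives the swap; `mf[0]` is `mf.mime_type`:

```python
# shape illustration only, not a real detection result
mf = ('application/x-tar', 'binary', 'tar archive')
assert mf[0].split('/', 1)[0] == 'application'	# primary
```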
@@ -1261,68 +1262,113 @@ def cmd_list(options, persona, objstr, cache):
 # This is needed so that if it creates a FileObj, which may be
 # expensive (hashing large file), that it gets saved.
-@init_datastructs
-def cmd_container(options, persona, objstr, cache):
-	for i in options.files:
-		with open(i, 'rb') as fp:
-			torrent = bencode.bdecode(fp.read())
-		bencodedinfo = bencode.bencode(torrent['info'])
-		infohash = hashlib.sha1(bencodedinfo).hexdigest()
-
-		# XXX - not entirely happy w/ URI
-		uri = 'magnet:?xt=urn:btih:%s&dn=%s' % (infohash,
-		    torrent['info']['name'].decode('utf-8'))
-
-		try:
-			cont = objstr.by_id(Container.make_id(uri))
-		except KeyError:
-			pass
-		else:
-			if not 'incomplete' in cont:
-				print('Warning, container already complete, skipping %s.' % repr(i), file=sys.stderr)
-				continue
-
-		good, bad = validate_file(i)
-
-		if bad:
-			print('Warning, incomple/invalid files, not added for %s:' % repr(i),
-			    file=sys.stderr)
-			print('\n'.join('\t%s' %
-			    repr(str(pathlib.Path(*x.parts[1:]))) for x in
-			    sorted(bad)), file=sys.stderr)
-
-		files = []
-		hashes = []
-
-		for j in sorted(good):
-			files.append(str(pathlib.PosixPath(*j.parts[1:])))
-			try:
-				fobj = objstr.by_file(j, ('file',))[0]
-			except:
-				fobj = persona.by_file(j)
-				objstr.loadobj(fobj)
-
-			# XXX - ensure only one is added?
-			hashes.extend(fobj.hashes)
-
-		kwargs = dict(files=files, hashes=hashes,
-		    uri=uri)
-
-		if bad:
-			kwargs['incomplete'] = True
-
-		# XXX - doesn't combine files/hashes, that is if a
-		# Container has one set of good files, and then the
-		# next scan has a different set, only the second set
-		# will be present, not any from the first set.
-		try:
-			cont = objstr.by_id(Container.make_id(uri))
-			cont = cont.new_version(dels=() if bad
-			    else ('incomplete',), replaces=kwargs.items())
-		except KeyError:
-			cont = persona.Container(**kwargs)
-
-		objstr.loadobj(cont)
+def handle_bittorrent(fname, persona, objstr):
+	with open(fname, 'rb') as fp:
+		torrent = bencode.bdecode(fp.read())
+	bencodedinfo = bencode.bencode(torrent['info'])
+	infohash = hashlib.sha1(bencodedinfo).hexdigest()
+
+	# XXX - not entirely happy w/ URI
+	uri = 'magnet:?xt=urn:btih:%s&dn=%s' % (infohash,
+	    torrent['info']['name'].decode('utf-8'))
+
+	try:
+		cont = objstr.by_id(Container.make_id(uri))
+	except KeyError:
+		pass
+	else:
+		if not 'incomplete' in cont:
+			print('Warning, container already complete, skipping %s.' % repr(fname), file=sys.stderr)
+			return
+
+	good, bad = validate_file(fname)
+
+	if bad:
+		print('Warning, incomplete/invalid files, not added for %s:' %
+		    repr(fname), file=sys.stderr)
+		print('\n'.join('\t%s' %
+		    repr(str(pathlib.Path(*x.parts[1:]))) for x in
+		    sorted(bad)), file=sys.stderr)
+
+	files = []
+	hashes = []
+
+	for j in sorted(good):
+		files.append(str(pathlib.PosixPath(*j.parts[1:])))
+		try:
+			fobj = objstr.by_file(j, ('file',))[0]
+		except:
+			fobj = persona.by_file(j)
+			objstr.loadobj(fobj)
+
+		# XXX - ensure only one is added?
+		hashes.extend(fobj.hashes)
+
+	kwargs = dict(files=files, hashes=hashes,
+	    uri=uri)
+
+	if bad:
+		kwargs['incomplete'] = True
+
+	# XXX - doesn't combine files/hashes, that is if a
+	# Container has one set of good files, and then the
+	# next scan has a different set, only the second set
+	# will be present, not any from the first set.
+	try:
+		cont = objstr.by_id(Container.make_id(uri))
+		cont = cont.new_version(dels=() if bad
+		    else ('incomplete',), replaces=kwargs.items())
+	except KeyError:
+		cont = persona.Container(**kwargs)
+
+	objstr.loadobj(cont)
+
+def handle_archive(fname, persona, objstr):
+	with libarchive.Archive(fname) as arch:
+		files = []
+		hashes = []
+
+		for i in arch:
+			if not i.isfile():
+				continue
+
+			files.append(i.pathname)
+
+			with arch.readstream(i.size) as fp:
+				hashes.append(_hashfp(fp))
+
+	try:
+		fobj = objstr.by_file(fname, ('file',))[0]
+	except:
+		fobj = persona.by_file(fname)
+		objstr.loadobj(fobj)
+
+	uri = _makehashuri(fobj.hashes[0])
+
+	kwargs = dict(files=files, hashes=hashes,
+	    uri=uri)
+
+	try:
+		cont = objstr.by_id(Container.make_id(uri))
+
+		# XXX - only update when different, check uri
+		cont = cont.new_version(replaces=kwargs.items())
+	except KeyError:
+		cont = persona.Container(**kwargs)
+
+	objstr.loadobj(cont)
+
+_container_mapping = {
+	'application/x-bittorrent': handle_bittorrent,
+	'application/x-tar': handle_archive,
+}
+
+@init_datastructs
+def cmd_container(options, persona, objstr, cache):
+	for i in options.files:
+		mf = detect_from_filename(i)
+		#_debprint('mf:', repr(mf))
+		fun = _container_mapping[mf.mime_type]
+
+		fun(i, persona, objstr)
 
 def _json_objstream(fp):
 	inp = fp.read()
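The rewrite turns the torrent-only loop into a MIME-keyed dispatch: `cmd_container` classifies the file, then hands it to a per-format handler, and an unmapped MIME type now surfaces as a `KeyError`. Supporting another container format only needs a handler with the same `(fname, persona, objstr)` signature plus one mapping entry; `handle_zip` here is hypothetical:

```python
def handle_zip(fname, persona, objstr):	# hypothetical handler
	# enumerate members, hash each with _hashfp(), then create or
	# new_version() a Container, mirroring handle_archive above
	raise NotImplementedError

_container_mapping['application/zip'] = handle_zip
```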
@@ -2135,6 +2181,9 @@ class _TestCases(unittest.TestCase):
 			elif special == 'delete files':
 				for i in cmd['files']:
 					os.unlink(i)
+			elif special == 'setup tar file':
+				shutil.copy(self.fixtures /
+				    'testfile.tar.gz', self.tempdir)
 			else: # pragma: no cover
 				raise ValueError('unhandled special: %s' % repr(special))
@@ -2181,7 +2230,6 @@ class _TestCases(unittest.TestCase):
 			if outcheck:
 				stdout.seek(0)
 				self.objcompare(_json_objstream(stdout), outcheck)
-
 			self.assertEqual(stderr.getvalue(), cmd.get('stderr', ''))
@@ -2218,7 +2266,8 @@ class _TestCases(unittest.TestCase):
 				self.setUp()
 				os.chdir(self.tempdir)
 
-				self.run_command_file(i)
+				with self.subTest(file=i):
+					self.run_command_file(i)
 
 	# XXX - the following test may no longer be needed
 	def test_main(self):
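Wrapping each command file in `subTest` means one failing file no longer aborts the rest, and the failure report names the offending file. The standard pattern, with illustrative file names:

```python
import unittest

class Demo(unittest.TestCase):
	def test_files(self):
		for fname in ('cmds.basic.json', 'cmds.container.json'):
			with self.subTest(file=fname):
				# a failure is reported as test_files (file=...)
				# and the loop continues with the next file
				self.assertTrue(fname.endswith('.json'))
```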
@@ -0,0 +1,12 @@
+# This file contains magic entries that are used by the tests and by
+# the code; it must be present for them to work properly.
+
+# Transmission adds this
+0	string	d10:created\ by	BitTorrent file
+!:mime	application/x-bittorrent
+!:ext	torrent
+
+# BitTornado adds this
+0	string	d13:creation\ date	BitTorrent file
+!:mime	application/x-bittorrent
+!:ext	torrent
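Each rule is the usual magic(5) quad of offset, type, test, and message, with `!:mime` and `!:ext` attaching a MIME type and extension to the preceding match. Once these rules are loaded by the wrapper below, a Transmission- or BitTornado-created torrent should classify as expected; a sketch with a hypothetical path:

```python
from medashare.magic_wrap import detect_from_filename

mf = detect_from_filename('example.torrent')	# hypothetical file
assert mf.mime_type == 'application/x-bittorrent'
```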
@@ -0,0 +1,155 @@
+import functools
+import importlib
+import magic
+import os
+import pathlib
+import shutil
+import tempfile
+import unittest
+
+from .utils import _debprint
+
+__doc__ = '''
+This is a number of hacks to the Python magic module so that it works
+better. These bugs should be fixed in the module, but I don't want to
+deal w/ forking and getting the fixes upstreamed.
+'''
+
+magic.FileMagic = magic.namedtuple('FileMagic', ('mime_type', 'encoding',
+    'name', 'compressed_type'), defaults=[ '' ])
+
+from magic import *
+
+__all__ = [
+	'detect_from_filename',
+	'detect_from_content',
+]
+
+_mgc_data = {}
+_lcl_mgc_data = None
+
+# Wrap magic.open so that we look past compression
+_real_magic_open = magic.open
+@functools.wraps(magic.open)
+def open(flags):
+	return _real_magic_open(flags|magic.MAGIC_COMPRESS)
+magic.open = open
+
+def _create_filemagic(mime_detected, type_detected):
+	try:
+		mime_type, mime_encoding = mime_detected.split('; ', 1)
+	except ValueError:
+		raise ValueError(mime_detected)
+
+	kwargs = {}
+	try:
+		mime_encoding, compressed_type = mime_encoding.split(' compressed-encoding=')
+	except ValueError:
+		pass
+	else:
+		compressed_type, _ = compressed_type.split('; ', 1)
+		kwargs['compressed_type'] = compressed_type
+
+	return FileMagic(name=type_detected, mime_type=mime_type,
+	    encoding=mime_encoding.replace('charset=', ''), **kwargs)
+magic._create_filemagic = _create_filemagic
+
+def _get_mgc_data(fname):
+	try:
+		return _mgc_data[fname]
+	except KeyError:
+		data = pathlib.Path(fname).read_bytes()
+		_mgc_data[fname] = data
+		return data
+
+def _compile_file(inp, out, tempdir):
+	oldcwd = pathlib.Path.cwd()
+	try:
+		os.chdir(tempdir)
+		mag = magic.open(magic.MAGIC_NONE)
+		mag.compile(str(inp))
+		inp.with_suffix('.mgc').rename(out)
+	finally:
+		os.chdir(oldcwd)
+
+def _compile_lcl():
+	magicfile = importlib.resources.files('medashare') / 'magic'
+
+	try:
+		d = pathlib.Path(tempfile.mkdtemp()).resolve()
+
+		# write out data
+		inpfile = d / 'magic'
+		inpfile.write_bytes(magicfile.read_bytes())
+
+		# where it'll go
+		outfile = d / 'someotherfile'
+
+		_compile_file(inpfile, outfile, tempdir=d)
+
+		return outfile.read_bytes()
+	finally:
+		shutil.rmtree(d)
+
+def _get_mgc_res():
+	global _lcl_mgc_data
+
+	if _lcl_mgc_data is None:
+		try:
+			mgcfile = importlib.resources.files('medashare') / 'magic.mgc'
+			_lcl_mgc_data = mgcfile.read_bytes()
+		except FileNotFoundError:
+			_lcl_mgc_data = _compile_lcl()
+
+	_debprint(repr(_lcl_mgc_data))
+
+	return _lcl_mgc_data
+
+# patch magic to load custom magic file
+_mgp = magic._libraries['magic'].magic_getpath
+_mgp.restype = magic.c_char_p
+_mgp.argtypes = [ magic.c_char_p, magic.c_int ]
+
+_mlb = magic._libraries['magic'].magic_load_buffers
+_mlb.restype = magic.c_int
+_mlb.argtypes = [ magic.magic_t, magic.POINTER(magic.c_void_p), magic.POINTER(magic.c_size_t), magic.c_size_t ]
+
+def _new_magic_load(self, fname=None):
+	files = _mgp(None, 0).decode('utf-8') + '.mgc' + ':' + str(pathlib.Path(__file__).parent / 'magic')
+	return magic._load(self._magic_t, files.encode('utf-8'))
+
+	# XXX - for some reason this code isn't working
+	files = [ _mgp(None, 0).decode('utf-8') + '.mgc' ]
+	buffers = [ _get_mgc_data(x) for x in files ] + [ _get_mgc_res() ]
+	#buffers.reverse()
+	del buffers[0]
+	cnt = len(buffers)
+
+	mgcdatas = [ (magic.c_char * len(x))(*x) for x in buffers ]
+	bufs = (magic.c_void_p * cnt)(*(magic.cast(magic.pointer(x), magic.c_void_p) for x in mgcdatas))
+	sizes = (magic.c_size_t * cnt)(*(len(x) for x in buffers))
+
+	_debprint('mg:', cnt, repr([len(x) for x in buffers]))
+
+	r = _mlb(self._magic_t, bufs, sizes, cnt)
+
+	return r
+magic.Magic.load = _new_magic_load
+
+class _TestMagic(unittest.TestCase):
+	def test_create_filemagic(self):
+		a = _create_filemagic('application/x-tar; charset=binary compressed-encoding=application/gzip; charset=binary', 'foobar')
+		self.assertEqual(a.mime_type, 'application/x-tar')
+		self.assertEqual(a.encoding, 'binary')
+		self.assertEqual(a.compressed_type, 'application/gzip')
+		self.assertEqual(a.name, 'foobar')
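Taken together, the three patches compose: the `open()` wrapper forces `MAGIC_COMPRESS` so libmagic reports the type inside gzip, `_create_filemagic` preserves the `compressed-encoding=` field instead of failing on it, and the `load` override pulls in the bundled magic rules. For the tarball fixture used by the tests above, the expected end-to-end result would be:

```python
from medashare.magic_wrap import detect_from_filename

# assuming the testfile.tar.gz fixture from the test hunk above
mf = detect_from_filename('testfile.tar.gz')
assert mf.mime_type == 'application/x-tar'
assert mf.compressed_type == 'application/gzip'
```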
@@ -6,3 +6,4 @@ from .cli import _TestMigrations
 from .tags import _TestTagCache
 from .mtree import Test
 from .server import _TestCases, _TestPostConfig
+from .magic_wrap import _TestMagic
@@ -1,8 +1,23 @@
 import base64
 import datetime
 import pasn1
+import sys
 import uuid
 
+_real_stderr = sys.stderr
+
+def _debprint(*args): # pragma: no cover
+	import traceback, sys, os.path
+
+	st = traceback.extract_stack(limit=2)[0]
+	sep = ''
+	if args:
+		sep = ':'
+	print('%s:%d%s' % (os.path.basename(st.filename), st.lineno, sep),
+	    *args, file=_real_stderr)
+	sys.stderr.flush()
+
 def _makeuuid(s):
 	if isinstance(s, uuid.UUID):
 		return s
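`_debprint` moves to `utils` so any module can import it. It prints the caller's file name and line via `traceback.extract_stack`, and writes to `_real_stderr`, the stream captured at import time, so output still appears when tests replace `sys.stderr`. Illustrative use and output:

```python
from medashare.utils import _debprint

x = 42
_debprint('x =', x)
# emits something like "mymodule.py:4: x = 42" on the original stderr;
# the file/line are taken from the call site, not from utils.py
```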
@@ -2,13 +2,43 @@
 # python setup.py --dry-run --verbose install
 
 import os.path
-from setuptools import setup, find_packages
+import pathlib
+import shutil
+import subprocess
+
+from setuptools import setup, find_packages, Command, Extension
+from setuptools.command.build_ext import build_ext
+#from medashare.magic_wrap import compile_file
 
+from distutils.core import setup
+
+class file_ext(build_ext):
+	def __init__(self, dist):
+		super().__init__(dist)
+
+	def run(self):
+		# do the building
+		#print(repr(self.distribution))
+		fnames = [ (x, pathlib.Path(self.build_lib) / 'medashare' / x) for x in self.get_source_files() ]
+		oldcwd = os.getcwd()
+		for src, dst in fnames:
+			os.chdir(oldcwd)
+			shutil.copyfile(src, dst)
+			os.chdir(dst.parent)
+
+		cmd = [ 'file', '-C', '-m' ] + [ str(dst) for src, dst in fnames ]
+		#print('running:', cmd)
+		r = subprocess.run(cmd)
+		os.chdir(oldcwd)
+
+		r.check_returncode()
+
+	def get_outputs(self):
+		return [ '%s.mgc' % i for i in self.get_source_files() ]
+
+	# method build_extension not needed, in run
+
 setup(
 	name='medashare',
-	version='0.1.0',
+	version='0.1.1',
 	author='John-Mark Gurney',
 	author_email='jmg@funkthat.com',
 	packages=find_packages(),
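`file_ext.run` copies each magic source into the build tree and compiles it with file(1); `file -C -m <src>` writes `<src>.mgc` into the current directory, which is why the loop chdirs next to each copied file. Roughly equivalent, for a single source (paths illustrative):

```python
import pathlib
import shutil
import subprocess

src = pathlib.Path('medashare/magic')		# magic source in the tree
dst = pathlib.Path('build/lib/medashare/magic')	# illustrative build path
shutil.copyfile(src, dst)
subprocess.run([ 'file', '-C', '-m', str(dst) ],
    cwd=dst.parent, check=True)			# writes magic.mgc alongside
```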
@@ -18,6 +48,9 @@ setup(
 	#download_url='',
 	long_description=open('README.md').read(),
 	python_requires='>=3.8',
+	# This isn't needed till magic_wrap.py can use it
+	#cmdclass=dict(build_ext=file_ext),
+	#ext_modules=[ Extension(name='magic', sources=['medashare/magic']) ],
 	install_requires=[
 		'alembic',
 		'base58',
@@ -30,9 +63,14 @@ setup(
 		'hypercorn', # option, for server only?
 		'orm',
 		'pasn1 @ git+https://www.funkthat.com/gitea/jmg/pasn1.git@c6c64510b42292557ace2b77272eb32cb647399d#egg=pasn1',
+		'python-libarchive @ git+https://www.funkthat.com/gitea/jmg/python-libarchive.git#egg=python-libarchive',
 		'file-magic @ git+https://github.com/file/file.git#egg=file-magic&subdirectory=python',
 		'pydantic[dotenv]',
 	],
+	include_package_data=True,
+	package_data={
+		'medashare': [ 'alembic/**/*.py', 'alembic.ini', ],
+	},
 	extras_require = {
 		# requests needed for fastpi.testclient.TestClient
 		'dev': [ 'coverage', 'requests' ],