diff --git a/ui/fixtures/RAW_CANON_5D_ARGB.CR2 b/ui/fixtures/RAW_CANON_5D_ARGB.CR2 new file mode 100644 index 0000000..b5f7c9b Binary files /dev/null and b/ui/fixtures/RAW_CANON_5D_ARGB.CR2 differ diff --git a/ui/fixtures/RAW_CANON_5D_ARGB.CR2.fmt b/ui/fixtures/RAW_CANON_5D_ARGB.CR2.fmt new file mode 100644 index 0000000..aea1e51 --- /dev/null +++ b/ui/fixtures/RAW_CANON_5D_ARGB.CR2.fmt @@ -0,0 +1 @@ +{"hash": "56729ee7fb7cab255f3a5b2aa09cebdf92f986f18994df674dd54b33b81a72b18dd944e23253e164f5458ea9e0dc66cb32ab58e368f498e469279faae95f33c3", "parts": [[0, 215], [234, 76114]], "size": 11138246} diff --git a/ui/fixtures/RAW_CANON_G2.CRW b/ui/fixtures/RAW_CANON_G2.CRW new file mode 100644 index 0000000..179a96a Binary files /dev/null and b/ui/fixtures/RAW_CANON_G2.CRW differ diff --git a/ui/fixtures/RAW_CANON_G2.CRW.fmt b/ui/fixtures/RAW_CANON_G2.CRW.fmt new file mode 100644 index 0000000..a649879 --- /dev/null +++ b/ui/fixtures/RAW_CANON_G2.CRW.fmt @@ -0,0 +1 @@ +{"hash": "9f8c0c1c59937728ac360733c223291d0030811004b93196d55c0174493540233b673b6bc92aef2e7fd4708a110351664872530bf35a12b7f4fb65a4cd1e1e1e", "parts": [[0, 26], [3339318, 5372], [3344690, 360], [3345050, 68], [3345118, 22], [3345140, 100], [3345240, 42], [3345282, 36], [3345318, 4696], [3350014, 132], [3350146, 136], [3350282, 46], [3350328, 4]], "size": 3350332} diff --git a/ui/medashare/metadata/crw.py b/ui/medashare/metadata/crw.py index 36aed81..b858313 100644 --- a/ui/medashare/metadata/crw.py +++ b/ui/medashare/metadata/crw.py @@ -5,12 +5,17 @@ from .BitField import BitField from fractions import Fraction from io import BytesIO +import bisect import enum +import hashlib import io import itertools +import json +import os import pathlib import string import struct +import unittest # Various specifications: # CRW: https://web.archive.org/web/20081230095207/http://xyrion.org/ciff/CIFFspecV1R04.pdf @@ -53,6 +58,7 @@ class TagName: cls.__init__(r, name, tag) cls._cache[tag] = r + cls._cache[name] = r cls._names.add(name) return r @@ -139,8 +145,9 @@ _tags = { 'CAMERA_IDENT': (1, 16), # Camera Owner 'CAMERA_MAKEMODEL': (1, 10), # Camera Make and Model 'CAMERA_SHUTTERCOUNT': (3, 4), - 'CAMERA_SERIALNO': (3, 11), - 'CAMERA_MODELID': (3, 0x34), # Camera model id + 'CAMERA_BODYID': (3, 0x0b), + 'CAMERA_SERIALNO': (3, 0x17), + 'CAMERA_MODELID': (3, 0x34), # Camera model id 'CAMERA_DECODERTABLE': (3, 0x35), 'MANUFACTURE_COUNTRY': (1, 13), # Camera Counter @@ -697,6 +704,18 @@ def getstructs(fh, aoff, len, endian, fmt): def printcode(code, data): print("0x%04x: %s" % (code, data)) +def defascii(code, fh, aoff, len, endian, cnt=1): + fh.seek(aoff) + data = fh.read(len) + + #print('da:', len, repr(data)) + strs = tuple(x.decode('ascii') for x in data.split(b'\x00', cnt)[:cnt]) + + if cnt == 1: + return strs[0] + + return strs + def defdata(code, fh, aoff, len, endian): fh.seek(aoff) data = fh.read(len) @@ -780,23 +799,19 @@ def doexposeinfo(*args): datahandles = [ #(4, 7, "Thumbnail Big", None), #(4, 8, "Thumbnail Small", None), - (1, 0x16, "File CRW", None), - (1, 0x17, "File THM", None), + + # spec says TIFF like, but there isn't double NUL at end + (1, 0x0a, "Make and Model", lambda *args: defascii(*args, 2)), (2, 0x29, "Focal length (mm), sensor x, y in mils", dofocal), (2, 0x2a, 'ISO, Aperture, Shutter, WBI', doexposeinfo), (2, 0x2d, 'Lots of settings', docamerasettings), (0, None, "def byte alignment", defbyte), - (1, None, "def character string", defdata), + (1, None, "def character string", defascii), (2, None, "def half words", defhalf), (3, None, "def words", defword), (4, None, "def arbitrary structure", defdata), ] -THMB_BIG = 0x2007 -THMB_SML = 0x2008 -FILE_CRW = 0x0816 -FILE_THM = 0x0817 - #0805 desc str #0815 fmt:desc str #080a make NUL model NUL @@ -844,6 +859,9 @@ class heapcontainer(list): def find(self, k): r = self.searchheap(lambda k, v, matk=k: k == matk) + if not len(r): + raise ValueError('tag %s not found' % repr(k)) + if len(r) != 1: raise RuntimeError('more than one found: %s' % repr(k)) @@ -979,7 +997,7 @@ def tiff_ifd(fh, endian, off): yield (None, nextifd, None) -def parse_ciff(fh, offset, length, endian): +def parse_ciff(fh, offset, length, endian, ignoretags): ret = heapcontainer() #print offset, length fh.seek(offset + length - struct.calcsize("I")) @@ -1017,16 +1035,24 @@ def parse_ciff(fh, offset, length, endian): #print (dataType, code), aoff, len if dataType in heapRECs: #print 'recursing in parse_ciff, type: %d, code: 0x%04x' % (dataType, code) - ret.append((dtc, parse_ciff(fh, aoff, olen, endian))) + ret.append((dtc, parse_ciff(fh, aoff, olen, endian, ignoretags))) #print 'back' continue + if dtc in ignoretags: + #print('ignoring:', repr(dtc)) + continue + for itype, iidcode, string, fun in datahandles: - if itype == dataType and iidcode == None or iidcode == code: - #print "found:", string + if itype == dataType and (iidcode == None or iidcode == code): + #print('found:', string, repr(dtc)) if fun: ret.append((dtc, fun(getIDCode(type), fh, aoff, len, endian))) + #else: + # print('ukn dtc:', repr(dtc)) break + else: + raise NotImplementedError('unhandled: %s' % repr(dtc)) return ret @@ -1045,7 +1071,9 @@ def getendian(val): return endian -def idcrw(fh): +def idcrw(fh, ignoretags=()): + ignoretags = set(TagName.find(x) for x in ignoretags) + fh.seek(0) isjpeg = False try: @@ -1056,7 +1084,7 @@ def idcrw(fh): data = fh.read(2) if data != b'\xff\xd8': - raise x + raise ValueError('not a TIFF style (CRW/CR2) or JPEG file') # Find Exif marker pos = 2 @@ -1114,7 +1142,7 @@ def idcrw(fh): if version != 0x00010002: raise ValueError('incorrect version: %08x' % version) fh.seek(0, io.SEEK_END) - return parse_ciff(fh, hlen, fh.tell() - hlen, endian) + return parse_ciff(fh, hlen, fh.tell() - hlen, endian, ignoretags) else: raise ValueError('unknown value: %d' % hlen) @@ -1155,12 +1183,421 @@ if __name__ == '__main__': else: pprint.pprint(ci) -import unittest +class SparseCover: + '''Provides a range coverage function. It allows checks if + [pos, cnt) has been added. + ''' + + def __init__(self, l=[]): + self._sc = [] + + for i in l: + self.add(*i) + + def add(self, pos, cnt): + '''Add [pos, cnt) as present.''' + + sc = self._sc + + scidx = bisect.bisect_right(sc, (pos, float('inf'))) + + #print('add:', repr(sc), pos, cnt, scidx) + + # possibly extend last entry + if scidx - 1 >= 0: + pos1, cnt1 = sc[scidx - 1] + pos1end = pos1 + cnt1 + if pos <= pos1end: + # merge + sc[scidx - 1] = (pos1, max(pos1end - pos1, pos + cnt - pos1)) + return + + # possibly prefix to entry + posend = pos + cnt + if scidx < len(sc) and posend >= sc[scidx][0]: + pos1, cnt1 = sc[scidx] + sc[scidx] = (pos, max(pos1 + cnt1 - pos, posend - pos)) + return + + sc[scidx:scidx] = [ (pos, cnt) ] + + def _check(self, pos1, cnt1, pos2, cnt2): + # check if 2 is within 1 + pos1end = pos1 + cnt1 + pos2end = pos2 + cnt2 + if pos2 >= pos1 and pos2end <= pos1end: + return True + + return False + + def tolist(self): + '''Return a copy of the list. Can be used to recreate or + serialize. + + That is: + SparseCover(sc.tolist()) == sc + ''' + + return self._sc[:] + + def covered(self, pos, cnt): + '''Check to see if [pos, cnt) is covered. + ''' + + sc = self._sc + + if len(sc) == 0: + return False + + scidx = bisect.bisect_right(sc, (pos, float('inf'))) + + #print('f:', repr(sc), pos, cnt, scidx) + + if scidx - 1 >= 0 and self._check(*sc[scidx - 1], pos, cnt): + return True + + if scidx >= len(sc): + return False + + return False + +class _TestSparseCover(unittest.TestCase): + def test_sc_wronginput(self): + # wrong order + self.assertEqual(SparseCover([(10, 1), (1, 5)]).tolist(), [ (1, 5), (10, 1) ]) + + # over lapping, no extension + self.assertEqual(SparseCover([(10, 10), (11, 5)]).tolist(), [ (10, 10) ]) + + # extending + self.assertEqual(SparseCover([(10, 10), (11, 10)]).tolist(), [ (10, 11) ]) + + # prefix overlap + self.assertEqual(SparseCover([(10, 10), (5, 10)]).tolist(), [ (5, 15) ]) + + # prefix exact + self.assertEqual(SparseCover([(10, 10), (5, 5)]).tolist(), [ (5, 15) ]) + + def test_sc(self): + sc = SparseCover() + + self.assertFalse(sc.covered(5, 10)) + + sc.add(10, 50) + + self.assertFalse(sc.covered(5, 10)) + self.assertFalse(sc.covered(50, 50)) + + self.assertTrue(sc.covered(10, 50)) + self.assertTrue(sc.covered(20, 10)) + + sc.add(100, 50) + + self.assertFalse(sc.covered(5, 10)) + self.assertFalse(sc.covered(50, 50)) + self.assertFalse(sc.covered(50, 100)) + self.assertFalse(sc.covered(120, 100)) + self.assertFalse(sc.covered(150, 1)) + + self.assertTrue(sc.covered(20, 10)) + self.assertTrue(sc.covered(100, 50)) + self.assertTrue(sc.covered(100, 1)) + self.assertTrue(sc.covered(149, 1)) + + self.assertEqual(sc.tolist(), [ (10, 50), (100, 50) ]) + + sc = SparseCover(sc.tolist()) + + sc.add(140, 20) + + self.assertEqual(sc.tolist(), [ (10, 50), (100, 60) ]) + + sc.add(1, 5) + + self.assertFalse(sc.covered(50, 100)) + + self.assertTrue(sc.covered(3, 1)) + + self.assertEqual(sc.tolist(), [ (1, 5), (10, 50), (100, 60) ]) + +class _FileEmulator: + __real_open = open + __hash_factory = hashlib.sha512 + + def __init__(self, fname, origfile, emulmetafile): + self._closed = False + self._updateable = False + self._updated = False + self._basefp = None + self._emulmetafile = None + self._pos = 0 + self._sc = None + + orighash = None + + try: + origfp = self.__real_open(origfile, 'rb') + orighash = self._hexdgstfp(origfp) + except FileNotFoundError: + origfp = None + + + try: + with self.__real_open(emulmetafile) as fp: + emuldata = json.load(fp) + + if orighash is not None and emuldata['hash'] != orighash: + raise RuntimeError('hash of emulated date (%s) does not match original file (%s)' % (repr(str(emulmetafile)), repr(str(origfile)))) + + except FileNotFoundError: + if origfp is None: + raise RuntimeError('emulated metadata not present and original file not present: %s' % repr(str(origfile))) + # orig file exists time to emulate it. + + + origfp.seek(0, os.SEEK_END) + + emuldata = { + 'hash': orighash, + 'parts': [], + 'size': origfp.tell(), + } + + # need to make sure this gets written out + self._updated = True + + if origfp is not None: + try: + self._basefp = self.__real_open(fname, 'r+b') + except FileNotFoundError: + # if this fails, race lost, try again + self._basefp = self.__real_open(fname, 'x+b') + + self._updateable = True + else: + self._basefp = self.__real_open(fname, 'rb') + + + self._emulmetafile = emulmetafile + self._emuldata = emuldata + self._origfp = origfp + + self._sc = SparseCover(emuldata['parts']) + + @classmethod + def _hexdgstfp(cls, fp): + fp.seek(0) + + dgst = cls.__hash_factory() + d = None + while d != b'': + d = fp.read(64*1024) + dgst.update(d) + + return dgst.hexdigest() + + def __enter__(self): + return self + + @property + def closed(self): + return self._closed + + def close(self): + if self._closed: + return + + if self._origfp is not None: + self._origfp.close() + self._origfp = None + + if self._basefp is not None: + self._basefp.close() + self._basefp = None + + self._closed = True + + if self._updateable and self._updated: + self._emuldata['parts'] = self._sc.tolist() + with self.__real_open(self._emulmetafile, 'w') as fp: + json.dump(self._emuldata, fp) + print(file=fp) + + self._updated = False + self._updateable = False + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + return False + + def __del__(self): + self.close() + + def tell(self): + return self._pos + + def seek(self, pos, whence=os.SEEK_SET): + if whence == os.SEEK_SET: + self._pos = pos + elif whence == os.SEEK_END: + self._pos = self._emuldata['size'] + pos + else: + raise ValueError('invalid whence: %s' % repr(str(whence))) + + def read(self, cnt): + #if cnt > 100*1000: + # import traceback + # traceback.print_stack() + + if self.closed: + raise ValueError('read of closed file') + + if not self._sc.covered(self._pos, cnt): + if not self._updateable: + raise RuntimeError('dota not present and not updateable') + + # read in the data + self._origfp.seek(self._pos) + data = self._origfp.read(cnt) + + # write it out + self._basefp.seek(self._pos) + self._basefp.write(data) + self._basefp.flush() + + self._sc.add(self._pos, cnt) + self._updated = True + + #print('d1:', len(data), repr(data[:50]), (self._pos, cnt), repr(self._basefp)) + + self._pos += cnt + + return data + + self._basefp.seek(self._pos) + data = self._basefp.read(cnt) + #print('d2:', len(data), repr(data[:50]), (self._pos, cnt), repr(self._basefp)) + + self._pos += cnt + + return data + +class FileMockTest: + ''' + Mixin w/ unittest.TestCase to mock open, and store subsets + of files. + + The original base files are taken from the attribute fmt_origpath. + The sparse file, and it's meta data will be stores in fmt_basepath. + + You can view that fmt_origpath is backing fmt_basepath in that any + missing files/data from fmt_basepath will be obtained from + fmt_origpath. + + Usage: + class MyTestCase(FileMockTest, TestCase): + fmt_origpath = pathlib.Path(xxx) + + def setUp(self): + super().setUp() + ... + + Required Attributes: + fmt_basepath - underlying path + fmt_origpa/h - path to original files + + Note: if one of the following methods are used, super MUST be + called: setUp, tearDown. + + For example, if the class has a setUp function: + class Example(FileMockTest, TestCase): + def setUp(self): + <... custom setup code ...> + + super().setUp() + ''' + + __real_open = open + __ext = '.fmt' + + def setUp(self): + super().setUp() + + try: + self.fmt_origpath = self.fmt_origpath.resolve() + self.fmt_basepath = self.fmt_basepath.resolve() + except AttributeError: + raise RuntimeError('attribute fmt_origpath not defined on class %s' % self.__class__.__name__) + + self.__openmockpatch = unittest.mock.patch(__name__ + '.open') + + self.__openmock = self.__openmockpatch.start() + + self.__openmock.side_effect = self.__genopen + + def __genopen(self, fname, *args, **kwargs): + fname = pathlib.Path(fname) + #print('genop:', repr(fname), repr(self.fmt_basepath), repr(args)) + + emulmetafile = pathlib.Path(str(fname) + self.__ext) + + # 1) not in fmt_basepath + # 2) exists in basepath, but not emulated + if not fname.is_relative_to(self.fmt_basepath.resolve()) or \ + fname.exists() and not emulmetafile.exists(): + return self.__real_open(fname, *args, **kwargs) + + # either base file is missing, or base file is emulated + + if not args or args[0] != 'rb': + raise ValueError('can only emulate a file for reading binary') + + fname = fname.resolve() + + # need to try to pull from original file + bpplen = len(self.fmt_basepath.parts) + assert fname.parts[:bpplen] == self.fmt_basepath.parts + origfile = pathlib.Path(*(self.fmt_origpath.parts + fname.parts[bpplen:])) + + return _FileEmulator(fname, origfile, emulmetafile) + + def tearDown(self): + try: + self.__openmockpatch.stop() + except Exception: + pass + + super().tearDown() + +class _TestFileMock(unittest.TestCase): + def xtest_foo(self): + class TC(FileMockTest, unittest.TestCase): + def test_openmocked(self): + self.assertIsNot(open, FileMockTest._real_open) + + def test_basicopen(self): + with open('foo', 'r') as fp: + fp.read(10) + + + loader = unittest.TestLoader() + try: + loader.loadTestsFromTestCase(TC).debug() + except Exception: + import traceback + traceback.print_exc() + raise + +class _TestCRW(FileMockTest, unittest.TestCase): + fmt_basepath = pathlib.Path('fixtures') + fmt_origpath = fmt_basepath / 'original' -class _TestCRW(unittest.TestCase): def setUp(self): self.fixtures = pathlib.Path('fixtures').resolve() + super().setUp() + def test_tagname(self): a = TagName('foo', (1, 2)) @@ -1278,11 +1715,15 @@ class _TestCRW(unittest.TestCase): def test_crw(self): with open(self.fixtures / 'RAW_CANON_G2.CRW', 'rb') as fp: - ci = idcrw(fp) + ci = idcrw(fp, [ 'CRW_CCDDATA', 'CRW_THMBBIG' ]) self.assertEqual(ci['CRW_INFO']['INFO_EXPOSEINFO']['CAMERA_MODELID'], (17825792, 2222501223)) self.assertEqual(ci.find('CAMERA_MODELID'), (17825792, 2222501223)) + self.assertEqual(ci.find('CAMERA_MAKEMODEL'), ('Canon', 'Canon PowerShot G2')) + self.assertEqual(ci.find('UNKN_ORIGFNAME'), 'CRW_0011.CRW') + self.assertEqual(ci.find('INFO_TARGETTYPE'), (0, 19680, 39964, 10801)) + #print(repr(ci)) #print(repr(list(ci.keys()))) #print(repr(list(ci['CRW_INFO'].keys()))) #print('unkn:', sorted(TagName._unkn)) diff --git a/ui/medashare/tests.py b/ui/medashare/tests.py index 5ab2915..7120e3f 100644 --- a/ui/medashare/tests.py +++ b/ui/medashare/tests.py @@ -7,3 +7,6 @@ from .tags import _TestTagCache from .mtree import Test from .server import _TestCases, _TestPostConfig from .magic import _TestMagic +from .metadata.crw import _TestCRW +from .metadata.crw import _TestFileMock +from .metadata.crw import _TestSparseCover