From a02f32f410919dddb811db31a8f2d556d6653573 Mon Sep 17 00:00:00 2001 From: John-Mark Gurney Date: Thu, 4 May 2023 20:21:07 -0700 Subject: [PATCH] add spec links, drop dead code, add JPEG EXIF parsing... also minor code coverage tweak due to a bug --- ui/medashare/metadata/crw.py | 128 +++++++++++++++++++---------------- 1 file changed, 70 insertions(+), 58 deletions(-) diff --git a/ui/medashare/metadata/crw.py b/ui/medashare/metadata/crw.py index 02e9229..b74de31 100644 --- a/ui/medashare/metadata/crw.py +++ b/ui/medashare/metadata/crw.py @@ -11,6 +11,16 @@ import pathlib import string import struct +# Various specifications: +# CRW: https://web.archive.org/web/20081230095207/http://xyrion.org/ciff/CIFFspecV1R04.pdf +# CR2: https://web.archive.org/web/20230404015346/http://lclevy.free.fr/cr2/ +# JPEG: https://www.w3.org/Graphics/JPEG/itu-t81.pdf +# JFIF: http://www.w3.org/Graphics/JPEG/jfif3.pdf +# EXIF: https://www.cipa.jp/std/documents/e/DC-X008-Translation-2019-E.pdf +# +# Exif Tags: +# https://web.archive.org/web/20230326011043/https://exiftool.org/TagNames/EXIF.html + # At least for Canon G2 CRW's @@ -477,13 +487,7 @@ class ExifTag(enum.IntEnum): FocalLengthIn35mmFilm = 41989 SceneCaptureType = 41990 ImageUniqueID = 42016 - -class AutoName(enum.Enum): - def _generate_next_value_(name, start, count, last_values): - return name - -class Unknown(AutoName): - pass + LensMake = 42035 exifhandlers = { ExifTag.ExposureProgram: lambda x, y, v, o: @@ -951,15 +955,6 @@ def tiff_ifd(fh, endian, off): yield (None, nextifd, None) -def parse_exif(fh, endian, off): - r = [] - for tag, res in tiff_ifd(fh, endian, off): - if tag is None: - return res, r - r.append((tag, res)) - - raise RuntimeError('tiff_ifd did not return a None tag') - def parse_ciff(fh, offset, length, endian): ret = heapcontainer() #print offset, length @@ -1009,29 +1004,6 @@ def parse_ciff(fh, offset, length, endian): ret.append((dtc, fun(getIDCode(type), fh, aoff, len, endian))) break - continue - - if 1: - if type >> 8 in [ 0x28, 0x30]: - print('recursing in parse_ciff', aoff, olen) - parse_ciff(fh, aoff, olen, endian) - print('back') - elif 0 and type in [ THMB_BIG, THMB_SML ]: - fh.seek(aoff) - open('%x.jpg' % type, "a+").write(fh.read(olen)) - else: - fh.seek(aoff) - data = fh.read(len) - print("%04x: %s" % (type, ''.join(map(lambda x: '%02x' % ord(x), data)))) - print(" %s" % repr(data)) - - elif type == 0x080a: - # handle camera name - pass - elif type == 0x1835: - fh.seek(aoff + 2) - width, height = readstruct(fh, "HH") - return ret def getendian(val): @@ -1077,12 +1049,31 @@ def idcrw(fh): except ValueError as x: # Try to see if it's a JPEG file fh.seek(0) - data = fh.read(12) - if data[:2] != '\xff\xd8': + + data = fh.read(2) + if data != b'\xff\xd8': raise x - if data[2] != '\xff' or data[6:10] != 'Exif': - raise ValueError('Exif data not at start of JPEG file') + # Find Exif marker + pos = 2 + while True: + fh.seek(pos) + data = fh.read(10) + if data == b'': + raise ValueError('unexpected end of file') + + if data[:2] == b'\xff\xd9': + # EOI + raise ValueError('Exif data not found.') + + if data[:2] != b'\xff\xe1' or data[4:10] != b'Exif\x00\x00': + # Skip over marker + pos += 2 + int.from_bytes(data[2:4], 'big') + continue + + # required due to coverage bug + if True: #pragma: no cover + break fh = fileoff(fh, fh.tell()) endian = getendian(fh.read(2)) @@ -1093,8 +1084,8 @@ def idcrw(fh): if hlen == 0x2a: #Tiff hoff, idstr, ver, hlen = readstruct(fh, endian + "I2sHI") - if not isjpeg and hoff < 0x10 and idstr != 'CR' and ver != 2: - raise NotImplementedError('normal TIFF, not a CR2') + if not isjpeg and (hoff != 0x10 or idstr != b'CR' or ver != 2): + raise ValueError('normal TIFF, not a CR2') nextoff = [ hoff ] r = [] while nextoff and nextoff[0] != 0: @@ -1192,23 +1183,35 @@ class _TestCRW(unittest.TestCase): def test_bogus(self): # make sure various bogus "files" raise an error - with self.assertRaises(ValueError): - idcrw(BytesIO(b'asldfkjasdklfj')) + structerrors = [ + # Bad CRW/TIFF files + b'II\x1a\x00ldfkjasdklfj', + ] + for i in structerrors: + with self.subTest(filebytes=repr(i)), self.assertRaises(struct.error): + idcrw(BytesIO(i)) - with self.assertRaises(ValueError): - idcrw(BytesIO(b'IIldfkjasdklfj')) + valueerrors = [ + # Generic bad file + b'asldfkjasdklfj', - with self.assertRaises(struct.error): - idcrw(BytesIO(b'II\x1a\x00ldfkjasdklfj')) + # Bad CRW/TIFF files + b'IIldfkjasdklfj', + b'II\x1a\x00ldfkjasdklfjasoijeflsdkfjsldkfj', + b'II\x1a\x00\x00\x00HEAPldfkjasdklfjasoijeflsdkfjsldkfj', + b'II\x1a\x00\x00\x00HEAPCCDRldfkjasdklfjasoijeflsdkfjsldkfj', - with self.assertRaises(ValueError): - idcrw(BytesIO(b'II\x1a\x00ldfkjasdklfjasoijeflsdkfjsldkfj')) + b'II\x2a\x00\x00\x00\x00\x00CRldfkjasdklfjasoijeflsdkfjsldkfj', - with self.assertRaises(ValueError): - idcrw(BytesIO(b'II\x1a\x00\x00\x00HEAPldfkjasdklfjasoijeflsdkfjsldkfj')) + # Bad JPEG/JFIF/EXIF files + b'\xff\xd8', + b'\xff\xd8\xff\xd9', + b'\xff\xd8\xff\xd9', + ] - with self.assertRaises(ValueError): - idcrw(BytesIO(b'II\x1a\x00\x00\x00HEAPCCDRldfkjasdklfjasoijeflsdkfjsldkfj')) + for i in valueerrors: + with self.subTest(filebytes=repr(i)), self.assertRaises(ValueError): + idcrw(BytesIO(i)) def test_crw(self): with open(self.fixtures / 'RAW_CANON_G2.CRW', 'rb') as fp: @@ -1231,4 +1234,13 @@ class _TestCRW(unittest.TestCase): self.assertEqual(ci[0][TIFFTag.ExifIFDPointer][ExifTag.ExposureTime][0], Fraction(1, 200)) - print(repr(ci)) + #print(repr(ci)) + + def test_jpegexif(self): + with open(self.fixtures / 'exif.jpeg', 'rb') as fp: + ci = idcrw(fp) + + self.assertEqual(ci[0][TIFFTag.ExifIFDPointer][ExifTag.ISOSpeedRatings][0], 100) + self.assertEqual(ci[0][TIFFTag.ExifIFDPointer][ExifTag.UserComment], b'UNICODE\x00' + 'abc123สวัสดี'.encode('utf-16-be')) + self.assertEqual(ci[0][TIFFTag.ExifIFDPointer][ExifTag.LensMake], b'Random Lens Maker\x00') + self.assertEqual(ci[0][TIFFTag.ImageDescription], b'Some comment\x00')