Browse Source

add spec links, drop dead code, add JPEG EXIF parsing...

also minor code coverage tweak due to a bug
main
John-Mark Gurney 1 year ago
parent
commit
a02f32f410
1 changed file with 70 additions and 58 deletions
  1. +70
    -58
      ui/medashare/metadata/crw.py

+ 70
- 58
ui/medashare/metadata/crw.py View File

@@ -11,6 +11,16 @@ import pathlib
import string
import struct

# Various specifications:
# CRW: https://web.archive.org/web/20081230095207/http://xyrion.org/ciff/CIFFspecV1R04.pdf
# CR2: https://web.archive.org/web/20230404015346/http://lclevy.free.fr/cr2/
# JPEG: https://www.w3.org/Graphics/JPEG/itu-t81.pdf
# JFIF: http://www.w3.org/Graphics/JPEG/jfif3.pdf
# EXIF: https://www.cipa.jp/std/documents/e/DC-X008-Translation-2019-E.pdf
#
# Exif Tags:
# https://web.archive.org/web/20230326011043/https://exiftool.org/TagNames/EXIF.html


# At least for Canon G2 CRWs

@@ -477,13 +487,7 @@ class ExifTag(enum.IntEnum):
FocalLengthIn35mmFilm = 41989
SceneCaptureType = 41990
ImageUniqueID = 42016

class AutoName(enum.Enum):
    """Enum base class whose ``enum.auto()`` members use their own name as value."""

    def _generate_next_value_(name, start, count, last_values):
        # Hook consulted by enum.auto(); start, count and last_values are
        # ignored and the member's name is returned as its value.
        return name

class Unknown(AutoName):
    """Empty AutoName subclass; it defines no members of its own."""
LensMake = 42035

exifhandlers = {
ExifTag.ExposureProgram: lambda x, y, v, o:
@@ -951,15 +955,6 @@ def tiff_ifd(fh, endian, off):

yield (None, nextifd, None)

def parse_exif(fh, endian, off):
    """Collect the items produced by tiff_ifd up to its None-tag terminator.

    fh, endian and off are passed straight through to tiff_ifd.

    Returns a tuple (res, entries): res is the value paired with the
    None tag, and entries is the list of (tag, res) pairs seen before it.

    Raises RuntimeError if tiff_ifd is exhausted without producing a
    None tag.
    """
    # NOTE(review): the visible tail of tiff_ifd yields a three-item tuple
    # (None, nextifd, None); the two-name unpacking below would raise
    # ValueError on such an item -- confirm against tiff_ifd.
    entries = []
    for tag, res in tiff_ifd(fh, endian, off):
        if tag is not None:
            entries.append((tag, res))
            continue

        # The None tag marks the end of the directory.
        return res, entries

    raise RuntimeError('tiff_ifd did not return a None tag')
def parse_ciff(fh, offset, length, endian):
ret = heapcontainer()
#print offset, length
@@ -1009,29 +1004,6 @@ def parse_ciff(fh, offset, length, endian):
ret.append((dtc, fun(getIDCode(type), fh, aoff, len, endian)))
break

continue

if 1:
if type >> 8 in [ 0x28, 0x30]:
print('recursing in parse_ciff', aoff, olen)
parse_ciff(fh, aoff, olen, endian)
print('back')
elif 0 and type in [ THMB_BIG, THMB_SML ]:
fh.seek(aoff)
open('%x.jpg' % type, "a+").write(fh.read(olen))
else:
fh.seek(aoff)
data = fh.read(len)
print("%04x: %s" % (type, ''.join(map(lambda x: '%02x' % ord(x), data))))
print(" %s" % repr(data))

elif type == 0x080a:
# handle camera name
pass
elif type == 0x1835:
fh.seek(aoff + 2)
width, height = readstruct(fh, "HH")

return ret

def getendian(val):
@@ -1077,12 +1049,31 @@ def idcrw(fh):
except ValueError as x:
# Try to see if it's a JPEG file
fh.seek(0)
data = fh.read(12)
if data[:2] != '\xff\xd8':

data = fh.read(2)
if data != b'\xff\xd8':
raise x

if data[2] != '\xff' or data[6:10] != 'Exif':
raise ValueError('Exif data not at start of JPEG file')
# Find Exif marker
pos = 2
while True:
fh.seek(pos)
data = fh.read(10)
if data == b'':
raise ValueError('unexpected end of file')

if data[:2] == b'\xff\xd9':
# EOI
raise ValueError('Exif data not found.')

if data[:2] != b'\xff\xe1' or data[4:10] != b'Exif\x00\x00':
# Skip over marker
pos += 2 + int.from_bytes(data[2:4], 'big')
continue

# required due to coverage bug
if True: #pragma: no cover
break

fh = fileoff(fh, fh.tell())
endian = getendian(fh.read(2))
@@ -1093,8 +1084,8 @@ def idcrw(fh):
if hlen == 0x2a:
#Tiff
hoff, idstr, ver, hlen = readstruct(fh, endian + "I2sHI")
if not isjpeg and hoff < 0x10 and idstr != 'CR' and ver != 2:
raise NotImplementedError('normal TIFF, not a CR2')
if not isjpeg and (hoff != 0x10 or idstr != b'CR' or ver != 2):
raise ValueError('normal TIFF, not a CR2')
nextoff = [ hoff ]
r = []
while nextoff and nextoff[0] != 0:
@@ -1192,23 +1183,35 @@ class _TestCRW(unittest.TestCase):
def test_bogus(self):
    """Make sure various bogus "files" raise an error.

    Each input is fed to idcrw through a BytesIO inside its own subTest,
    so a failure reports the offending bytes.  Inputs are grouped by the
    exception type idcrw is expected to raise for them.
    """

    # Inputs expected to raise struct.error.
    structerrors = [
        # Bad CRW/TIFF files
        b'II\x1a\x00ldfkjasdklfj',
    ]

    # Inputs expected to raise ValueError.
    valueerrors = [
        # Generic bad file
        b'asldfkjasdklfj',

        # Bad CRW/TIFF files
        b'IIldfkjasdklfj',
        b'II\x1a\x00ldfkjasdklfjasoijeflsdkfjsldkfj',
        b'II\x1a\x00\x00\x00HEAPldfkjasdklfjasoijeflsdkfjsldkfj',
        b'II\x1a\x00\x00\x00HEAPCCDRldfkjasdklfjasoijeflsdkfjsldkfj',

        b'II\x2a\x00\x00\x00\x00\x00CRldfkjasdklfjasoijeflsdkfjsldkfj',

        # Bad JPEG/JFIF/EXIF files
        b'\xff\xd8',
        b'\xff\xd8\xff\xd9',
    ]

    for i in structerrors:
        with self.subTest(filebytes=repr(i)), self.assertRaises(struct.error):
            idcrw(BytesIO(i))

    for i in valueerrors:
        with self.subTest(filebytes=repr(i)), self.assertRaises(ValueError):
            idcrw(BytesIO(i))

def test_crw(self):
with open(self.fixtures / 'RAW_CANON_G2.CRW', 'rb') as fp:
@@ -1231,4 +1234,13 @@ class _TestCRW(unittest.TestCase):

self.assertEqual(ci[0][TIFFTag.ExifIFDPointer][ExifTag.ExposureTime][0], Fraction(1, 200))

print(repr(ci))
#print(repr(ci))

def test_jpegexif(self):
    """Check selected TIFF and Exif tag values parsed from the exif.jpeg fixture."""
    fixture = self.fixtures / 'exif.jpeg'
    with open(fixture, 'rb') as fp:
        ci = idcrw(fp)

    # First entry of the result, and the Exif sub-directory inside it.
    ifd0 = ci[0]
    exif = ifd0[TIFFTag.ExifIFDPointer]

    self.assertEqual(exif[ExifTag.ISOSpeedRatings][0], 100)
    self.assertEqual(
        exif[ExifTag.UserComment],
        b'UNICODE\x00' + 'abc123สวัสดี'.encode('utf-16-be'),
    )
    self.assertEqual(exif[ExifTag.LensMake], b'Random Lens Maker\x00')
    self.assertEqual(ifd0[TIFFTag.ImageDescription], b'Some comment\x00')

Loading…
Cancel
Save