Remove JFIF data when EXIF extraction fails

Signed-off-by: Matthias <matthias@pebble>
This commit is contained in:
Matthias 2024-10-27 00:45:15 +02:00
parent 795fd20030
commit 0f3a3a8d6e
No known key found for this signature in database
GPG Key ID: F141C4C1F8F39D19
2 changed files with 37 additions and 4 deletions

View File

@ -1,7 +1,8 @@
import requests
from requests.adapters import HTTPAdapter
from requests.adapters import Retry
import json
from PIL import Image
import io
import os
import concurrent.futures
import argparse
@ -64,10 +65,41 @@ def download(url, filepath, metadata=None):
with open(str(filepath), "wb") as f:
r = session.get(url, stream=True, timeout=6)
try:
image = write_exif(r.content, metadata)
img = write_exif(r.content, metadata)
except Exception as e:
print(f"FAILED to write exif data for {filepath}. Error: {e}")
f.write(image)
print(
f"{filepath} FAILED to write exif data. Error: {e} Retrying with reduced EXIF.".replace(
"\n", " | "
)
)
# write_exif(img_byte_arr, metadata) crashes when JFIF fields are present,
# so as a workaround we remove those fields with Pillow.
# Definitely not the most elegant solution...
try:
r = session.get(url, stream=True, timeout=6)
im = Image.open(r.raw)
exif_fields = list(im.info.keys())
# print(f"{filepath} detected exif fields : {exif_fields}")
fields_to_keep = ("exif", "dpi")
for k in exif_fields:
if k not in fields_to_keep:
del im.info[k]
# print(f"{filepath} deleted exif field: {k}")
# done cleaning, now converting pillow image back to bytearray
img_byte_arr = io.BytesIO()
im.save(img_byte_arr, format="JPEG")
img_byte_arr = img_byte_arr.getvalue()
img = write_exif(img_byte_arr, metadata)
except Exception as e:
print(
f"{filepath} FAILED WORKAROUND. Error: {e} Saving image without EXIF data.".replace(
"\n", " | "
)
)
img = r.content
f.write(img)
print("{} downloaded {}".format(filepath, r))

View File

@ -3,3 +3,4 @@ pytz >= 2023.3
timezonefinder >=6.2.0
pyexiv2 >= 2.8.2
panoramax_cli >= 1.1.1
pillow >= 11.0.0