conccurency on get_image_data

This commit is contained in:
Stefal 2023-09-14 23:27:03 +02:00
parent e9af1c34f2
commit b1fffc1fab
1 changed files with 35 additions and 6 deletions

View File

@ -4,6 +4,7 @@ from requests.adapters import Retry
import json import json
import os import os
import asyncio import asyncio
import concurrent.futures
import argparse import argparse
from datetime import datetime from datetime import datetime
import writer import writer
@ -32,12 +33,11 @@ def background(f):
return asyncio.get_event_loop().run_in_executor(None, f, *args, **kwargs) return asyncio.get_event_loop().run_in_executor(None, f, *args, **kwargs)
return wrapped return wrapped
#TODO add try/except and retry (see https://www.zenrows.com/blog/python-requests-retry#avoid-getting-blocked) #@background
@background def download(url, filepath, metadata=None):
def download(url, fn, metadata=None):
r = session.get(url, stream=True, timeout=6) r = session.get(url, stream=True, timeout=6)
image = write_exif(r.content, metadata) image = write_exif(r.content, metadata)
with open(str(fn), "wb") as f: with open(str(filepath), "wb") as f:
f.write(image) f.write(image)
def get_single_image_data(image_id, mly_header): def get_single_image_data(image_id, mly_header):
@ -62,6 +62,31 @@ def get_image_data_from_sequences(sequences_id, mly_header):
image_data['sequence_id'] = sequence_id image_data['sequence_id'] = sequence_id
yield image_data yield image_data
def get_image_data_from_sequences__future(sequences_id, mly_header):
for i,sequence_id in enumerate(sequences_id):
url = 'https://graph.mapillary.com/image_ids?sequence_id={}'.format(sequence_id)
r = requests.get(url, headers=header)
data = r.json()
image_ids = data['data']
total_image = len(image_ids)
print("{} images in sequence {} of {} - id : {}".format(total_image, i+1, len(sequences_id), sequence_id))
print('getting images data')
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
future_to_url = {}
for x in range(0, total_image):
image_id = image_ids[x]['id']
future_to_url[executor.submit(get_single_image_data, image_id, mly_header)] = image_id
for future in concurrent.futures.as_completed(future_to_url):
url = future_to_url[future]
image_data = future.result()
image_data['sequence_id'] = sequence_id
#print(image_data)
yield image_data
#image_data = get_single_image_data(image_id, mly_header)
#image_data['sequence_id'] = sequence_id
#yield image_data
def write_exif(picture, img_metadata): def write_exif(picture, img_metadata):
''' '''
Write exif metadata Write exif metadata
@ -95,10 +120,12 @@ if __name__ == '__main__':
if not os.path.exists('data'): if not os.path.exists('data'):
os.makedirs('data') os.makedirs('data')
for i,image_data in enumerate(get_image_data_from_sequences(sequence_ids, header)): #for i,image_data in enumerate(get_image_data_from_sequences(sequence_ids, header)):
for i,image_data in enumerate(get_image_data_from_sequences__future(sequence_ids, header)):
if args.image_limit is not None and i >= args.image_limit: if args.image_limit is not None and i >= args.image_limit:
break break
images_data.append(image_data) images_data.append(image_data)
#sys.exit()
print('downloading.. this process will take a while. please wait') print('downloading.. this process will take a while. please wait')
for i,image_data in enumerate(images_data): for i,image_data in enumerate(images_data):
@ -116,4 +143,6 @@ if __name__ == '__main__':
direction = image_data['compass_angle'], direction = image_data['compass_angle'],
altitude = image_data['altitude'], altitude = image_data['altitude'],
) )
download(image_data['thumb_original_url'],path, img_metadata) with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
executor.submit(download, url=image_data['thumb_original_url'], filepath=path, metadata=img_metadata)
#download(image_data['thumb_original_url'],path, img_metadata)