conccurency on get_image_data
This commit is contained in:
parent
e9af1c34f2
commit
b1fffc1fab
41
download.py
41
download.py
|
@ -4,6 +4,7 @@ from requests.adapters import Retry
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import concurrent.futures
|
||||||
import argparse
|
import argparse
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import writer
|
import writer
|
||||||
|
@ -32,12 +33,11 @@ def background(f):
|
||||||
return asyncio.get_event_loop().run_in_executor(None, f, *args, **kwargs)
|
return asyncio.get_event_loop().run_in_executor(None, f, *args, **kwargs)
|
||||||
return wrapped
|
return wrapped
|
||||||
|
|
||||||
#TODO add try/except and retry (see https://www.zenrows.com/blog/python-requests-retry#avoid-getting-blocked)
|
#@background
|
||||||
@background
|
def download(url, filepath, metadata=None):
|
||||||
def download(url, fn, metadata=None):
|
|
||||||
r = session.get(url, stream=True, timeout=6)
|
r = session.get(url, stream=True, timeout=6)
|
||||||
image = write_exif(r.content, metadata)
|
image = write_exif(r.content, metadata)
|
||||||
with open(str(fn), "wb") as f:
|
with open(str(filepath), "wb") as f:
|
||||||
f.write(image)
|
f.write(image)
|
||||||
|
|
||||||
def get_single_image_data(image_id, mly_header):
|
def get_single_image_data(image_id, mly_header):
|
||||||
|
@ -62,6 +62,31 @@ def get_image_data_from_sequences(sequences_id, mly_header):
|
||||||
image_data['sequence_id'] = sequence_id
|
image_data['sequence_id'] = sequence_id
|
||||||
yield image_data
|
yield image_data
|
||||||
|
|
||||||
|
def get_image_data_from_sequences__future(sequences_id, mly_header):
|
||||||
|
for i,sequence_id in enumerate(sequences_id):
|
||||||
|
url = 'https://graph.mapillary.com/image_ids?sequence_id={}'.format(sequence_id)
|
||||||
|
r = requests.get(url, headers=header)
|
||||||
|
data = r.json()
|
||||||
|
image_ids = data['data']
|
||||||
|
total_image = len(image_ids)
|
||||||
|
print("{} images in sequence {} of {} - id : {}".format(total_image, i+1, len(sequences_id), sequence_id))
|
||||||
|
print('getting images data')
|
||||||
|
|
||||||
|
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
|
||||||
|
future_to_url = {}
|
||||||
|
for x in range(0, total_image):
|
||||||
|
image_id = image_ids[x]['id']
|
||||||
|
future_to_url[executor.submit(get_single_image_data, image_id, mly_header)] = image_id
|
||||||
|
for future in concurrent.futures.as_completed(future_to_url):
|
||||||
|
url = future_to_url[future]
|
||||||
|
image_data = future.result()
|
||||||
|
image_data['sequence_id'] = sequence_id
|
||||||
|
#print(image_data)
|
||||||
|
yield image_data
|
||||||
|
#image_data = get_single_image_data(image_id, mly_header)
|
||||||
|
#image_data['sequence_id'] = sequence_id
|
||||||
|
#yield image_data
|
||||||
|
|
||||||
def write_exif(picture, img_metadata):
|
def write_exif(picture, img_metadata):
|
||||||
'''
|
'''
|
||||||
Write exif metadata
|
Write exif metadata
|
||||||
|
@ -95,10 +120,12 @@ if __name__ == '__main__':
|
||||||
if not os.path.exists('data'):
|
if not os.path.exists('data'):
|
||||||
os.makedirs('data')
|
os.makedirs('data')
|
||||||
|
|
||||||
for i,image_data in enumerate(get_image_data_from_sequences(sequence_ids, header)):
|
#for i,image_data in enumerate(get_image_data_from_sequences(sequence_ids, header)):
|
||||||
|
for i,image_data in enumerate(get_image_data_from_sequences__future(sequence_ids, header)):
|
||||||
if args.image_limit is not None and i >= args.image_limit:
|
if args.image_limit is not None and i >= args.image_limit:
|
||||||
break
|
break
|
||||||
images_data.append(image_data)
|
images_data.append(image_data)
|
||||||
|
#sys.exit()
|
||||||
|
|
||||||
print('downloading.. this process will take a while. please wait')
|
print('downloading.. this process will take a while. please wait')
|
||||||
for i,image_data in enumerate(images_data):
|
for i,image_data in enumerate(images_data):
|
||||||
|
@ -116,4 +143,6 @@ if __name__ == '__main__':
|
||||||
direction = image_data['compass_angle'],
|
direction = image_data['compass_angle'],
|
||||||
altitude = image_data['altitude'],
|
altitude = image_data['altitude'],
|
||||||
)
|
)
|
||||||
download(image_data['thumb_original_url'],path, img_metadata)
|
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
|
||||||
|
executor.submit(download, url=image_data['thumb_original_url'], filepath=path, metadata=img_metadata)
|
||||||
|
#download(image_data['thumb_original_url'],path, img_metadata)
|
||||||
|
|
Loading…
Reference in New Issue