# mapillary_download/download.py

import requests
from requests.adapters import HTTPAdapter
from requests.adapters import Retry
import json
import os
import concurrent.futures
import argparse
from datetime import datetime
import writer
from model import PictureType
import sys

# Module-wide HTTP session; calls made through it pick up the retry policy
# mounted below.
session = requests.Session()
# Retry policy handed to the adapter: up to 5 retries, a backoff factor of 1
# between attempts, and retries forced on the listed HTTP status codes
# (429, 502, 503, 504).
retries_strategies = Retry(
    total=5,
    backoff_factor=1,
    status_forcelist=[429,502, 503, 504],
    )
# The retrying adapter is mounted for the 'https://' prefix only.
session.mount('https://', HTTPAdapter(max_retries=retries_strategies))

def parse_args(argv=None):
    """Parse the command-line arguments of the downloader.

    Args:
        argv: Argument list to parse. ``None`` lets argparse read the
            process arguments.

    Returns:
        The parsed ``argparse.Namespace``.

    Exits through ``parser.error`` (``SystemExit``) when neither a sequence
    id nor an image id was actually supplied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('access_token', type=str, help='Your mapillary access token')
    parser.add_argument('--sequence_ids', type=str, nargs='*', help='The mapillary sequence id(s) to download')
    parser.add_argument('--image_ids', type=int, nargs='*', help='The mapillary image id(s) to get their sequence id(s)')
    parser.add_argument('--destination', type=str, default='data', help='Path destination for the images')
    parser.add_argument('--image_limit', type=int, default=None, help='How many images you want to download')
    parser.add_argument('--overwrite', default=False, action='store_true', help='overwrite existing images')
    parser.add_argument("-v", "--version", action="version", version="release 1.0")
    args = parser.parse_args(argv)
    # nargs='*' produces an empty list (not None) when a flag is passed with
    # no values, so test truthiness to reject "--sequence_ids" on its own.
    if not args.sequence_ids and not args.image_ids:
        parser.error("Please enter at least one sequence id or image id")
    return args

def download(url, filepath, metadata=None):
    """Download one image, tag it with EXIF metadata and save it to disk.

    Args:
        url: URL of the image to fetch.
        filepath: Destination path of the image file.
        metadata: Metadata object forwarded unchanged to ``write_exif``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # Fetch and tag the picture BEFORE opening the destination file: opening
    # it first left an empty file behind whenever the request or the EXIF
    # step failed, and later runs then skipped it as "already exists".
    r = session.get(url, timeout=6)
    r.raise_for_status()
    image = write_exif(r.content, metadata)
    with open(str(filepath), "wb") as f:
        f.write(image)
    print("{} downloaded".format(filepath))


def get_single_image_data(image_id, mly_header):
    """Query the Mapillary Graph API for one image and return the decoded JSON.

    Args:
        image_id: Identifier of the image, interpolated into the request URL.
        mly_header: HTTP headers sent with the request.

    Returns:
        The JSON body of the response, as decoded by ``Response.json()``.
    """
    endpoint = 'https://graph.mapillary.com/{}?fields=thumb_original_url,altitude,camera_type,captured_at,compass_angle,geometry,exif_orientation,sequence'.format(image_id)
    return session.get(endpoint, headers=mly_header).json()


def get_image_data_from_sequences(sequences_id, mly_header):
    """Yield the metadata of every image of the given sequences, sequentially.

    Args:
        sequences_id: Sequence ids to walk through.
        mly_header: HTTP headers sent with every request.

    Yields:
        One dict per image, as returned by ``get_single_image_data``, with an
        extra ``'sequence_id'`` key. When the image listing of a sequence
        comes back with an ``'error'`` key, that payload is yielded instead
        (also tagged with ``'sequence_id'``) so callers can detect it the
        same way as a per-image error.
    """
    total_sequences = len(sequences_id)
    for i, sequence_id in enumerate(sequences_id):
        url = 'https://graph.mapillary.com/image_ids?sequence_id={}'.format(sequence_id)
        # Use the function's own header argument (not a module global) and
        # the shared session so the retry policy applies here too.
        r = session.get(url, headers=mly_header)
        data = r.json()
        if 'error' in data:
            # Hand the API error to the caller instead of failing on data['data'].
            data['sequence_id'] = sequence_id
            yield data
            continue
        image_ids = data['data']
        total_image = len(image_ids)
        print("{} images in sequence {} of {}  - id : {}".format(total_image, i+1, total_sequences, sequence_id))
        print('getting images data')
        for image in image_ids:
            image_data = get_single_image_data(image['id'], mly_header)
            image_data['sequence_id'] = sequence_id
            yield image_data


def get_image_data_from_sequences__future(sequences_id, mly_header):
    """Yield the metadata of every image of the given sequences, fetched concurrently.

    The image ids of each sequence are listed with one request, then the
    per-image metadata is fetched through a pool of 10 worker threads.
    Results are yielded in completion order, not in listing order.

    Args:
        sequences_id: Sequence ids to walk through.
        mly_header: HTTP headers sent with every request.

    Yields:
        One dict per image, as returned by ``get_single_image_data``, with an
        extra ``'sequence_id'`` key. When the image listing of a sequence
        comes back with an ``'error'`` key, that payload is yielded instead
        (also tagged with ``'sequence_id'``) so callers can detect it the
        same way as a per-image error. Sequences whose listing is empty are
        reported on stdout and skipped.
    """
    total_sequences = len(sequences_id)
    for i, sequence_id in enumerate(sequences_id):
        url = 'https://graph.mapillary.com/image_ids?sequence_id={}'.format(sequence_id)
        # Use the function's own header argument (not a module global) and
        # the shared session so the retry policy applies here too.
        r = session.get(url, headers=mly_header)
        data = r.json()
        if 'error' in data:
            # Hand the API error to the caller instead of failing on data['data'].
            data['sequence_id'] = sequence_id
            yield data
            continue
        image_ids = data.get('data')
        if not image_ids:
            print("Empty or wrong sequence {} of {} - id : {}".format(i+1, total_sequences, sequence_id))
            continue
        total_image = len(image_ids)
        print("{} images in sequence {} of {}  - id : {}".format(total_image, i+1, total_sequences, sequence_id))
        print('getting images data')

        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
            pending = [
                executor.submit(get_single_image_data, image['id'], mly_header)
                for image in image_ids
            ]
            for future in concurrent.futures.as_completed(pending):
                image_data = future.result()
                image_data['sequence_id'] = sequence_id
                yield image_data


def write_exif(picture, img_metadata):
    '''
    Tag a picture with its metadata and return the updated picture.

    picture is handed to writer.Writer; img_metadata is passed to each of
    the writer's add_* steps (capture date, lat/lon, altitude, direction)
    before the changes are applied. The value returned is whatever the
    writer's get_Bytes() gives back.
    '''
    with writer.Writer(picture) as exif_writer:
        # Queue every tag first, then apply them in one go.
        exif_writer.add_datetimeoriginal(img_metadata)
        exif_writer.add_lat_lon(img_metadata)
        exif_writer.add_altitude(img_metadata)
        exif_writer.add_direction(img_metadata)
        exif_writer.apply()
        tagged_picture = exif_writer.get_Bytes()

    return tagged_picture


if __name__ == '__main__':
    # Script entry point:
    #   1. resolve the sequences to download (given directly and/or through image ids),
    #   2. collect the metadata of their images (optionally capped by --image_limit),
    #   3. download the images with a small thread pool, one folder per sequence.
    args = parse_args()
    # Drop duplicated ids while keeping their order, so a sequence is fetched once.
    sequence_ids = list(dict.fromkeys(args.sequence_ids)) if args.sequence_ids is not None else []
    images_ids = args.image_ids
    access_token = args.access_token
    images_data = []
    header = {'Authorization' : 'OAuth {}'.format(access_token)}

    if images_ids:
        for image_id in images_ids:
            image_data = get_single_image_data(image_id, header)
            if 'error' in image_data:
                print("data : ", image_data)
                print("something wrong happened ! Please check your image id and/or your connection")
                # Non-zero status: this is a failure, not a normal exit.
                sys.exit(1)
            sequence_id = image_data.get('sequence')
            # Several image ids may belong to the same sequence.
            if sequence_id not in sequence_ids:
                sequence_ids.append(sequence_id)

    for i,image_data in enumerate(get_image_data_from_sequences__future(sequence_ids, header)):
        if args.image_limit is not None and i >= args.image_limit:
            break
        if 'error' in image_data:
            print("data : ", image_data)
            print("something wrong happened ! Please check your token and/or your connection")
            sys.exit(1)
        images_data.append(image_data)

    print('downloading.. this process will take a while. please wait')
    failed_downloads = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        future_to_path = {}
        for image_data in images_data:
            # create a folder for each unique sequence ID to group images by sequence
            path_destination = os.path.join(args.destination, image_data['sequence_id'])
            os.makedirs(path_destination, exist_ok=True)
            capture_time = datetime.utcfromtimestamp(int(image_data['captured_at'])/1000)
            # File name is the capture time, truncated to milliseconds.
            date_time_image_filename = capture_time.strftime('%Y-%m-%d_%HH%Mmn%Ss%f')[:-3] + '.jpg'
            path = os.path.join(path_destination, date_time_image_filename)
            if not args.overwrite and os.path.exists(path):
                print("{} already exists. Skipping ".format(path))
                continue
            img_metadata = writer.PictureMetadata(
                    capture_time = capture_time,
                    longitude = image_data['geometry']['coordinates'][0],
                    latitude = image_data['geometry']['coordinates'][1],
                    picture_type = PictureType("equirectangular") if image_data['camera_type'] == 'spherical' else None,
                    direction = image_data['compass_angle'],
                    altitude = image_data['altitude'],
            )
            future = executor.submit(download, url=image_data['thumb_original_url'], filepath=path, metadata=img_metadata)
            future_to_path[future] = path
        # An exception raised inside a worker is stored on its future; without
        # this loop a failed download would go completely unnoticed.
        for future in concurrent.futures.as_completed(future_to_path):
            error = future.exception()
            if error is not None:
                failed_downloads += 1
                print("{} failed : {}".format(future_to_path[future], error))

    if failed_downloads:
        print("{} image(s) could not be downloaded".format(failed_downloads))
        sys.exit(1)
commit code 2023-01-20 04:19:37 +01:00			`import requests`
add retry on http connections 2023-09-14 20:57:41 +02:00			`from requests.adapters import HTTPAdapter`
			`from requests.adapters import Retry`
commit code 2023-01-20 04:19:37 +01:00			`import json`
			`import os`
conccurency on get_image_data 2023-09-14 23:27:03 +02:00			`import concurrent.futures`
commit code 2023-01-20 04:19:37 +01:00			`import argparse`
Filename for image is datetime 2023-09-10 20:04:26 +02:00			`from datetime import datetime`
WIP : using writer.py from panoramax project. 2023-09-11 22:56:21 +02:00			`import writer`
Switching to pyexiv2 2023-09-12 14:31:10 +02:00			`from model import PictureType`
Download multiple sequence 2023-09-14 00:05:12 +02:00			`import sys`
commit code 2023-01-20 04:19:37 +01:00
add retry on http connections 2023-09-14 20:57:41 +02:00			`session = requests.Session()`
			`retries_strategies = Retry(`
			`total=5,`
			`backoff_factor=1,`
			`status_forcelist=[429,502, 503, 504],`
			`)`
			`session.mount('https://', HTTPAdapter(max_retries=retries_strategies))`

commit code 2023-01-20 04:19:37 +01:00			`def parse_args(argv =None):`
			`parser = argparse.ArgumentParser()`
get sequence from image id(s) 2023-09-20 12:21:14 +02:00			`parser.add_argument('access_token', type=str, help='Your mapillary access token')`
			`parser.add_argument('--sequence_ids', type=str, nargs='*', help='The mapillary sequence id(s) to download')`
			`parser.add_argument('--image_ids', type=int, nargs='*', help='The mapillary image id(s) to get their sequence id(s)')`
Add path destination for sequence folders 2023-09-19 21:29:49 +02:00			`parser.add_argument('--destination', type=str, default='data', help='Path destination for the images')`
create folder only when needed 2023-09-14 10:12:26 +02:00			`parser.add_argument('--image_limit', type=int, default=None, help='How many images you want to download')`
fix arg store_true 2023-09-15 11:27:29 +02:00			`parser.add_argument('--overwrite', default=False, action='store_true', help='overwrite existing images')`
add --version argument 2023-09-21 21:20:46 +02:00			`parser.add_argument("-v", "--version", action="version", version="release 1.0")`
commit code 2023-01-20 04:19:37 +01:00			`args = parser.parse_args(argv)`
get sequence from image id(s) 2023-09-20 12:21:14 +02:00			`if args.sequence_ids is None and args.image_ids is None:`
			`parser.error("Please enter at least one sequence id or image id")`
cleaning 2023-09-20 21:28:32 +02:00			`return args`
commit code 2023-01-20 04:19:37 +01:00
fix out of memory with image.close() 2023-09-17 20:02:59 +02:00			`def download(url, filepath, metadata=None):`
			`#print(asizeof.asizeof(image)/1024, "MB")`
conccurency on get_image_data 2023-09-14 23:27:03 +02:00			`with open(str(filepath), "wb") as f:`
move inside the with block 2023-09-16 15:09:17 +02:00			`r = session.get(url, stream=True, timeout=6)`
			`image = write_exif(r.content, metadata)`
Switching to pyexiv2 2023-09-12 14:31:10 +02:00			`f.write(image)`
futures were not in loop :-/ 2023-09-14 23:48:20 +02:00			`print("{} downloaded".format(filepath))`
commit code 2023-01-20 04:19:37 +01:00
Add some documentation 2023-09-20 22:10:06 +02:00
Download multiple sequence 2023-09-14 00:05:12 +02:00			`def get_single_image_data(image_id, mly_header):`
get sequence from image id(s) 2023-09-20 12:21:14 +02:00			`req_url = 'https://graph.mapillary.com/{}?fields=thumb_original_url,altitude,camera_type,captured_at,compass_angle,geometry,exif_orientation,sequence'.format(image_id)`
add retry on http connections 2023-09-14 20:57:41 +02:00			`r = session.get(req_url, headers=mly_header)`
Download multiple sequence 2023-09-14 00:05:12 +02:00			`data = r.json()`
			`return data`

Add some documentation 2023-09-20 22:10:06 +02:00
Download multiple sequence 2023-09-14 00:05:12 +02:00			`def get_image_data_from_sequences(sequences_id, mly_header):`
			`for i,sequence_id in enumerate(sequences_id):`
			`url = 'https://graph.mapillary.com/image_ids?sequence_id={}'.format(sequence_id)`
			`r = requests.get(url, headers=header)`
			`data = r.json()`
			`image_ids = data['data']`
			`total_image = len(image_ids)`
fix sys.exit() 2023-09-14 00:32:11 +02:00			`print("{} images in sequence {} of {} - id : {}".format(total_image, i+1, len(sequences_id), sequence_id))`
Download multiple sequence 2023-09-14 00:05:12 +02:00			`print('getting images data')`
			`for x in range(0, total_image):`
			`image_id = image_ids[x]['id']`
			`image_data = get_single_image_data(image_id, mly_header)`
			`image_data['sequence_id'] = sequence_id`
			`yield image_data`

Add some documentation 2023-09-20 22:10:06 +02:00
conccurency on get_image_data 2023-09-14 23:27:03 +02:00			`def get_image_data_from_sequences__future(sequences_id, mly_header):`
			`for i,sequence_id in enumerate(sequences_id):`
			`url = 'https://graph.mapillary.com/image_ids?sequence_id={}'.format(sequence_id)`
			`r = requests.get(url, headers=header)`
			`data = r.json()`
check sequence content 2023-09-19 22:03:33 +02:00			`if data.get('data') == []:`
			`print("Empty or wrong sequence {} of {} - id : {}".format(i+1, len(sequences_id), sequence_id))`
			`continue`
conccurency on get_image_data 2023-09-14 23:27:03 +02:00			`image_ids = data['data']`
			`total_image = len(image_ids)`
			`print("{} images in sequence {} of {} - id : {}".format(total_image, i+1, len(sequences_id), sequence_id))`
			`print('getting images data')`

			`with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:`
			`future_to_url = {}`
			`for x in range(0, total_image):`
			`image_id = image_ids[x]['id']`
			`future_to_url[executor.submit(get_single_image_data, image_id, mly_header)] = image_id`
			`for future in concurrent.futures.as_completed(future_to_url):`
			`url = future_to_url[future]`
			`image_data = future.result()`
			`image_data['sequence_id'] = sequence_id`
			`#print(image_data)`
			`yield image_data`

Add some documentation 2023-09-20 22:10:06 +02:00
Switching to pyexiv2 2023-09-12 14:31:10 +02:00			`def write_exif(picture, img_metadata):`
Write exif metadata (Quick &Dirty) 2023-09-10 21:02:33 +02:00			`'''`
			`Write exif metadata`
			`'''`
			`#{'thumb_original_url': 'https://scontent-cdg4-2.xx.fbcdn.net/m1/v/t6/An9Zy2SrH9vXJIF01QkBODyUbg7XSKfwL48UwHyvihSwvECGjVbG0vSw9uhxe2-Dq-k2eUcigb83buO6zo-7eVbykfp5aQIe1kgd-MJr66nU_H-o_mwBLZXgVbj5I_5WX-C9c6FxJruHkV962F228O0?ccb=10-5&oh=00_AfDOKD869DxL-4ZNCbVo8Rn29vsc0JyjMAU2ctx4aAFVMQ&oe=65256C25&_nc_sid=201bca',`
			`# 'captured_at': 1603459736644, 'geometry': {'type': 'Point', 'coordinates': [2.5174596904057, 48.777089857534]}, 'id': '485924785946693'}`

use a with statement to update image metadata 2023-09-17 20:07:14 +02:00			`with writer.Writer(picture) as image:`
use new metadata for offset localize 2023-09-18 22:16:07 +02:00			`image.add_datetimeoriginal(img_metadata)`
			`image.add_lat_lon(img_metadata)`
use a with statement to update image metadata 2023-09-17 20:07:14 +02:00			`image.add_altitude(img_metadata)`
			`image.add_direction(img_metadata)`
			`image.apply()`
			`updated_image = image.get_Bytes()`

fix out of memory with image.close() 2023-09-17 20:02:59 +02:00			`return updated_image`
write equirectangular projection in xmp data with pyexiv2 2023-09-11 20:21:20 +02:00
Add some documentation 2023-09-20 22:10:06 +02:00
commit code 2023-01-20 04:19:37 +01:00			`if __name__ == '__main__':`
cleaning 2023-09-20 21:28:32 +02:00
			`args = parse_args()`
get sequence from image id(s) 2023-09-20 12:21:14 +02:00			`sequence_ids= args.sequence_ids if args.sequence_ids is not None else []`
			`images_ids = args.image_ids`
commit code 2023-01-20 04:19:37 +01:00			`access_token = args.access_token`
create folder only when needed 2023-09-14 10:12:26 +02:00			`images_data = []`
			`header = {'Authorization' : 'OAuth {}'.format(access_token)}`
get sequence from image id(s) 2023-09-20 12:21:14 +02:00
			`if images_ids:`
			`for image_id in images_ids:`
			`image_data = get_single_image_data(image_id, header)`
			`if 'error' in image_data:`
			`print("data : ", image_data)`
			`print("something wrong happened ! Please check your image id and/or your connection")`
			`sys.exit()`
			`else:`
			`sequence_ids.append(image_data.get('sequence'))`
commit code 2023-01-20 04:19:37 +01:00
conccurency on get_image_data 2023-09-14 23:27:03 +02:00			`#for i,image_data in enumerate(get_image_data_from_sequences(sequence_ids, header)):`
			`for i,image_data in enumerate(get_image_data_from_sequences__future(sequence_ids, header)):`
create folder only when needed 2023-09-14 10:12:26 +02:00			`if args.image_limit is not None and i >= args.image_limit:`
Download multiple sequence 2023-09-14 00:05:12 +02:00			`break`
check error in image data. It could happen if the token is wrong or disabled should fix #4 2023-09-15 20:12:54 +02:00			`if 'error' in image_data:`
			`print("data : ", image_data)`
get sequence from image id(s) 2023-09-20 12:21:14 +02:00			`print("something wrong happened ! Please check your token and/or your connection")`
Add path destination for sequence folders 2023-09-19 21:29:49 +02:00			`sys.exit()`
Download multiple sequence 2023-09-14 00:05:12 +02:00			`images_data.append(image_data)`
conccurency on get_image_data 2023-09-14 23:27:03 +02:00			`#sys.exit()`
commit code 2023-01-20 04:19:37 +01:00
uncomment code 2023-01-20 04:21:02 +01:00			`print('downloading.. this process will take a while. please wait')`
lowered download thread from 10 to 4 2023-09-15 20:13:42 +02:00			`with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:`
futures were not in loop :-/ 2023-09-14 23:48:20 +02:00			`for i,image_data in enumerate(images_data):`
			`# create a folder for each unique sequence ID to group images by sequence`
Add path destination for sequence folders 2023-09-19 21:29:49 +02:00			`path_destination = os.path.join(args.destination, image_data['sequence_id'])`
			`if not os.path.exists(path_destination):`
			`os.makedirs(path_destination)`
			`date_time_image_filename = datetime.utcfromtimestamp(int(image_data['captured_at'])/1000).strftime('%Y-%m-%d_%HH%Mmn%Ss%f')[:-3] + '.jpg'`
			`path = os.path.join(path_destination, date_time_image_filename)`
futures were not in loop :-/ 2023-09-14 23:48:20 +02:00			`img_metadata = writer.PictureMetadata(`
			`capture_time = datetime.utcfromtimestamp(int(image_data['captured_at'])/1000),`
			`longitude = image_data['geometry']['coordinates'][0],`
			`latitude = image_data['geometry']['coordinates'][1],`
			`picture_type = PictureType("equirectangular") if image_data['camera_type'] == 'spherical' else None,`
			`direction = image_data['compass_angle'],`
			`altitude = image_data['altitude'],`
			`)`
add argument to overwrite image 2023-09-15 11:22:11 +02:00			`image_exists = os.path.exists(path)`
			`if not args.overwrite and image_exists:`
			`print("{} already exists. Skipping ".format(path))`
			`continue`
conccurency on get_image_data 2023-09-14 23:27:03 +02:00			`executor.submit(download, url=image_data['thumb_original_url'], filepath=path, metadata=img_metadata)`