# Use python module function to download data.

Load needed stuff from the ecmwf_interface library 

In [3]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.insert(0, '..')

from ecmwf_interface.download import download_ecmwf, days_of_month, read_local_key, read_checkpoint

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Load the local API key, stored in the root directory in the `.api_key` text file, or explicitly type it below. The first option is nicer if the notebook will be shared, so the UUID / key is not explicitly stated here. 

In [4]:
# Read the credentials from the local key file instead of typing them into the notebook.
UUID, key = read_local_key('../.api_key')
# Do not echo the key: cell outputs are saved with the notebook and would expose the secret.
print(f'uuid = {UUID}, key = <hidden>')

uuid = 13263, key = <hidden>


Define the `years` and `months` to download, as well as the `times` and `variables`. The code automatically generates a list of days, in the `YYYY-MM-DD` format, to be passed to the API for download. Depending on the number of timestamps and variables, a single day can be up to 2-3 GB, which makes splitting the files by day the most convenient.


In [7]:
# Selects which period to download:
#   True  -> ACE cruise months     (Nov-Dec 2016, Jan-Apr 2017)
#   False -> non ACE-cruise months (Jul-Oct 2016, May-Jun 2017)
DURING_ACE = True

years = [2016, 2017]

# months[i] holds the months to fetch for years[i].
months = [[11, 12], [1, 2, 3, 4]] if DURING_ACE else [[7, 8, 9, 10], [5, 6]]

# Flat list of every day to request, built year by year and month by month.
days = []
for year_idx, year in enumerate(years):
    for month in months[year_idx]:
        # days_of_month yields the days of one month; add them directly to the flat list.
        days.extend(days_of_month(year, month))

# Hourly timestamps 'HH:00' for the whole day, from '00:00' to '23:00'.
times = [f'{hour:02d}:00' for hour in range(24)]

# Names of the variables to request; the selection depends on the chosen period.
if DURING_ACE:
    # ACE period: the larger set, using the neutral-wind components.
    variables = [
        '10m_u_component_of_neutral_wind',
        '10m_v_component_of_neutral_wind',
        'air_density_over_the_oceans',
        '2m_temperature',
        'boundary_layer_height',
        'cloud_base_height',
        'convective_precipitation',
        'convective_snowfall',
        'friction_velocity',
        'high_cloud_cover',
        'land_sea_mask',
        'large_scale_precipitation',
        'large_scale_snowfall',
        'low_cloud_cover',
        'mean_direction_of_total_swell',
        'mean_direction_of_wind_waves',
        'mean_square_slope_of_waves',
        'mean_surface_latent_heat_flux',
        'mean_surface_sensible_heat_flux',
        'medium_cloud_cover',
        'peak_wave_period',
        'period_corresponding_to_maximum_individual_wave_height',
        'sea_ice_cover',
        'sea_surface_temperature',
        'significant_height_of_total_swell',
        'significant_height_of_wind_waves',
        'skin_temperature',
        'total_cloud_cover',
        'total_column_cloud_ice_water',
        'total_column_cloud_liquid_water',
    ]
else:
    # Other months: the reduced set, using the plain 10 m wind components.
    variables = [
        '10m_u_component_of_wind',
        '10m_v_component_of_wind',
        'boundary_layer_height',
        'land_sea_mask',
        'mean_direction_of_total_swell',
        'mean_direction_of_wind_waves',
        'mean_square_slope_of_waves',
        'peak_wave_period',
        'period_corresponding_to_maximum_individual_wave_height',
        'sea_surface_temperature',
        'significant_height_of_total_swell',
        'significant_height_of_wind_waves',
    ]

This is done automatically; it is shown here in case one wants to know what has already been downloaded (`verbose=True`). Save the output in `days` to overwrite the list of days to download.

In [8]:
# Replace `days` with the result of the checkpoint check (presumably the days still missing).
# NOTE(review): this inspects '../data/raw/ecmwf_newgpsinterpolation/' while the download
# cell writes to '../data/raw/ecmwf' -- confirm the two folders are meant to differ.
checkpoint_folder = '../data/raw/ecmwf_newgpsinterpolation/'
days = read_checkpoint(days, root_fold_data=checkpoint_folder, verbose=True)

Skipping 2016-11-01
Skipping 2016-11-02
Skipping 2016-11-03
Skipping 2016-11-04
Skipping 2016-11-05
Skipping 2016-11-06
Skipping 2016-11-07
Skipping 2016-11-08
Skipping 2016-11-09
Skipping 2016-11-10
Skipping 2016-11-11
Skipping 2016-11-12
Skipping 2016-11-13
Skipping 2016-11-14
Skipping 2016-11-15
Skipping 2016-11-16
Skipping 2016-11-17
Skipping 2016-11-18
Skipping 2016-11-19
Skipping 2016-11-20
Skipping 2016-11-21
Skipping 2016-11-22
Skipping 2016-11-23
Skipping 2016-11-24
Skipping 2016-11-25
Skipping 2016-11-26
Skipping 2016-11-27
Skipping 2016-11-28
Skipping 2016-11-29
Skipping 2016-11-30
Skipping 2016-12-01
Skipping 2016-12-02
Skipping 2016-12-03
Skipping 2016-12-04
Skipping 2016-12-05
Skipping 2016-12-06
Skipping 2016-12-07
Skipping 2016-12-08
Skipping 2016-12-09
Skipping 2016-12-10
Skipping 2016-12-11
Skipping 2016-12-12
Skipping 2016-12-13
Skipping 2016-12-14
Skipping 2016-12-15
Skipping 2016-12-16
Skipping 2016-12-17
Skipping 2016-12-18
Skipping 2016-12-19
Skipping 2016-12-20


Actually download the data. In this case, the download is sequential. It is not clear whether parallel requests and downloads are possible, but it might be a nice option. The script automatically checks whether there are files in the `download_folder` with the same filename pattern, and automatically skips them. This allows resuming long downloads from a starting point. Just check whether the file size makes sense, as it can happen that a partial file is present but is not a complete download.

In [9]:
# Request the selected days, hours and variables and store them in the raw ECMWF data folder.
download_ecmwf(
    UUID=UUID,
    key=key,
    download_folder='../data/raw/ecmwf',
    days=days,
    time_of_day=times,
    variables=variables,
)