# Process raw CloudSat files

#### Created by Megan Thompson-Munson (w/code from Leah Bertrand)

#### Last modified 25 July 2024

This script uses the `cloudsat_filter` module (cloudsat_filter.py) to read in CloudSat 2B-GEOPROF and ECMWF-AUX data. The data are combined, filtered by location (Greenland bounding box), and then saved as NetCDF files.

In [None]:
import cloudsat_filter as csf
import numpy as np
import glob
import os.path

In [None]:
# Input data directories (both end with a trailing slash because the
# processing loop builds file paths by plain string concatenation)

# ECMWF-AUX files
path_ecmwf = '/scratch/alpine/metm9666/project-3/ECMWF-AUX/'

# 2B-GEOPROF files (searched by year subdirectory)
path_geoprof = '/pl/active/wbclimate/ftp.cloudsat.cira.colostate.edu/2B-GEOPROF.P1_R05/'

In [None]:
# Walk every GEOPROF file for the years 2006-2020, pair it with its ECMWF-AUX
# file, and export the clipped result unless it has already been processed.

# Directory that is checked for already-processed NetCDFs and passed to
# csf.export_clipped as the output location
path_processed = '/scratch/alpine/metm9666/project-3/processed-files/'

# Loop through each year for GEOPROF
for year in range(2006, 2021):
    print(year)

    # Get list of folders in each year (differs by year) for GEOPROF
    folders = sorted(glob.glob(path_geoprof + str(year) + '/*'))

    # Loop through each folder for GEOPROF
    for folder in folders:

        # Get list of files for GEOPROF
        files = sorted(glob.glob(folder + '/*'))

        # Loop through each file for GEOPROF and ECMWF
        for f_geoprof in files:

            # Build the ECMWF path from fixed character offsets counted from
            # the END of the GEOPROF path: [-69:-41] is reused as the relative
            # path + file prefix, and [-11:] as the trailing suffix.
            # NOTE(review): this presumably assumes paths ending in
            # 'YYYY/DDD/<19-char granule id>_CS_2B-GEOPROF_GRANULE_P1_R05_<11 chars>'
            # -- confirm; any other layout silently yields a wrong path.
            f_ecmwf = (path_ecmwf + f_geoprof[-69:-41]
                       + '_CS_ECMWF-AUX_GRANULE_P1_R05_' + f_geoprof[-11:])

            # Skip files that have no matching ECMWF file
            if not os.path.isfile(f_ecmwf):
                continue

            # Skip files whose processed netcdf already exists
            # (i.e., don't reprocess completed files)
            if os.path.isfile(path_processed + f_geoprof[-60:-41] + '.nc'):
                continue

            # Use try since there are errors for some files. Catch Exception
            # rather than everything so Ctrl-C / kernel interrupt still stops
            # the run, and report the cause so failures can be diagnosed.
            try:
                csf.export_clipped(
                    f_geoprof,
                    f_ecmwf,
                    outfile_path=path_processed)
            except Exception as err:
                print('Error for file '+f_geoprof + ': ' + repr(err))