# Pre-process CMIP tas projections

Downloaded from cmip6.science.unimelb.edu.au

## Authors

All notebooks and plots in this repository were generated by the following authors:

- Zebedee Nicholls zebedee.nicholls@climate-energy-college.org
- Jared Lewis jared.lewis@climate-resource.com
- Malte Meinshausen malte.meinshausen@unimelb.edu.au

In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
%load_ext autoreload
%autoreload 2

<IPython.core.display.Javascript object>

In [3]:
import glob
import os.path
import zipfile

import netcdf_scm.io
import netcdf_scm.retractions
import scmdata
import tqdm.autonotebook as tqdman

import utils
import utils.cmip

<IPython.core.display.Javascript object>

  import tqdm.autonotebook as tqdman


<IPython.core.display.Javascript object>

In [4]:
OUT_FILE = os.path.join(
    utils.DATA_DIR,
    "processed",
    "cmip6-science-unimelb",
    "cmip_data_tas_projections.nc",
)
os.makedirs(os.path.dirname(OUT_FILE), exist_ok=True)
OUT_FILE

'/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/processed/cmip6-science-unimelb/cmip_data_tas_projections.nc'

<IPython.core.display.Javascript object>

In [5]:
# template for file to save the downloaded zip in
ZIP_DIR = os.path.join(
    utils.DATA_DIR,
    "raw",
    "cmip6-science-unimelb",
)
ZIP_DIR

'/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb'

<IPython.core.display.Javascript object>

In [6]:
# directory in which to extra data
DATA_DIR = os.path.join(ZIP_DIR, "extracted")
DATA_DIR

'/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb/extracted'

<IPython.core.display.Javascript object>

In [7]:
zip_files = glob.glob(os.path.join(ZIP_DIR, "*.zip"))
zip_files

['/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb/figure_y_ssp434_cmip_data.zip',
 '/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb/figure_y_rcp85_cmip_data.zip',
 '/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb/figure_y_ssp460_cmip_data.zip',
 '/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb/figure_y_ssp119_cmip_data.zip',
 '/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb/figure_y_ssp534-over_cmip_data.zip',
 '/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb/figure_y_rcp45_cmip_data.zip',
 '/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb/figure_y_ssp245_cmip_data.zip',
 '/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/

<IPython.core.display.Javascript object>

In [8]:
for zf in tqdman.tqdm(zip_files):
    with zipfile.ZipFile(zf, "r") as zip_ref:
        zip_ref.extractall(DATA_DIR)

  0%|          | 0/12 [00:00<?, ?it/s]

<IPython.core.display.Javascript object>

In [9]:
all_files = glob.glob(os.path.join(DATA_DIR, "**", "*.MAG"), recursive=True)
print(f"{len(all_files)} available files")
all_files[:3]

1510 available files


['/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb/extracted/average-year-mid-year/CMIP6/AerChemMIP/BCC/BCC-ESM1/ssp370/r2i1p1f1/Amon/tas/gn/v20190624/netcdf-scm_tas_Amon_BCC-ESM1_ssp370_r2i1p1f1_gn_1850-2055.MAG',
 '/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb/extracted/average-year-mid-year/CMIP6/AerChemMIP/BCC/BCC-ESM1/ssp370/r3i1p1f1/Amon/tas/gn/v20190702/netcdf-scm_tas_Amon_BCC-ESM1_ssp370_r3i1p1f1_gn_1850-2055.MAG',
 '/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb/extracted/average-year-mid-year/CMIP6/AerChemMIP/BCC/BCC-ESM1/ssp370/r1i1p1f1/Amon/tas/gn/v20190624/netcdf-scm_tas_Amon_BCC-ESM1_ssp370_r1i1p1f1_gn_1850-2055.MAG']

<IPython.core.display.Javascript object>

In [10]:
retracted_table = netcdf_scm.retractions.check_depends_on_retracted(
    all_files, raise_on_mismatch=False, esgf_query_batch_size=10
)
retracted_table.loc[
    retracted_table["dependency_retracted"], "mag_file"
].unique().tolist()

  0%|          | 0/1510 [00:00<?, ?it/s]

Querying ESGF (submitting jobs):   0%|          | 0/294 [00:00<?, ?it/s]

Retrieving results from ESGF jobs:   0%|          | 0/294 [00:00<?, ?it/s]

['/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb/extracted/average-year-mid-year/CMIP6/ScenarioMIP/MRI/MRI-ESM2-0/ssp245/r4i1p1f1/Amon/tas/gn/v20190308/netcdf-scm_tas_Amon_MRI-ESM2-0_ssp245_r4i1p1f1_gn_1850-2030.MAG',
 '/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb/extracted/average-year-mid-year/CMIP6/ScenarioMIP/MRI/MRI-ESM2-0/ssp245/r2i1p1f1/Amon/tas/gn/v20190308/netcdf-scm_tas_Amon_MRI-ESM2-0_ssp245_r2i1p1f1_gn_1850-2030.MAG',
 '/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb/extracted/average-year-mid-year/CMIP6/ScenarioMIP/MRI/MRI-ESM2-0/ssp245/r3i1p1f1/Amon/tas/gn/v20190308/netcdf-scm_tas_Amon_MRI-ESM2-0_ssp245_r3i1p1f1_gn_1850-2030.MAG',
 '/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/cmip6-science-unimelb/extracted/average-year-mid-year/CMIP6/ScenarioMIP/MRI/MRI-ESM2-0/ssp245/r5i1p1f1/Amon

<IPython.core.display.Javascript object>

In [11]:
db = scmdata.run_append(
    [
        netcdf_scm.io.load_mag_file(
            f, drs="CMIP6Output" if "CMIP6" in f else "MarbleCMIP5"
        ).filter(region="World")
        for f in tqdman.tqdm(all_files)
    ]
)
db.metadata = {}
db

  0%|          | 0/1510 [00:00<?, ?it/s]

<scmdata.ScmRun (timeseries: 1510, timepoints: 451)>
Time:
	Start: 1850-07-01T00:00:00
	End: 2300-07-01T00:00:00
Meta:
	      activity_id climate_model member_id mip_era        model region  \
	0      AerChemMIP      BCC-ESM1  r2i1p1f1   CMIP6  unspecified  World   
	1      AerChemMIP      BCC-ESM1  r3i1p1f1   CMIP6  unspecified  World   
	2      AerChemMIP      BCC-ESM1  r1i1p1f1   CMIP6  unspecified  World   
	3     ScenarioMIP    ACCESS-CM2  r2i1p1f1   CMIP6  unspecified  World   
	4     ScenarioMIP    ACCESS-CM2  r3i1p1f1   CMIP6  unspecified  World   
	...           ...           ...       ...     ...          ...    ...   
	1505        cmip5    HadGEM2-CC    r2i1p1   CMIP5  unspecified  World   
	1506        cmip5    HadGEM2-CC    r3i1p1   CMIP5  unspecified  World   
	1507        cmip5  GISS-E2-R-CC    r1i1p1   CMIP5  unspecified  World   
	1508        cmip5  IPSL-CM5A-MR    r1i1p1   CMIP5  unspecified  World   
	1509        cmip5     MRI-CGCM3    r1i1p1   CMIP5  unspecified  Wo

<IPython.core.display.Javascript object>

In [12]:
EXTRA_CMIP_FILES_DIR = os.path.join(utils.DATA_DIR, "raw", "zeb-quick-crunch")
!cat {os.path.join(EXTRA_CMIP_FILES_DIR, "README.txt")}

# Zeb quick crunched data

A couple of extra data files which didn't make it 
through the cmip6.science.unimelb.edu.au crunching
for reasons described at https://gitlab.com/netcdf-scm/netcdf-scm/-/issues/56.



<IPython.core.display.Javascript object>

In [13]:
extra_cmip_files = glob.glob(
    os.path.join(utils.DATA_DIR, "raw", "zeb-quick-crunch", "**", "*ssp*.MAG"),
    recursive=True,
)
extra_cmip_files

['/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/zeb-quick-crunch/output-stitched/CMIP6/ScenarioMIP/MOHC/UKESM1-0-LL/ssp585/r4i1p1f2/Amon/tas/gn/v20210205/netcdf-scm_tas_Amon_UKESM1-0-LL_ssp585_r4i1p1f2_gn_1850-2300.MAG',
 '/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/zeb-quick-crunch/output-stitched/CMIP6/ScenarioMIP/MOHC/UKESM1-0-LL/ssp126/r4i1p1f2/Amon/tas/gn/v20190507/netcdf-scm_tas_Amon_UKESM1-0-LL_ssp126_r4i1p1f2_gn_1850-2300.MAG',
 '/data2/ubuntu-znicholls/ar6-wg1-plots-and-processing/src/utils/../../data/raw/zeb-quick-crunch/output-stitched/CMIP6/ScenarioMIP/MOHC/UKESM1-0-LL/ssp534-over/r4i1p1f2/Amon/tas/gn/v20210205/netcdf-scm_tas_Amon_UKESM1-0-LL_ssp534-over_r4i1p1f2_gn_1850-2300.MAG']

<IPython.core.display.Javascript object>

In [14]:
extra_cmip_data = scmdata.run_append(
    [netcdf_scm.io.load_mag_file(f, drs="CMIP6Output") for f in extra_cmip_files]
)
extra_cmip_data.metadata = {}
extra_cmip_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,time,1850-07-01 00:00:00,1851-07-01 00:00:00,1852-07-01 00:00:00,1853-07-01 00:00:00,1854-07-01 00:00:00,1855-07-01 00:00:00,1856-07-01 00:00:00,1857-07-01 00:00:00,1858-07-01 00:00:00,1859-07-01 00:00:00,...,2291-07-01 00:00:00,2292-07-01 00:00:00,2293-07-01 00:00:00,2294-07-01 00:00:00,2295-07-01 00:00:00,2296-07-01 00:00:00,2297-07-01 00:00:00,2298-07-01 00:00:00,2299-07-01 00:00:00,2300-07-01 00:00:00
activity_id,climate_model,member_id,mip_era,model,region,scenario,unit,variable,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
ScenarioMIP,UKESM1-0-LL,r4i1p1f2,CMIP6,unspecified,World,ssp585,K,tas,286.961,286.787,286.892,287.094,286.866,286.759,286.711,286.762,286.823,286.874,...,299.14,299.044,299.086,299.055,299.068,299.031,299.115,298.978,299.085,299.145
ScenarioMIP,UKESM1-0-LL,r4i1p1f2,CMIP6,unspecified,World,ssp126,K,tas,286.961,286.787,286.892,287.094,286.866,286.759,286.711,286.762,286.823,286.874,...,289.006,288.895,289.019,289.052,288.918,289.041,289.142,289.204,289.214,289.192
ScenarioMIP,UKESM1-0-LL,r4i1p1f2,CMIP6,unspecified,World,ssp534-over,K,tas,286.961,286.787,286.892,287.094,286.866,286.759,286.711,286.762,286.823,286.874,...,289.219,289.099,289.244,289.129,289.119,289.143,289.043,289.121,289.107,289.089


<IPython.core.display.Javascript object>

In [15]:
for (scenario, member_id, climate_model), sdf in extra_cmip_data.timeseries().groupby(
    ["scenario", "member_id", "climate_model"]
):
    db = db.filter(
        scenario=scenario, member_id=member_id, climate_model=climate_model, keep=False
    ).append(sdf)

<IPython.core.display.Javascript object>

In [16]:
# keep only world and drop metadata which is irrelevant
db = db.filter(region="World").drop_meta(["activity_id", "mip_era"])

<IPython.core.display.Javascript object>

In [17]:
db.to_nc(
    OUT_FILE,
    dimensions=("region", "scenario", "climate_model", "member_id"),
    #     extras=(),
)

<IPython.core.display.Javascript object>