## Save AR6 timeseries to database

1. Download AR6 timeseries output
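1. Rename variables (split the percentile and climate model out of the variable name into their own metadata columns)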
1. Save to database

In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import glob
import os.path
import zipfile
from multiprocessing import Pool

import dotenv
import pandas as pd
import scmdata
import scmdata.database
import tqdm.autonotebook as tqdman

<IPython.core.display.Javascript object>

  import tqdm.autonotebook as tqdman


<IPython.core.display.Javascript object>

In [3]:
import utils

<IPython.core.display.Javascript object>

In [4]:
# Number of worker processes in the multiprocessing Pool used to read input files
N_WORKERS = 32

<IPython.core.display.Javascript object>

In [5]:
# Wildcard patterns handed to `ScmRun.filter(variable=...)` in `read_file`;
# only timeseries whose variable matches one of these patterns is kept.
variables_to_put_in_db = [
    "*GSAT*",
    "*Exceed*",
    "*Emissions*",
    "*Forcing*",
    "*Concentrations*",
]


def read_file(f):
    """
    Load one input file and keep only the variables destined for the database.

    Parameters
    ----------
    f
        Path of the file to load; passed straight to ``scmdata.ScmRun``.

    Returns
    -------
    scmdata.ScmRun
        The loaded timeseries, filtered to the patterns in
        ``variables_to_put_in_db``.
    """
    return scmdata.ScmRun(f, lowercase_cols=True).filter(
        variable=variables_to_put_in_db
    )


def determine_percentile(v):
    """
    Extract the percentile label from a "|"-separated variable name.

    Parameters
    ----------
    v : str
        Variable name.

    Returns
    -------
    str
        The last "|"-separated component of ``v`` with "th Percentile"
        removed, or "not_applicable" if ``v`` does not contain "Percentile".
    """
    if "Percentile" in v:
        final_component = v.split("|")[-1]
        return final_component.replace("th Percentile", "")

    return "not_applicable"


def determine_climate_model(v):
    """
    Extract the climate model component from a "|"-separated variable name.

    Parameters
    ----------
    v : str
        Variable name.

    Returns
    -------
    str
        "not_applicable" if ``v`` mentions neither "MAGICC" nor "FaIR".
        Otherwise the second-to-last component when ``v`` contains
        "Percentile" (the last one is then the percentile), else the last
        component.
    """
    mentions_climate_model = "MAGICC" in v or "FaIR" in v
    if not mentions_climate_model:
        return "not_applicable"

    components = v.split("|")
    position = -2 if "Percentile" in v else -1

    return components[position]


def strip_to_variable(v):
    """
    Reduce a full variable name to the bare variable.

    The "SR15 climate diagnostics|" and "AR6 climate diagnostics|" prefixes
    are removed. The trailing "|"-separated component is then dropped once
    if the name contains "Percentile" and once more if the original name
    mentions "MAGICC" or "FaIR".

    Parameters
    ----------
    v : str
        Variable name.

    Returns
    -------
    str
        The stripped variable name.
    """
    out = v
    for diagnostics_prefix in (
        "SR15 climate diagnostics|",
        "AR6 climate diagnostics|",
    ):
        out = out.replace(diagnostics_prefix, "")

    if "Percentile" in out:
        # Everything before the last "|" (empty string if there is no "|")
        out = out.rpartition("|")[0]

    # The climate model check deliberately looks at the original name
    if "MAGICC" in v or "FaIR" in v:
        out = out.rpartition("|")[0]

    return out

<IPython.core.display.Javascript object>

In [6]:
def save_files_to_db(flist, output_path, methodology, delete_first=True):
    """
    Read output files, tidy their metadata and save them to a database.

    The files are read in parallel with ``read_file`` and appended into a
    single run. The "percentile" and "climate_model" metadata columns are
    derived from the variable names, a "methodology" column is set and the
    variable names are stripped with ``strip_to_variable``. For the
    "Infilled|" prefix (and the empty prefix if any variable starts with
    "Emissions"), total CO2 emissions are taken from the data if present,
    otherwise computed as AFOLU plus Energy and Industrial Processes, and
    the integral of total CO2 over the timepoints in 2015-2100 is appended
    in Gt CO2. The result is then saved to the database.

    Parameters
    ----------
    flist : list
        Paths of the files to read.
    output_path
        Root directory of the output ``scmdata.database.ScmDatabase``.
    methodology : str
        Value written to the "methodology" metadata column.
    delete_first : bool
        If True, delete the existing contents of the database before
        reading and saving.

    Raises
    ------
    ValueError
        If ``flist`` is empty. This is raised before the database is
        touched so that an empty file list cannot wipe existing output.
    AssertionError
        If total CO2 emissions are present in the data but do not cover
        every model-scenario pair.
    """
    print(f"{len(flist)} files to read")
    if len(flist) == 0:
        # Bail out before `output_db.delete()` so an empty glob does not
        # destroy previously saved output.
        raise ValueError("No files to read, nothing to save to the database")

    output_db = scmdata.database.ScmDatabase(
        output_path,
        levels=("variable", "percentile"),
    )
    if delete_first:
        output_db.delete()

    with Pool(processes=N_WORKERS) as pool:
        print(f"Reading files with {N_WORKERS} workers")
        # Read the files the caller passed in, not whatever a notebook-level
        # variable happens to hold.
        raw = pool.map(read_file, flist)
        print("Finished")

    raw = scmdata.run_append(raw)
    display(raw)

    # Unique model-scenario pairs, used below to check CO2 totals are complete
    mss = raw.meta[["model", "scenario"]].drop_duplicates()
    display(mss)

    to_save = raw.copy()
    # percentile and climate_model must be derived before the variable names
    # are stripped, as they are parsed out of the full variable name
    to_save["percentile"] = to_save["variable"].apply(determine_percentile)
    to_save["methodology"] = methodology
    to_save["climate_model"] = to_save["variable"].apply(determine_climate_model)
    to_save["variable"] = to_save["variable"].apply(strip_to_variable)

    prefixes = ["Infilled|"]
    if any(v.startswith("Emissions") for v in to_save.get_unique_meta("variable")):
        prefixes.append("")

    for prefix in prefixes:
        co2_total_var = f"{prefix}Emissions|CO2"

        if co2_total_var in to_save.get_unique_meta("variable"):
            co2_total = to_save.filter(variable=co2_total_var)
            assert (
                co2_total.meta[["model", "scenario"]].drop_duplicates().shape[0]
                == mss.shape[0]
            )

        else:
            # No total reported, so build it from its two components
            co2_total = (
                to_save.filter(variable=f"{prefix}Emissions|CO2|AFOLU")
                .add(
                    to_save.filter(
                        variable=f"{prefix}Emissions|CO2|Energy and Industrial Processes"
                    ),
                    op_cols={"variable": co2_total_var},
                )
                .convert_unit("Mt CO2/yr")
            )
            to_save = to_save.append(co2_total)

        # Restrict to the existing timepoints in 2015-2100 before integrating
        co2_cumulative = (
            co2_total.interpolate(co2_total.filter(year=range(2015, 2100 + 1))["time"])
            .integrate()
            .convert_unit("Gt CO2")
        )

        to_save = to_save.append(co2_cumulative)

    display(to_save)

    output_db.save(to_save)

<IPython.core.display.Javascript object>

In [7]:
# (raw output directory id, methodology label) for each set of files to process
magicc_runs = (
    (utils.AR6_OUTPUT_SR15_RAW_ID, "SR1.5"),
    (utils.AR6_OUTPUT_SR15_RAW_RCMIP_PHASE_2_CONFIG_ID, "SR1.5"),
)

for ar6_id, methodology in magicc_runs:
    raw_file_pattern = os.path.join(
        utils.DATA_DIR, "raw", "ar6-output", ar6_id, "*alloutput*.xlsx"
    )
    files_to_read = glob.glob(raw_file_pattern)

    processed_db_path = os.path.join(
        utils.DATA_DIR, "processed", "ar6-output", "MAGICC", ar6_id
    )

    save_files_to_db(
        flist=files_to_read,
        output_path=processed_db_path,
        methodology=methodology,
    )

411 files to read
Reading files with 32 workers
Finished


<scmdata.ScmRun (timeseries: 191937, timepoints: 107)>
Time:
	Start: 1995-01-01T00:00:00
	End: 2105-01-01T00:00:00
Meta:
	                        model region            scenario       unit  \
	0               POLES_ADVANCE  World   ADVANCE_2030_WB2C   Mt BC/yr   
	1               POLES_ADVANCE  World   ADVANCE_2030_WB2C  Mt CH4/yr   
	2               POLES_ADVANCE  World   ADVANCE_2030_WB2C   Mt CO/yr   
	3               POLES_ADVANCE  World   ADVANCE_2030_WB2C  Mt CO2/yr   
	4               POLES_ADVANCE  World   ADVANCE_2030_WB2C  Mt CO2/yr   
	...                       ...    ...                 ...        ...   
	191932  REMIND-MAgPIE_1_7-3_0  World  EMF33_1.5C_cost100          K   
	191933  REMIND-MAgPIE_1_7-3_0  World  EMF33_1.5C_cost100          K   
	191934  REMIND-MAgPIE_1_7-3_0  World  EMF33_1.5C_cost100          K   
	191935  REMIND-MAgPIE_1_7-3_0  World  EMF33_1.5C_cost100          K   
	191936  REMIND-MAgPIE_1_7-3_0  World  EMF33_1.5C_cost100          K   
	
	            

Unnamed: 0,model,scenario
0,POLES_ADVANCE,ADVANCE_2030_WB2C
467,MESSAGE_V_3,GEA_Eff_1p5C_Delay2020
934,MERGE-ETL_6_0,DAC15_50
1401,GCAM_4_2,SSP5-45
1868,IMAGE_3_0_1,ADVANCE_2030_WB2C
...,...,...
189602,AIM_2_1,EMF33_tax_lo_none
190069,POLES_ADVANCE,ADVANCE_2030_Med2C
190536,AIM_2_0,SSP2-Baseline
191003,IMAGE_3_0_2,EMF33_Med2C_limbio


<scmdata.ScmRun (timeseries: 193581, timepoints: 107)>
Time:
	Start: 1995-01-01T00:00:00
	End: 2105-01-01T00:00:00
Meta:
	         climate_model methodology                  model      percentile  \
	0       not_applicable       SR1.5          POLES_ADVANCE  not_applicable   
	1       not_applicable       SR1.5          POLES_ADVANCE  not_applicable   
	2       not_applicable       SR1.5          POLES_ADVANCE  not_applicable   
	3       not_applicable       SR1.5          POLES_ADVANCE  not_applicable   
	4       not_applicable       SR1.5          POLES_ADVANCE  not_applicable   
	...                ...         ...                    ...             ...   
	193576  not_applicable       SR1.5                AIM_2_1  not_applicable   
	193577  not_applicable       SR1.5          POLES_ADVANCE  not_applicable   
	193578  not_applicable       SR1.5                AIM_2_0  not_applicable   
	193579  not_applicable       SR1.5            IMAGE_3_0_2  not_applicable   
	193580  not_applicab

Saving to database: 0it [00:00, ?it/s]

411 files to read
Reading files with 32 workers
Finished


<scmdata.ScmRun (timeseries: 191937, timepoints: 107)>
Time:
	Start: 1995-01-01T00:00:00
	End: 2105-01-01T00:00:00
Meta:
	                        model region            scenario       unit  \
	0               POLES_ADVANCE  World   ADVANCE_2030_WB2C   Mt BC/yr   
	1               POLES_ADVANCE  World   ADVANCE_2030_WB2C  Mt CH4/yr   
	2               POLES_ADVANCE  World   ADVANCE_2030_WB2C   Mt CO/yr   
	3               POLES_ADVANCE  World   ADVANCE_2030_WB2C  Mt CO2/yr   
	4               POLES_ADVANCE  World   ADVANCE_2030_WB2C  Mt CO2/yr   
	...                       ...    ...                 ...        ...   
	191932  REMIND-MAgPIE_1_7-3_0  World  EMF33_1.5C_cost100          K   
	191933  REMIND-MAgPIE_1_7-3_0  World  EMF33_1.5C_cost100          K   
	191934  REMIND-MAgPIE_1_7-3_0  World  EMF33_1.5C_cost100          K   
	191935  REMIND-MAgPIE_1_7-3_0  World  EMF33_1.5C_cost100          K   
	191936  REMIND-MAgPIE_1_7-3_0  World  EMF33_1.5C_cost100          K   
	
	            

Unnamed: 0,model,scenario
0,POLES_ADVANCE,ADVANCE_2030_WB2C
467,MESSAGE_V_3,GEA_Eff_1p5C_Delay2020
934,MERGE-ETL_6_0,DAC15_50
1401,GCAM_4_2,SSP5-45
1868,IMAGE_3_0_1,ADVANCE_2030_WB2C
...,...,...
189602,AIM_2_1,EMF33_tax_lo_none
190069,POLES_ADVANCE,ADVANCE_2030_Med2C
190536,AIM_2_0,SSP2-Baseline
191003,IMAGE_3_0_2,EMF33_Med2C_limbio


<scmdata.ScmRun (timeseries: 193581, timepoints: 107)>
Time:
	Start: 1995-01-01T00:00:00
	End: 2105-01-01T00:00:00
Meta:
	         climate_model methodology                  model      percentile  \
	0       not_applicable       SR1.5          POLES_ADVANCE  not_applicable   
	1       not_applicable       SR1.5          POLES_ADVANCE  not_applicable   
	2       not_applicable       SR1.5          POLES_ADVANCE  not_applicable   
	3       not_applicable       SR1.5          POLES_ADVANCE  not_applicable   
	4       not_applicable       SR1.5          POLES_ADVANCE  not_applicable   
	...                ...         ...                    ...             ...   
	193576  not_applicable       SR1.5                AIM_2_1  not_applicable   
	193577  not_applicable       SR1.5          POLES_ADVANCE  not_applicable   
	193578  not_applicable       SR1.5                AIM_2_0  not_applicable   
	193579  not_applicable       SR1.5            IMAGE_3_0_2  not_applicable   
	193580  not_applicab

Saving to database: 0it [00:00, ?it/s]

<IPython.core.display.Javascript object>

## FaIR

In [12]:
# (directory holding the raw files, output id, methodology label)
fair_runs = (
    (
        os.path.join(utils.DATA_DIR, "raw", "ar6-output", "20211014-sr15-run-fair"),
        "sr15-processing",
        "SR1.5",
    ),
)

for source_dir, idd, methodology in fair_runs:
    raw_file_pattern = os.path.join(source_dir, "*alloutput*.xlsx")
    files_to_read = glob.glob(raw_file_pattern)
    output_dir = os.path.join(utils.DATA_DIR, "processed", "ar6-output", "fair", idd)

    save_files_to_db(
        flist=files_to_read,
        output_path=output_dir,
        methodology=methodology,
    )

1 files to read
Reading files with 32 workers
Finished


<scmdata.ScmRun (timeseries: 180840, timepoints: 107)>
Time:
	Start: 1995-01-01T00:00:00
	End: 2105-01-01T00:00:00
Meta:
	                    model region                scenario       unit  \
	0                 AIM_2_0  World  ADVANCE_2020_1.5C-2100   Mt BC/yr   
	1                 AIM_2_0  World  ADVANCE_2020_1.5C-2100  Mt CH4/yr   
	2                 AIM_2_0  World  ADVANCE_2020_1.5C-2100   Mt CO/yr   
	3                 AIM_2_0  World  ADVANCE_2020_1.5C-2100  Mt CO2/yr   
	4                 AIM_2_0  World  ADVANCE_2020_1.5C-2100  Mt CO2/yr   
	...                   ...    ...                     ...        ...   
	180835  WITCH-GLOBIOM_4_4  World       CD-LINKS_NoPolicy          K   
	180836  WITCH-GLOBIOM_4_4  World       CD-LINKS_NoPolicy          K   
	180837  WITCH-GLOBIOM_4_4  World       CD-LINKS_NoPolicy          K   
	180838  WITCH-GLOBIOM_4_4  World       CD-LINKS_NoPolicy          K   
	180839  WITCH-GLOBIOM_4_4  World       CD-LINKS_NoPolicy          K   
	
	            

Unnamed: 0,model,scenario
0,AIM_2_0,ADVANCE_2020_1.5C-2100
440,AIM_2_0,ADVANCE_2020_Med2C
880,AIM_2_0,ADVANCE_2020_WB2C
1320,AIM_2_0,ADVANCE_2030_Med2C
1760,AIM_2_0,ADVANCE_2030_Price1.5C
...,...,...
178640,WITCH-GLOBIOM_4_4,CD-LINKS_NPi
179080,WITCH-GLOBIOM_4_4,CD-LINKS_NPi2020_1000
179520,WITCH-GLOBIOM_4_4,CD-LINKS_NPi2020_1600
179960,WITCH-GLOBIOM_4_4,CD-LINKS_NPi2020_400


<scmdata.ScmRun (timeseries: 182484, timepoints: 107)>
Time:
	Start: 1995-01-01T00:00:00
	End: 2105-01-01T00:00:00
Meta:
	         climate_model methodology              model      percentile region  \
	0       not_applicable       SR1.5            AIM_2_0  not_applicable  World   
	1       not_applicable       SR1.5            AIM_2_0  not_applicable  World   
	2       not_applicable       SR1.5            AIM_2_0  not_applicable  World   
	3       not_applicable       SR1.5            AIM_2_0  not_applicable  World   
	4       not_applicable       SR1.5            AIM_2_0  not_applicable  World   
	...                ...         ...                ...             ...    ...   
	182479  not_applicable       SR1.5  WITCH-GLOBIOM_4_4  not_applicable  World   
	182480  not_applicable       SR1.5  WITCH-GLOBIOM_4_4  not_applicable  World   
	182481  not_applicable       SR1.5  WITCH-GLOBIOM_4_4  not_applicable  World   
	182482  not_applicable       SR1.5  WITCH-GLOBIOM_4_4  not_applicabl

Saving to database: 0it [00:00, ?it/s]

<IPython.core.display.Javascript object>