In [None]:
import requests
import pandas as pd
import geopandas as gpd

# Fetch the Zenodo record describing the SvalboxDMDb release.
r = requests.get("https://zenodo.org/api/records/7763642", timeout=30)
r.raise_for_status()  # surface HTTP errors here instead of as a KeyError further down
record = r.json()  # parse the response body once and reuse it

version = record["metadata"]["version"]
# Map every file name listed in the record to its download link.
files = {i["key"]: i["links"]["self"] for i in record["files"]}

# File names embed the version with dots replaced by dashes.
file_stem = f'SvalboxDMDb_v{version.replace(".","-")}'
doms = gpd.read_file(files[f'{file_stem}.geojson'])
parameters = pd.read_json(files[f'{file_stem}_parameters.json'])

print(f'The SvalboxDMDb_v{version} contains {len(doms)} models and covers {round(doms.proc_coverage_area.sum(),3)} km2.')

In [None]:
import cartopy.crs as ccrs
import cartopy.io.shapereader as shpreader
import shapely.geometry as sgeom
import geopandas as gpd
from datetime import datetime as dt
import seaborn as sns
from matplotlib.colors import LogNorm, Normalize, SymLogNorm
import matplotlib as plt
from cmcrameri import cm
import json

# Tables 3 and 4, and DOM statistics

In [None]:
def _first_extent(cover_json, category):
    """Return the "Extent" of the first entry of ``cover_json`` whose "Category" equals ``category``.

    ``cover_json`` is a JSON string holding a list of dicts. An IndexError is
    raised when no entry matches.
    """
    extents = [
        entry["Extent"]
        for entry in json.loads(cover_json)
        if entry["Category"] == category
    ]
    return extents[0]


doms["snow"] = doms["cover"].apply(_first_extent, category="Snow")
doms["scree"] = doms["cover"].apply(_first_extent, category="Scree")
# Combined snow + scree class, capped at 5 (the highest class used in the tables).
doms["snowscree"] = (doms["scree"] + doms["snow"]).clip(upper=5)

In [None]:
# Number of DOMs in each combined scree/snow class, with the numeric
# class codes replaced by their percentage-range labels.
(
    doms.groupby("snowscree")["objectid"]
    .count()
    .rename(
        {0:"Pristine", 1:"0-20 %", 2:"20-40 %",3:"40-60 %",4:"60-80 %",5:"80-100 %"}
    )
    .to_frame()
    .reset_index()
    .rename(columns={"objectid":"# DOMs","snowscree":"Outcrop condition (scree/snow)"})
)

In [None]:
# Number of DOMs in each scree class, with the numeric class codes
# replaced by their percentage-range labels.
# The grouping column is "scree", so that is the key that must be renamed
# (the previous "snowscree" key matched nothing and left the header as "scree").
pd.DataFrame(doms.groupby(by = "scree").count()["objectid"].rename(
    {0:"Pristine", 1:"0-20 %", 2:"20-40 %",3:"40-60 %",4:"60-80 %",5:"80-100 %"}
)).reset_index().rename({"objectid":"# DOMs","scree":"Outcrop condition (scree)"}, axis=1)

In [None]:
# Number of DOMs in each snow class, with the numeric class codes
# replaced by their percentage-range labels.
# The grouping column is "snow", so that is the key that must be renamed
# (the previous "snowscree" key matched nothing and left the header as "snow").
pd.DataFrame(doms.groupby(by = "snow").count()["objectid"].rename(
    {0:"Pristine", 1:"0-20 %", 2:"20-40 %",3:"40-60 %",4:"60-80 %",5:"80-100 %"}
)).reset_index().rename({"objectid":"# DOMs","snow":"Outcrop condition (snow)"}, axis=1)

In [None]:
# Parse each model's geology tags; every tag dict is reduced to the list of its values.
# `t` is reused by the subcategory cell below.
t = [[list(d.values()) for d in json.loads(dic)] for dic in doms.geology_tags]
# First value of every tag (skipping empty ones), de-duplicated per model so
# that a model contributes at most once to each category count.
t2 = [list({x[0] for x in v if x[0]}) for v in t]
t2 = pd.DataFrame(t2)
# Count occurrences column by column and add them up per category.
# `pd.Series.value_counts` replaces the deprecated top-level `pd.value_counts`.
categories = pd.DataFrame(t2.apply(pd.Series.value_counts).sum(axis=1))

In [None]:
# Second value of every tag (skipping empty ones), de-duplicated per model so
# that a model contributes at most once to each subcategory count.
# Relies on `t` built in the previous cell.
t3 = [list({x[1] for x in v if x[1]}) for v in t]
t3 = pd.DataFrame(t3)
# Count occurrences column by column and add them up per subcategory.
# `pd.Series.value_counts` replaces the deprecated top-level `pd.value_counts`.
subcategories = pd.DataFrame(t3.apply(pd.Series.value_counts).sum(axis=1))

In [None]:
def _parse_geology_age(raw):
    """Return the unique age dicts of one model, or None when there are none.

    ``raw`` is the JSON string read from the file. Values that are already
    parsed (list or None) are accepted as well, so the cell can be re-run
    without ``json.loads`` failing on its own earlier output.
    """
    entries = json.loads(raw) if isinstance(raw, str) else raw
    if not entries:
        return None
    # Dicts are unhashable: de-duplicate through their item tuples.
    return [dict(items) for items in {tuple(entry.items()) for entry in entries}]


def _unique_age_values(entries, key, strip=None):
    """Return the distinct ``key`` values found in ``entries`` (None if ``entries`` is empty/None).

    Entries lacking ``key`` are skipped. When ``strip`` is given, that
    substring is removed from every value before de-duplicating.
    """
    if not entries:
        return None
    values = (entry[key] for entry in entries if key in entry)
    if strip is not None:
        values = (value.replace(strip, "") for value in values)
    return list(set(values))


doms["geology_age"] = doms["geology_age"].apply(_parse_geology_age)
doms["Era"] = doms["geology_age"].apply(lambda ages: _unique_age_values(ages, "Era"))
# Same as "Era" but with the "/Quaternary" suffix removed from the era names.
doms["Era_incQuart"] = doms["geology_age"].apply(lambda ages: _unique_age_values(ages, "Era", strip="/Quaternary"))
doms["Formation"] = doms["geology_age"].apply(lambda ages: _unique_age_values(ages, "Formation"))
doms["Group"] = doms["geology_age"].apply(lambda ages: _unique_age_values(ages, "Group"))


### Categories (Table 4)

In [None]:
# Category counts as a two-column table with readable headers.
categories.reset_index().rename(
    columns={"index":"Main geological classification", 0:"# DOMs"}
)

### Subcategories

In [None]:
# Subcategory counts as a two-column table with readable headers.
subcategories.reset_index().rename(
    columns={"index":"Secondary geological classification", 0:"# DOMs"}
)

### CO2 Lab

In [None]:
# Total number of (model, unit) occurrences for the units listed below.
co2_groups = ["Rurikfjellet Formation","Agardhfjellet Formation","Janusfjellet Subgroup"]
# `explode` flattens the per-model lists directly, instead of building one
# pd.Series per row and stacking the resulting frame.
# `.loc` with a list raises KeyError if a name is absent, which exposes typos.
doms["Formation"].explode().value_counts().loc[co2_groups].sum()

In [None]:
# Total number of (model, unit) occurrences for the units listed below.
# "Wilhelmøya" was previously mis-encoded ("WilhelmÃ¸ya") and could never match a label.
# NOTE(review): these are (sub)group names looked up in the "Formation" column —
# confirm they are stored there rather than under "Group".
co2_groups = ["Wilhelmøya Subgroup","Kapp Toscana Group","Storfjorden Subgroup"]
# `explode` flattens the per-model lists directly, instead of building one
# pd.Series per row and stacking the resulting frame.
# `.loc` with a list raises KeyError if a name is absent, which exposes typos.
doms["Formation"].explode().value_counts().loc[co2_groups].sum()

### Eras

In [None]:
# Number of models per era: first as recorded, then with the "/Quaternary"
# suffix stripped ("Era_incQuart"). `explode` flattens the per-model lists
# without building one pd.Series per row; the printed index is now labelled
# with the column name.
print(
    doms["Era"].explode().value_counts(),
    "\nMerged Cenozoic\n\n",
    doms["Era_incQuart"].explode().value_counts(),
)

### Groups

In [None]:
# Number of models per group. `explode` flattens the per-model lists without
# building one pd.Series per row; models without a group are ignored.
doms["Group"].explode().value_counts()

### Formations

In [None]:
# Number of models per formation. `explode` flattens the per-model lists without
# building one pd.Series per row; models without a formation are ignored.
doms["Formation"].explode().value_counts()

In [None]:
# Spot check: parsed geology_age entries of a single model.
doms.loc[doms["svalbox_dom_id"].eq("2021-0029"), "geology_age"].to_numpy()

### Type localities

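Download the archive below and extract it into the notebook's working directory, so that the `NP_Stratlex_1999-TypeLocalities` folder can be found by the next cell: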
https://api.npolar.no/publication/190bae68-d439-4fdd-b1e6-9e09b734eaaa/_file/a7058ba36760b92f6152bfdc759f0496

In [None]:
# Read the type-locality shapefile and bring it into the same CRS as the models.
# Forward slashes keep the relative path valid on Windows, Linux and macOS alike.
types = gpd.read_file(
    "NP_Stratlex_1999-TypeLocalities/TypeLoc/S_Lithostratigraphy_TypeLocalities_p_shapefile.shp"
).to_crs(doms.crs)
# Search radius around each type locality, expressed in units of `doms.crs`.
# NOTE(review): the summary further down reports this value in metres, which
# only holds if `doms.crs` is a projected, metre-based CRS — confirm.
distance = 500
types["geometry"] = types.buffer(distance)
# Intersect the model footprints with the buffered type localities.
types_df = doms.overlay(types)

In [None]:
# Model / type-locality matches, restricted to the identifying columns.
types_df.loc[
    :,
    [
        "svalbox_dom_id",
        "location_locality",
        "TypeLoc",
        "NP2",
        "NP2_txt",
        "x33N",
        "y33N",
        "ID_1",
        "URL",
    ],
]

In [None]:
def _locality_record(row):
    """Collect the type-locality attributes of one overlay row into a dict."""
    return {
        "ID": row.ID,
        "Stratigraphy": row.Stratigrap,
        "Type_section": row["Type secti"],
        "url": row.URL,
    }


# Map each model id to the list of type-locality records it intersects.
type_dict = {
    dom_id: [_locality_record(row) for _, row in matches.iterrows()]
    for dom_id, matches in types_df.groupby("svalbox_dom_id")
}

In [None]:
def _unique_records(records):
    """Drop duplicate dicts from ``records``; return None for an empty or missing list."""
    if not records:
        return None
    # Dicts are unhashable: de-duplicate through their item tuples.
    return [dict(items) for items in {tuple(record.items()) for record in records}]


def _unique_ids(records):
    """Return the distinct "ID" values of ``records``; None for an empty or missing list."""
    if not records:
        return None
    return list({record["ID"] for record in records})


# Models without a matching type locality get None in both columns.
doms["type_locality"] = doms["svalbox_dom_id"].apply(lambda dom_id: type_dict.get(dom_id))
doms["type_locality"] = doms["type_locality"].apply(_unique_records)
doms["type_id"] = doms["type_locality"].apply(_unique_ids)

In [None]:
# Total number of (model, type locality) matches: `explode` flattens the
# per-model id lists and `count` ignores models without any match. A locality
# covered by several models is counted once per model.
print(f'# Typelocalities within {distance} m from models: {doms["type_id"].explode().count()}')
# `count` already skips missing values, so no explicit dropna is needed.
print(f'# Models covering typelocalities: {doms["type_id"].count()}')

# Table 5

In [None]:

# One row per model, ordered by model id, with the archive date parsed to datetimes.
author_table = (
    doms[
        [
            "svalbox_dom_id",
            "data_author",
            "data_doi",
            "location_locality",
            "location_land",
            "location_island",
            "publ_date_archived",
        ]
    ]
    .sort_values("svalbox_dom_id")
    .reset_index(drop=True)
    .assign(publ_date_archived=lambda table: pd.to_datetime(table["publ_date_archived"]))
)

def _parse_author_list(author_json):
    """Decode the serialised author list into a list of dicts.

    The text is tried as strict JSON first and then as a Python literal
    (single-quoted strings, ``None``). Only if both fail is the legacy
    quote/``None`` substitution applied, so names containing an apostrophe
    or the substring "None" are no longer corrupted.
    """
    import ast  # local import: only needed for the Python-literal fallback

    try:
        return json.loads(author_json)
    except json.JSONDecodeError:
        pass
    try:
        return ast.literal_eval(author_json)
    except (ValueError, SyntaxError):
        legacy = author_json.replace("'", '"').replace("None", "0")
        return json.loads(legacy)


def create_author_list_year(author_json,date):
    """Build a citation-style label such as ``"A, B and C (2021)"``.

    Parameters
    ----------
    author_json : str
        Serialised list of dicts, each with a "name" entry. Only the text
        before the first comma of every name is kept.
    date : object with a ``year`` attribute
        Supplies the year placed in parentheses.

    Returns
    -------
    str
        Names joined by ", " with " and " before the last one, followed by
        the year. "Svalbox Team" is dropped when other authors are present
        and kept when it is the only author.

    Raises
    ------
    ValueError
        If the author list is empty.
    """
    names = [entry["name"].split(",")[0] for entry in _parse_author_list(author_json)]
    if not names:
        raise ValueError("author list is empty")
    # Drop the team credit only when individual authors remain; otherwise the
    # list would end up empty and the label could not be built.
    if "Svalbox Team" in names and len(names) > 1:
        names.remove("Svalbox Team")
    if len(names) > 1:
        joined = ", ".join(names[:-1]) + " and " + names[-1]
    else:
        joined = names[0]
    return joined + " (" + str(date.year) + ")"

# Citation label built from the author list and the archive year.
author_table["data_author"] = [
    create_author_list_year(authors, archived)
    for authors, archived in zip(author_table["data_author"], author_table["publ_date_archived"])
]
# Turn the bare DOI into a resolvable link.
author_table["data_doi"] = ["https://www.doi.org/"+doi for doi in author_table["data_doi"]]
# Append the land name to the locality.
author_table["location_locality"] = [
    f"{locality}, {land}"
    for locality, land in zip(author_table["location_locality"], author_table["location_land"])
]

author_table

## Table 6

In [None]:
# Display the parameter table read from the record's "_parameters.json" file.
parameters