# Download and process modern SST range and mollusk occurrences
# Project "chemical data"

require(tidyverse)
require(RColorBrewer)
require(robis)

# ------------------------------------------------------------------------------
# NOAA ERSST
# ------------------------------------------------------------------------------

# Download ERSST data from the NOAA website in ASCII format
# One file per year (1981-2010) is saved to 10_NOAA_data/
dir.create("10_NOAA_data", recursive = TRUE, showWarnings = FALSE) # download.file() does not create missing folders
for(yr in 1981:2010){
    status <- download.file(
        url = paste0("https://www.ncei.noaa.gov/pub/data/cmb/ersst/v5/ascii/ersst.v5.", yr, ".asc"),
        destfile = paste0("10_NOAA_data/ersst_v5_", yr, ".asc"),
        quiet = TRUE
    )
    # download.file() returns 0 on success; some download methods only warn
    # (rather than stop) on failure, so check the status explicitly
    if(status != 0){
        stop("Download of ERSST file for ", yr, " failed (status ", status, ")", call. = FALSE)
    }
}

# Process ASCII files and create plots
#
# Every yearly file is read as 12 stacked monthly blocks of 180 longitude rows
# by 89 latitude columns, reshaped to one row per grid cell and month, plotted
# as monthly maps, and finally combined into globalSST.
sst_latitudes <- seq(-88, 88, 2)
sst_longitudes <- seq(0, 358, 2)

# pdf() cannot write into a folder that does not exist yet
dir.create("10_NOAA_data/NOAA_ERSST_monthplots", recursive = TRUE, showWarnings = FALSE)

# Collect the yearly tables in a list and combine them once after the loop
# (joining onto a growing data frame copies it every year and matches on SST)
yearly_SST <- list()

for(yr in 1981:2010){
    dat <- read.table(paste0("10_NOAA_data/ersst_v5_", yr, ".asc")) # Load data
    # Fail early if the file does not have the expected 12 x 180 rows by 89 columns
    stopifnot(
        ncol(dat) == length(sst_latitudes),
        nrow(dat) == 12 * length(sst_longitudes)
    )
    dat[dat == -9999] <- NA # Remove land values (default as -9999)
    dat <- dat / 100 # Convert to degrees C
    colnames(dat) <- sst_latitudes # Assign latitude values
    dat$longitude <- rep(sst_longitudes, 12) # Assign longitude values
    dat$month <- rep(month.abb[1:12], each = length(sst_longitudes)) # Assign months
    dat_long <- pivot_longer(dat, # Lengthen data
        cols = all_of(as.character(sst_latitudes)), # select the latitude columns by name
        names_to = "latitude",
        values_to = "SST"
    )
    dat_long$latitude <- as.numeric(dat_long$latitude) # Change latitude values to numeric
    dat_long$year <- yr # Add year
    yearly_SST[[as.character(yr)]] <- dat_long # Store for the global dataframe

    # Create monthly SST plots
    monthplots <- list()
    for(m in as.character(unique(dat_long$month))){
        monthdat <- filter(dat_long, month == m) # isolate data per month
        monthplots[[m]] <- ggplot(monthdat) + # Plot monthly SST maps
            geom_tile(aes(x = longitude,
                y = latitude,
                fill = SST)) +
            scale_fill_gradientn(colours = brewer.pal(11, "RdYlBu")[c(11, 6, 1)],
                values = c(0, 15, 30) / 30,
                limits = c(0, 30)) +
            ggtitle(paste("NOAA ERSST SST\nmonth = ", m, sep = ""))
    }

    # Export the monthly SST plots of this year to a single PDF (one page per month)
    pdf(paste0("10_NOAA_data/NOAA_ERSST_monthplots/NOAA_ERSST_", yr, ".pdf"))
    for(i in seq_along(monthplots)){
        print(monthplots[[i]])
    }
    dev.off()
}

# Stack all years; keep the column order and plain data.frame class used downstream
globalSST <- bind_rows(yearly_SST) |>
    select(longitude, latitude, year, month, SST) |>
    as.data.frame()

# Remove rows in which every column is NA. is.na() on the data frame gives a
# logical matrix, so rowSums() counts the non-missing values per row without
# the per-row function calls and character coercion of apply()
globalSST <- globalSST[rowSums(!is.na(globalSST)) > 0, ]
write.csv(globalSST, "10_NOAA_data/globalSST_data.csv", row.names = FALSE) # Export monthly data from the NOAA dataset for the reference period (1981-2010)

# Calculate seasonal statistics
# For every month and grid cell, summarise SST over all years in globalSST:
# count, mean, standard deviation, standard error and mean +/- CL95
NOAA_ERSST_stats_season <- globalSST |>
    group_by(month, longitude, latitude) |>
    summarize(
        seasonnr = first(month), # month label of the group (constant within a group)
        N = n(), # number of rows in the group, including rows with missing SST
        mean = mean(SST, na.rm = TRUE),
        sd = sd(SST, na.rm = TRUE), # spread of SST itself (not of SST squared)
        se = sd / sqrt(N),
        # NOTE(review): qt(0.95, ...) is the one-sided 95% quantile; a two-sided
        # 95% interval would use qt(0.975, ...) - confirm which is intended
        CL95 = qt(0.95, N - 1) * se,
        max = mean + CL95,
        min = mean - CL95,
        .groups = "drop_last" # same grouping as the summarize() default, without the message
    )

write.csv(NOAA_ERSST_stats_season, "10_NOAA_data/globalSST_season_data.csv", row.names = FALSE)

# ------------------------------------------------------------------------------
# OBIS dataset
# ------------------------------------------------------------------------------

# OBIS (YEAR) Ocean Biodiversity Information System. Intergovernmental Oceanographic Commission of UNESCO. www.obis.org.
# OBIS (YEAR) [Data e.g. Distribution records of Eledone cirrhosa (Lamarck, 1798)] [Dataset] (Available: Ocean Biodiversity Information System. Intergovernmental Oceanographic Commission of UNESCO. www.obis.org. Accessed: YYYY-MM-DD)

# Pull every OBIS record filed under "Bivalvia", restricted to the species
# name, coordinate and depth range columns
bivalve_occurrences <- occurrence(
    fields = c("species", "decimalLongitude", "decimalLatitude",
        "minimumDepthInMeters", "maximumDepthInMeters"),
    scientificname = "Bivalvia"
)

# Save the downloaded occurrence table to disk
write.csv(
    x = bivalve_occurrences,
    file = "10_OBIS_data/bivalve_occurrences.csv",
    row.names = FALSE
)

# Summarise SST extremes and occurrence counts per 2-degree grid cell
#
# Occurrence coordinates are snapped to the nearest even degree so they can be
# matched against the longitude and latitude values stored in globalSST.
# Longitudes are first converted to 0-360 E; the final %% 360 wraps values that
# round up to 360 back to 0, because 360 is not part of the 0-358 SST grid.
occurrence_cells <- data.frame(
    longitude = (2 * round((bivalve_occurrences$decimalLongitude %% 360) / 2)) %% 360,
    latitude = 2 * round(bivalve_occurrences$decimalLatitude / 2)
)

# Find all grid cells that contain at least one occurrence (in order of first appearance)
bivalve_grid <- unique(occurrence_cells)

# Count occurrences per grid cell, using exactly the same cell assignment as the grid
cell_counts <- count(occurrence_cells, longitude, latitude, name = "N")

# Find max and min temperature (over all months and years) for each occupied cell,
# together with the month in which that value occurs
sst_extremes <- globalSST |>
    filter(!is.na(SST)) |> # cells without any SST drop out here and stay NA after the join
    semi_join(bivalve_grid, by = c("longitude", "latitude")) |>
    group_by(longitude, latitude) |>
    summarize(
        maxmonth = month[which.max(SST)],
        maxT = max(SST),
        minmonth = month[which.min(SST)],
        minT = min(SST),
        .groups = "drop"
    )

# Combine into one table: longitude, latitude, maxmonth, maxT, minmonth, minT, N
# (left joins keep every occupied cell and the row order of bivalve_grid)
bivalve_grid <- bivalve_grid |>
    left_join(sst_extremes, by = c("longitude", "latitude")) |>
    left_join(cell_counts, by = c("longitude", "latitude"))

# Export monthly extremes
write.csv(bivalve_grid, "10_OBIS_data/bivalve_monthly_extremes.csv", row.names = FALSE)