# preparing data for specific mask (this is the only part that changes, but automatically)
cur_metadata <- analyses_metadata %>% filter(MASK == cur_mask)

# Every path used below is read from this single metadata row, so fail early
# with a clear message if the mask matches no row or more than one.
if (nrow(cur_metadata) != 1) {
  stop("Expected exactly one row of 'analyses_metadata' for mask '", cur_mask,
       "', found ", nrow(cur_metadata), ".",
       call. = FALSE)
}

speclist_path <- cur_metadata$SPECLISTDATA.PATH

# don't run if no species selected
# load() restores the objects saved in the file; the code below expects it to
# provide `specieslist` and `restrictedspecieslist`.
load(speclist_path)

# TRUE when at least one species has a value of 1 in the `ht` or `rt` column
# of either list. Each %in% test is a single TRUE/FALSE (never NA), so the
# scalar, short-circuiting `||` is the appropriate operator.
to_run <- (1 %in% specieslist$ht) || (1 %in% specieslist$rt) ||
  (1 %in% restrictedspecieslist$ht) || (1 %in% restrictedspecieslist$rt)

# Runs the single-species trend models on every simulated dataset assigned to
# this system and writes one CSV of trends per dataset. The whole step is
# skipped (with a printed note) when no species is selected for the mask.
if (to_run) {

  # for the full country analysis, runs are split among multiple systems, and use
  # separate subsampled datasets. We need to ensure this information exists.
  # else, all 1000 runs are on one system.
  if (exists("my_assignment")) {
    cur_assignment <- my_assignment
  } else if (cur_mask == "none") {
    # stop() instead of return(): this code is not inside a function, so a
    # top-level return() cannot report the problem to the user as an error.
    stop("'my_assignment' is empty! Please specify IDs of data files assigned to you.",
         call. = FALSE)
  } else {
    cur_assignment <- 1:1000
  }

  ###

  # library() fails immediately when a package is missing, whereas require()
  # only warns and lets the script continue until a later, less clear error.
  library(tidyverse)
  library(lme4)
  library(VGAM)
  library(parallel)
  library(foreach)
  library(doParallel)

  source('00_scripts/00_functions.R')

  # NOTE(review): same file as loaded before the to_run check; kept in case
  # sourcing the functions file touches these objects -- likely redundant.
  load(speclist_path)

  # species with a non-missing `ht` or `rt` entry, plus all restricted species
  lsa <- specieslist %>% filter(!is.na(ht) | !is.na(rt))
  listofspecies <- c(lsa$COMMON.NAME, restrictedspecieslist$COMMON.NAME)
  speclen <- length(listofspecies)

  # creating new directory if it doesn't already exist
  if (!dir.exists(cur_metadata$TRENDS.PATHONLY)) {
    dir.create(cur_metadata$TRENDS.PATHONLY,
               recursive = TRUE)
  }

  # Number of workers: half the detected cores, but never fewer than one.
  # detectCores() can return NA, and a single-core machine would otherwise
  # request 0.5 workers; both make makeCluster() fail. Computed once because
  # it does not change between iterations.
  n.cores <- parallel::detectCores()
  n.cores <- if (is.na(n.cores)) 1L else max(1L, n.cores %/% 2L)

  # The model output holds one "freq" row and one "se" row per time group, so
  # the expected row count is derived from `databins` rather than hard-coded.
  n_timegroups <- length(databins$timegroups)

  for (k in cur_assignment) {

    # file names for individual files
    write_path <- paste0(cur_metadata$TRENDS.PATHONLY, "trends_", k, ".csv")
    data_path <- paste0(cur_metadata$SIMDATA.PATHONLY, "data", k, ".csv")

    tictoc::tic(glue("Species trends for {cur_mask}: {k}/{max(cur_assignment)}"))

    # read data files; grid, month and time-group columns are treated as factors,
    # and `gridg` is set to the `gridg3` column
    data <- read.csv(data_path) %>%
      mutate(across(c(gridg1, gridg2, gridg3, gridg4, month, timegroups),
                    as.factor)) %>%
      mutate(gridg = gridg3)

    # start parallel: create the cluster and register it to be used by %dopar%
    my.cluster <- parallel::makeCluster(n.cores, type = "PSOCK")
    doParallel::registerDoParallel(cl = my.cluster)

    # # check if it is registered (optional)
    # foreach::getDoParRegistered()
    # # how many workers are available? (optional)
    # foreach::getDoParWorkers()

    # One model run per species, results bound as columns. With
    # .errorhandling = 'remove', a species whose run errors is dropped from the
    # combined result instead of aborting the whole dataset.
    # `finally` guarantees the workers are shut down even if the run fails.
    trends0 <- tryCatch(
      foreach(i = listofspecies, .combine = 'cbind', .errorhandling = 'remove') %dopar%
        singlespeciesrun(data = data,
                         species = i,
                         specieslist = specieslist,
                         restrictedspecieslist = restrictedspecieslist),
      finally = parallel::stopCluster(cl = my.cluster)
    )

    trends <- data.frame(trends0) %>%
      # converting first row of species names (always true) to column names
      magrittr::set_colnames(.[1, ]) %>%
      slice(-1) %>%
      # two rows per time group: all "freq" rows first, then all "se" rows
      mutate(timegroupsf = rep(databins$timegroups, 2),
             timegroups = rep(databins$year, 2),
             type = rep(c("freq", "se"), each = n_timegroups),
             sl = k) %>%  # sim number
      # pivoting species names longer
      pivot_longer(-c(timegroups, timegroupsf, sl, type),
                   values_to = "value", names_to = "COMMON.NAME") %>%
      pivot_wider(names_from = type, values_from = value) %>%
      # numerical ID for species names, for arranging
      mutate(sp = row_number(), .by = timegroupsf) %>%
      arrange(sl, sp) %>%
      dplyr::select(-sp) %>%
      # reordering
      relocate(sl, COMMON.NAME, timegroupsf, timegroups, freq, se)

    write.csv(trends, file = write_path, row.names = FALSE)

    tictoc::toc()

    # free memory held by this iteration's data before reading the next file
    gc()

  }

} else {

  print(glue("Skipping running species trends for {cur_mask}"))

}