#PEP725 data
rm(list=ls())
library(swfscMisc)
library(tibble)
library(ppcor)
library(broom)
library(trend)
library(ridge)
library(terra)
library(foreach)
library(doParallel)
library(TDPanalysis)   # date.to.DOY()
#===========================================================
#===========================================================
#===========================================================
#determine the optimal length of preseason
#The TRP (a particular temperature‐relevant period ) was defined as the period (ranging between 8 and 120 days, with 8 day step) before the mean LSD at each site
#optimal length of preseason determined by the maximum partial correlation between LSD and temperature
#Fu 2019 GCB (120 days)
preseason_fun <- function(dfs.site,tmp.site,rad.site,ppt.site){ #s1: sequence for partial correlation calculation by steps
  length <- dim(dfs.site)[2]
  yysrt  <- as.numeric(colnames(dfs.site)[1])
  yyend  <- as.numeric(colnames(dfs.site)[length])
  
  dfsmean <- round(mean(as.numeric(dfs.site["day",]),na.rm=T))
  dfs.site <- as.numeric(dfs.site["day",])
  
  s1  <- seq(8,120,8) #the period (ranging between X and XX days, with X day steps) before the mean leaf‐out date at each grid
  idx.dfs <- paste0(yysrt:yyend,rep(sprintf("%03d",dfsmean),length)) #year+DOY
  idx.dfs <- as.Date(idx.dfs, format = "%Y %j")
  idx.dfs <- paste0("X",gsub("-","_",idx.dfs))
  idx.dfs <- match(idx.dfs,colnames(tmp.site))
  pcor <- c()
  pvalue.pcor <- c()
  for (span in s1) {
    srt.idx <- idx.dfs - span + 1
    end.idx <- idx.dfs
    
    tmp.span <- c() #mean temperature during a preseason period
    rad.span <- c()
    ppt.span <- c()
    day.span <- c()
    for (rr in 1:length(srt.idx)){
      tmp.span <- append(tmp.span,apply(tmp.site[srt.idx[rr]:end.idx[rr]],1,mean)[[1]])
      rad.span <- append(rad.span,apply(rad.site[srt.idx[rr]:end.idx[rr]],1,mean)[[1]])
      ppt.span <- append(ppt.span,apply(ppt.site[srt.idx[rr]:end.idx[rr]],1,mean)[[1]])
    }
    dat  <- cbind(dfs.site,tmp.span,rad.span,ppt.span)
    dat  <- as.data.frame(dat)
    dat  <- na.omit(dat)
    pcor.t <- pcor.test(dat$dfs.site,dat$tmp.span,dat[,c("rad.span","ppt.span")],method = "pearson")
    temp1  <- pcor.t[1,1] #estimate partial correlation
    temp2  <- pcor.t[1,2] #p value
    pcor   <- append(pcor,temp1)
    pvalue.pcor <- append(pvalue.pcor,temp2)
  }
  
  idx.max  <- which.max(abs(pcor)) # the indices of the maximum absolute partial correlation value 
  pcor.tmp.max <- pcor[idx.max]
  pvalue.pcor.tmp.max <- pvalue.pcor[idx.max]
  span.max <- s1[idx.max] #optimal length of preseason
  result1 <- c(span.max,dfsmean,pcor.tmp.max,pvalue.pcor.tmp.max)
  return(result1)
}

#get mean climate variables during a particular temperature‐relevant period 
#return a data frame including climate variables during the presaosn and
climatedat_preseason_fun <- function(yysrt,yyend,span.max,dfs.site,tmp.site,rad.site,ppt.site){ #s1: sequence for paratial correlation calculation by steps
  #length  <- yyend - yysrt +1 # 15 yrs
  length  <- 15
  dfsmean <- round(mean(as.numeric(dfs.site["day",]),na.rm=T))
  dfs.span <- dfs.site[,paste0(as.character(yysrt:yyend))]
  dfs.span <- as.numeric(dfs.span["day",])
  
  idx.dfs <- paste0(yysrt:yyend,rep(sprintf("%03d",dfsmean),length)) #year+DOY
  idx.dfs <- as.Date(idx.dfs, format = "%Y %j")
  idx.dfs <- paste0("X",gsub("-","_",idx.dfs))
  idx.dfs <- match(idx.dfs,colnames(tmp.site))
  
  srt.idx <- idx.dfs - span.max + 1
  end.idx <- idx.dfs
  tmp.span <- c() #mean temperature during a optimal preseason period
  rad.span <- c()
  ppt.span <- c()
  for (rr in 1:length(srt.idx)){
    tmp.span <- append(tmp.span,apply(tmp.site[srt.idx[rr]:end.idx[rr]],1,mean)[[1]])
    rad.span <- append(rad.span,apply(rad.site[srt.idx[rr]:end.idx[rr]],1,mean)[[1]])
    ppt.span <- append(ppt.span,apply(ppt.site[srt.idx[rr]:end.idx[rr]],1,mean)[[1]])
    
  }
  dat  <- cbind(dfs.span,tmp.span,rad.span,ppt.span)
  dat  <- as.data.frame(dat)
  dat$tmp.span <- round(dat$tmp.span,2)
  dat$rad.span <- round(dat$rad.span,2)
  dat$ppt.span <- round(dat$ppt.span,4)
  return(dat)
}
#----------------------------------------------------
#missing records replaced with NA values, dfs is a data frame with ["year","day"]
expand_records <- function(dfs){ 
  dfs   <- dfs[order(dfs$year,decreasing = F),]
  miny <- min(dfs$year)
  maxy <- max(dfs$year)
  #expand the rows for some missing years
  mat <- cbind(miny:maxy,NA)
  mat <- as.data.frame(mat)
  colnames(mat) <- c("year","XX")
  dfs <- merge(dfs,mat,by="year",all=T)
  dfs <- dfs[,-3]
  return(dfs)
}
#----------------------------------------------------
#estimate trend, p.value
trend.lls <- function(dat) {
  dat  <- as.numeric(dat)
  if (na.count(dat) <= 2) {
    dat  <- dat[!is.na(dat)]
    time <- 1:length(dat)
    lm   <- lm(dat~time)
    slope  <- round(summary(lm)$coefficients[2,1],3)
    pvalue <- as.numeric(glance(lm)[1,"p.value"])
    result <- round(as.numeric(cbind(slope,pvalue)),3)
  } else {
    result <- c(NA,NA)
  }
  return(result)
}
#===========================================================
#===========================================================
#===========================================================
#temperature, E-OBS daily data set, daily mean temperature TG
tmpnc <- "F:/Data/Climate/E-OBS/v27/tg_ens_mean_0.1deg_reg_v27.0e.nc"

#radiation, daily mean global radiation QQ
radnc <- "F:/Data/Climate/E-OBS/v27/qq_ens_mean_0.1deg_reg_v27.0e.nc"

#precipitation, daily precipitation sum RR
pptnc <- "F:/Data/Climate/E-OBS/v27/rr_ens_mean_0.1deg_reg_v27.0e.nc"

#DFS path, PEP725
phnpath <- "D:/Work/Warming_cooling_aut/process/process0404/PEP725/1-DFS/"

#out path
outpath <- "D:/Work/Warming_cooling_aut/process/process0610/PEP725/1-sensitivity_span/"
#===========================================================
inpath <- "D:/Work/Warming_cooling_aut/process/process0404/PEP725/1-DFS/"
setwd(inpath)
years_thes <- 15 #moving window length
species <- c("Aesculus hippocastanum","Betula","Fagus","Quercus robur")

for (ii in 1:length(species)){
  time1 <- Sys.time()
  print(time1)
  print(species[ii])
  data <- read.csv(paste0("PEP725_bbch94_",gsub(" ","_",species[ii]),".csv"))
  #points file
  inf <- data[,c("pep_id","lon","lat")]
  inf <- unique(inf) #site counts
  #inf <- inf[1:1000,]
  geo <- terra::vect(inf,geom=c("lon","lat"),crs="+proj=longlat +datum=WGS84")
  #extract climate data based on lat/lon
  #temperature
  dates <- seq(from=as.Date("1950-01-01"),to=as.Date("2022-12-31"),by='1 day')
  dates <- gsub("-","_",dates)
  tmp <- terra::rast(tmpnc)
  names(tmp) <- dates
  dates1 <- seq(from=as.Date("1950-01-01"),to=as.Date("2016-12-31"),by='1 day')
  tmp <- tmp[[1:length(dates1)]] # data during 1950-2016
  terra::gdalCache(size=90000)
  tmp <- terra::extract(tmp,geo)
  tmp <- data.frame(sapply(tmp, function(x) ifelse(is.nan(x), NA, x)))
  tmp <- tmp[,-1]
  tmp <- cbind(inf,tmp)
  gc() #free unused memory
  print("Ta data loaded")
  #radiation
  dates <- seq(from=as.Date("1950-01-01"),to=as.Date("2022-12-31"),by='1 day')
  dates <- gsub("-","_",dates)
  rad <- terra::rast(radnc)
  names(rad) <- dates
  dates1 <- seq(from=as.Date("1950-01-01"),to=as.Date("2016-12-31"),by='1 day')
  rad <- rad[[1:length(dates1)]] # data during 1950-2016
  terra::gdalCache(size=90000)
  rad <- terra::extract(rad,geo)
  rad <- data.frame(sapply(rad, function(x) ifelse(is.nan(x), NA, x)))
  rad <- rad[,-1]
  rad <- cbind(inf,rad)
  gc() #free unused memory
  print("Ra data loaded")
  #precipitation
  dates <- seq(from=as.Date("1950-01-01"),to=as.Date("2022-12-31"),by='1 day')
  dates <- gsub("-","_",dates)
  ppt <- terra::rast(pptnc)
  names(ppt) <- dates
  dates1 <- seq(from=as.Date("1950-01-01"),to=as.Date("2016-12-31"),by='1 day')
  ppt <- ppt[[1:length(dates1)]] # data during 1950-2016
  terra::gdalCache(size=90000)
  ppt <- terra::extract(ppt,geo)
  ppt <- data.frame(sapply(ppt, function(x) ifelse(is.nan(x), NA, x)))
  ppt <- ppt[,-1]
  ppt <- cbind(inf,ppt)
  gc() #free unused memory
  print("PPT data loaded")
  
  time2 <- Sys.time()
  print(time2)
  #===================================================
  if (dim(inf)[1] > 0) {
    #===========================================================
    #do parallel calculation
    numcluster <- 50
    registerDoParallel(numcluster)
    #===========================================================
    print("parallel-start")
    result <- foreach (i=1:dim(inf)[1], .packages = c("swfscMisc","ppcor","broom","trend","ridge"), .combine=rbind) %dopar% {
      #i=1000
      pepid  <- inf$pep_id[i]
      lat    <- inf$lat[i]
      lon    <- inf$lon[i]
      dfs    <- subset(data,pep_id == pepid)
      speciessub <- dfs$species[1]
      dfs    <- dfs[,c("year","day")]
      lower  <- median(dfs$day)-2.5*mad(dfs$day) # day is the day of year of leaf senescence
      upper  <- median(dfs$day)+2.5*mad(dfs$day)
      dfs    <- subset(dfs,day>lower&day<upper) # remove outliers
      #-----------------------------------------------------tryCatch starts
      check <- tryCatch({ #error occurs
        #prepare site data
        dfs.site <- expand_records(dfs)
        row.names(dfs.site) <- dfs.site$year
        dfs.site <- dfs.site[dfs.site$year >= 1951,]
        miny     <- min(dfs.site$year)
        dfs.site <- t(dfs.site)
        dfs.site <- as.data.frame(dfs.site)
        dfs.site <- dfs.site[2,]
        tmp.site <- tmp[tmp$pep_id == pepid,][-1:-3]
        ppt.site <- ppt[ppt$pep_id == pepid,][-1:-3]
        rad.site <- rad[rad$pep_id == pepid,][-1:-3]
        #-----------------------------------------------------
        #window-step
        num_mw <- dim(dfs.site)[2] - years_thes + 1
        result.site <- c()
        for (j in 1:num_mw){
          #print(j)
          yysrt   <- miny + (j-1)
          yyend   <- yysrt + years_thes -1
          dfs.span<- dfs.site[,as.character(yysrt:yyend)]
          pre.inf <- preseason_fun(dfs.site = dfs.span,tmp.site = tmp.site,rad.site = rad.site,ppt.site = ppt.site)
          span    <- pre.inf[1]
          dfsmean <- pre.inf[2]
          pcor    <- pre.inf[3]
          pvalue.pcor <- pre.inf[4]
          
          
          if (pvalue.pcor < 0.05) {
            #-----------------------------------------------------
            dat     <- climatedat_preseason_fun(yysrt=yysrt,yyend=yyend,span.max = span,dfs.site = dfs.site,tmp.site = tmp.site,rad.site = rad.site,ppt.site = ppt.site)
            na.span <- na.count(dat$dfs.span)
            cv.dfs  <- round(sd(dat$dfs.span,na.rm = T)/mean(dat$dfs.span,na.rm = T),3)
            #-----------------------------------------------------
            if (na.span <= 1){
              #-----------------------------------------------------
              #trend of dfs, temperature, radiation, and precipitation
              trend.dfs.lls <- trend.lls(dat$dfs.span)
              #-----------------------------------------------------
              #linear regression
              lm.linear <- lm(dfs.span ~ tmp.span+rad.span+ppt.span,data=dat)
              summary.linear <- summary(lm.linear)
              #-----------------------------------------------------
              # ridge regression
              # 0-1 normalization
              dat2 <- na.omit(dat)
              dat2$tmp.span <- dat2$tmp.span-mean(dat2$tmp.span)  # anomalies 
              dat2$tmp.span <- plotrix::rescale(dat2$tmp.span,c(0,1)) # minmax normalization
              
              dat2$rad.span <- dat2$rad.span-mean(dat2$rad.span)
              dat2$rad.span <- plotrix::rescale(dat2$rad.span,c(0,1)) 
              
              dat2$ppt.span <- dat2$ppt.span-mean(dat2$ppt.span)
              dat2$ppt.span <- plotrix::rescale(dat2$ppt.span,c(0,1)) 
              
              dat2$dfs.span <- dat2$dfs.span-mean(dat2$dfs.span)
              dat2$dfs.span <- plotrix::rescale(dat2$dfs.span,c(0,1)) 
              
              fit <- linearRidge(dfs.span~tmp.span+rad.span+ppt.span,data=dat2,nPCs = 3) 
              summary.ridge <- summary(fit)
              #------------------------------------
              # autumn, Sep-Nov
              end.idx1 <- paste0(yysrt:yyend,"_11_30")
              end.idx1 <- paste0("X",end.idx1)
              end.idx1 <- match(end.idx1,colnames(tmp.site))
              
              srt.idx1 <- paste0(yysrt:yyend,"_09_01")
              srt.idx1 <- paste0("X",srt.idx1)
              srt.idx1 <- match(srt.idx1,colnames(tmp.site))
              
              #autumn temperature
              tmp.autumn1 <- c()
              for (i in 1:length(srt.idx1)){
                tmp.autumn1 <- append(tmp.autumn1,apply(tmp.site[srt.idx1[i]:end.idx1[i]],1,mean)[[1]])
              }
              tmp.autumn1 <- round(tmp.autumn1,3)
              #slope and p value
              trend.tmp.lls.autumn1 <- trend.lls(tmp.autumn1)
              #------------------------------------
              #-----------------------------------------------------
              temp <- c(c(pepid,lon,lat),speciessub,
                        span,dfsmean,pcor,pvalue.pcor,
                        yysrt,yyend,na.span,cv.dfs,
                        trend.dfs.lls[1],trend.dfs.lls[2],
                        trend.tmp.lls.autumn1[1],trend.tmp.lls.autumn1[2],
                        as.numeric(summary.linear$coefficients[,1][2:4]),as.numeric(summary.linear$coefficients[,4][2:4]),
                        as.numeric(summary.ridge$summaries$summary1$coefficients[,1][2:4]),
                        as.numeric(summary.ridge$summaries$summary1$coefficients[,5][2:4]))
            } else {
              temp <- c(c(pepid,lon,lat),speciessub,span,dfsmean,pcor,pvalue.pcor,yysrt,yyend,na.span,rep(NA,17))
            }
          } else {
            temp <- c(c(pepid,lon,lat),speciessub,span,dfsmean,pcor,pvalue.pcor,yysrt,yyend,rep(NA,18))
          }
          #merge
          result.site <- rbind(result.site,temp)
        }
        result.id <- result.site
        #----------------------------------------------------------
      }, warning = function(w){
        result.id  <- c(c(pepid,lon,lat),speciessub,rep(NA,24))
      }, error = function(e){
        result.id <- c(c(pepid,lon,lat),speciessub,rep(NA,24))
      }, finally = function(f){
      })
      result.id  <- check
      #-----------------------------------------------------tryCatch ends
    }
    #===========================================================
    stopImplicitCluster()  #stop
  }
  #===========================================================
  #define p value of NaN to NA
  for (tt in 1:dim(result)[1]) {
    result[tt,][is.nan(result[tt,])] <- 1
  }
  result <- as.data.frame(result)
  rownames(result) <- 1:dim(result)[1]
  #add colnames
  colnames(result) <- c("pep_id","lon","lat","species",
                        "optimallength","dfsmean","pcor_tmp","pvalue_pcor_tmp",
                        "yysrt","yyend","num_NA","dfscv_win",
                        "slope_dfs_lls","pvalue_dfs_lls",
                        "slope_tmp_lls_aut1","pvalue_tmp_lls_aut1",
                        "sens_tmp_lm","sens_rad_lm","sens_ppt_llm",
                        "pvalue_tmp_lm","pvalue_rad_lm","pvalue_ppt_lm",
                        "sens_tmp_ridge","sens_rad_ridge","sens_ppt_ridge",
                        "pvalue_tmp_ridge","pvalue_rad_ridge","pvalue_ppt_ridge")
  write.csv(result,paste0(outpath,"sensitivity_",gsub(" ","_",species[ii]),"_partial_in_span_08days",".csv"))
  time3 <- Sys.time()
  print(time3)
  print(time3-time1)
  rm("data","inf","tmp","ppt","rad")
}
