#RS data
rm(list=ls())
library(swfscMisc)
library(tibble)
library(ppcor)
library(broom)
library(trend)
library(ridge)
library(terra)
library(foreach)
library(doParallel)
library(TDPanalysis)   # date.to.DOY()

#===========================================================
#determine the optimal length of preseason
#The TRP (a particular temperature‐relevant period ) was defined as the period (ranging between 8 and 120 days, with 8 day step) before the mean LSD at each site
#optimal length of preseason determined by the maximum partial correlation between LSD and temperature
#Fu 2019 GCB (120 days)
pres_clim_fun_tmp <- function(yysrt,yyend,dfsmean,dfs.site,tmp.site,rad.site,ppt.site){ #s1: sequence for paratial correlation calculation by steps
  s1  <- seq(8,120,8) #the period (ranging between 8 and 120 days, with 8 day steps) before the mean leaf‐out date at each grid
  idx.dfs <- paste0("X",yysrt:yyend,rep(sprintf("%03d",dfsmean),15)) #year+DOY
  idx.dfs <- match(idx.dfs,colnames(tmp.site))
  pcor <- c()
  pvalue.pcor <- c()
  for (span in s1) {
    srt.idx <- idx.dfs - span + 1
    end.idx <- idx.dfs
    
    tmp.span <- c() #mean temperature during a preseason period
    rad.span <- c()
    ppt.span <- c()
    day.span <- c()
    for (ii in 1:length(srt.idx)){
      tmp.span <- append(tmp.span,apply(tmp.site[srt.idx[ii]:end.idx[ii]],1,mean)[[1]])
      rad.span <- append(rad.span,apply(rad.site[srt.idx[ii]:end.idx[ii]],1,mean)[[1]])
      ppt.span <- append(ppt.span,apply(ppt.site[srt.idx[ii]:end.idx[ii]],1,mean)[[1]])
    }
    dat  <- cbind(dfs.site,tmp.span,rad.span,ppt.span)
    dat  <- as.data.frame(dat)
    pcor.t <- pcor.test(dat$dfs.site,dat$tmp.span,dat[,c("rad.span","ppt.span")],method = "pearson")
    temp1  <- pcor.t[1,1] #estimate partial correlation
    temp2  <- pcor.t[1,2] #p value
    pcor   <- append(pcor,temp1)
    pvalue.pcor <- append(pvalue.pcor,temp2)
  }
  
  idx.max  <- which.max(abs(pcor)) # the indices of the maximum absolute partial correlation value 
  pcor.max <- pcor[idx.max]
  pvalue.pcor.max <- pvalue.pcor[idx.max]
  span.max <- s1[idx.max]
  
  srt.idx <- idx.dfs - span.max + 1
  end.idx <- idx.dfs
  tmp.span <- c() #mean temperature during a optimal preseason period
  rad.span <- c()
  ppt.span <- c()
  for (ii in 1:length(srt.idx)){
    tmp.span <- append(tmp.span,apply(tmp.site[srt.idx[ii]:end.idx[ii]],1,mean)[[1]])
    rad.span <- append(rad.span,apply(rad.site[srt.idx[ii]:end.idx[ii]],1,mean)[[1]])
    ppt.span <- append(ppt.span,apply(ppt.site[srt.idx[ii]:end.idx[ii]],1,mean)[[1]])
  }
  dat  <- cbind(dfs.site,tmp.span,rad.span,ppt.span)
  dat  <- as.data.frame(dat)
  dat$tmp.span <- round(dat$tmp.span,2)
  dat$rad.span <- round(dat$rad.span,2)
  dat$ppt.span <- round(dat$ppt.span,4)
  dat$span <- span.max
  dat$pcor <- pcor.max 
  dat$pvalue.pcor <- pvalue.pcor.max
  return(dat)
}

#----------------------------------------------------
#estimate trend, p.value
trend.lls <- function(dat) {
  dat  <- as.numeric(dat)
  if (na.count(dat) <= 1) {
    time <- 1:length(dat)
    lm   <- lm(dat~time)
    slope  <- round(summary(lm)$coefficients[2,1],3)
    pvalue <- as.numeric(glance(lm)[1,"p.value"])
    result <- round(as.numeric(cbind(slope,pvalue)),3)
  } else {
    result <- c(NA,NA)
  }
  return(result)
}
#===========================================================
#===========================================================
#===========================================================
#temperature, MSWX daily data set
tmppath <- "F:/Data/Climate/MSWX_V100/daily/past/Temp/0.083deg/"

#radiation
radpath <- "F:/Data/Climate/MSWX_V100/daily/past/SWd/0.083deg/"

#precipitation
pptpath <- "F:/Data/Climate/MSWX_V100/daily/past/P/0.083deg/"

#EOS path, raster
phnpath <- "D:/Work/Warming_cooling_aut/data/GIMMS_EOS/"

#out path
outpath <- "D:/Work/Warming_cooling_aut/process/process0611/GIMMS/1-sensitivity/spatial/"
#===========================================================
# autumn temperature trend
slope_tmp <- rast("D:/Work/Warming_cooling_aut/data/MSWX/autumn_tmp_trend_Sep_Nov/slope_autumn_tmp_2004_2018.tif")
pvalue_tmp <- rast("D:/Work/Warming_cooling_aut/data/MSWX/autumn_tmp_trend_Sep_Nov/pvalue_autumn_tmp_2004_2018.tif")
idx <- init(slope_tmp,"cell")
lat <- init(slope_tmp,"y")
lon <- init(slope_tmp,"x")
lc  <- rast("D:/Work/Warming_cooling/Data/auxiliary/MCD12C1/MCD12C1.A2018001.006.2019200161458.tif") 


df <- rast(list(idx,lat,lon,slope_tmp,pvalue_tmp))
df <- as.data.frame(df)
colnames(df) <- c("grididx","gridlat","gridlon","slope_tmp_autumn","pvalue_tmp_autumn")
df <- df[df$gridlat >= 30,]

geo <- vect(df,geom=c("gridlon","gridlat"),crs="+proj=longlat +datum=WGS84")
df <- add_column(df,lc = extract(lc,geo)[,2],.after = "gridlon")
df <- df[df$lc %in% c(1:5),]
df <- na.omit(df)
inf<- df[,c("grididx","gridlon","gridlat")] # update
geo <- vect(df,geom=c("gridlon","gridlat"),crs="+proj=longlat +datum=WGS84")
#===========================================================
#===========================================================
yysrt <- 2004
yyend <- 2018
dates <- seq(from=as.Date(paste0(yysrt,"-01-01")),to=as.Date(paste0(yyend,"-12-31")),by='1 day')
dates <- gsub("-","/",dates)
dates <- paste0(substr(dates,1,4),sprintf("%03d",date.to.DOY(dates, format = "yyyy/mm/dd")))
#daily temperature raster brick
setwd(tmppath)
tmpras   <- paste0(dates,".tif")
tmpras   <- rast(tmpras)
names(tmpras) <- dates
#daily radiation raster brick
setwd(radpath)
radras <- paste0(dates,".tif")
radras <- rast(radras)
names(radras) <- dates
#daily precipitation raster brick
setwd(pptpath)
pptras <- paste0(dates,".tif")
pptras <- rast(pptras)
names(pptras) <- dates

#dfs
setwd(phnpath)
dfs <- paste0("EOS_mean_",c(yysrt:yyend),".tif")
dfs <- rast(dfs)
dfs <- extract(dfs,geo)
dfs <- data.frame(sapply(dfs, function(x) ifelse(is.nan(x), NA, x)))
dfs <- dfs[,-1]
dfs <- round(dfs)
colnames(dfs) <- paste0("EOS",yysrt:yyend)
dfs <- cbind(inf,dfs)
dfs <- na.omit(dfs)
inf <- dfs[,c("grididx","gridlon","gridlat")] #update
geo <- vect(inf,geom=c("gridlon","gridlat"),crs="+proj=longlat +datum=WGS84")

# span count
int_count <- 30000
for_count <- ceiling(dim(dfs)[1]/int_count)

for (tt in 1:for_count){
  time1 <- Sys.time()
  print(tt)
  if (tt == for_count){
    srtidx <- (tt - 1) * int_count + 1
    endidx <- dim(inf)[1]
  } else {
    srtidx <- (tt - 1) * int_count + 1
    endidx <- tt  * int_count
  }
  
  inf1 <- inf[srtidx:endidx,]
  geo1 <- vect(inf1,geom=c("gridlon","gridlat"),crs="+proj=longlat +datum=WGS84")
  
  
  # dfs
  dfs1 <- dfs[srtidx:endidx,]
  
  #extract daily temperature based on lat/lon
  tmp <- extract(tmpras,geo1)
  tmp <- data.frame(sapply(tmp, function(x) ifelse(is.nan(x), NA, x)))
  tmp <- tmp[,-1]
  tmp <- round(tmp,2)
  tmp <- cbind(inf1,tmp)
  gc() #free unused memory
  print("Ta data loaded")
  #extract daily radiation based on lat/lon
  
  rad <- extract(radras,geo1)
  rad <- data.frame(sapply(rad, function(x) ifelse(is.nan(x), NA, x)))
  rad <- rad[,-1]
  rad <- round(rad,1)
  rad <- cbind(inf1,rad)
  gc() #free unused memory
  print("Ra data loaded")
  #extract daily precipitation based on lat/lon
  
  ppt <- extract(pptras,geo1)
  ppt <- data.frame(sapply(ppt, function(x) ifelse(is.nan(x), NA, x)))
  ppt <- ppt[,-1]
  ppt <- round(ppt,2)
  ppt <- cbind(inf1,ppt)
  gc() #free unused memory
  print("PPT data loaded")
  #=============================
  #=============================
  #===========================================================
  #do parallel calculation
  numcluster <- 60
  registerDoParallel(numcluster)
  print("parallel-start")
  #===========================================================
  result <- foreach (i=1:dim(inf1)[1], .packages = c("swfscMisc","ppcor","broom","trend","ridge"), .combine=rbind) %dopar% {
    grididx <- dfs1[i,"grididx"]
    gridlat <- dfs1[i,"gridlat"]
    gridlon <- dfs1[i,"gridlon"]
    
    #predata
    dfs.site <- dfs1[dfs1$grididx == grididx,][-1:-3]
    dfs.site <- as.numeric(dfs.site)
    tmp.site <- tmp[tmp$grididx == grididx,][-1:-3]
    ppt.site <- ppt[ppt$grididx == grididx,][-1:-3]
    rad.site <- rad[rad$grididx == grididx,][-1:-3]
    
    #-----------------------------------------------------
    #dfs CV
    cv.dfs   <- round(sd(dfs.site)/mean(dfs.site),3)
    #-----------------------------------------------------
    #dfs mean
    dfsmean  <- round(mean(dfs.site))
    #-----------------------------------------------------
    #get mean climate variables during optimal preseason
    check <- tryCatch({ #error occurs
      #tmp
      dat1 <- pres_clim_fun_tmp(yysrt=yysrt,yyend=yyend,dfsmean=dfsmean,dfs.site=dfs.site,tmp.site=tmp.site,rad.site=rad.site,ppt.site=ppt.site)
      span1 <- dat1[1,"span"]
      pcor1 <- dat1[1,"pcor"]
      pvalue.pcor1 <- dat1[1,"pvalue.pcor"]
      
      #tmp
      if (pvalue.pcor1 < 0.05){
        dat1  <- dat1[,c("dfs.site","tmp.span","rad.span","ppt.span")]
        #-----------------------------------------------------
        #trends of dfs and temperature
        trend.dfs.lls1 <- trend.lls(dat1$dfs.site)
        #-----------------------------------------------------
        #linear regression
        lm.linear1 <- lm(dfs.site ~ tmp.span+rad.span+ppt.span,data=dat1)
        summary.linear1 <- summary(lm.linear1)
        #-----------------------------------------------------
        # ridge regression
        # 0-1 normalization
        dat1$tmp.span <- dat1$tmp.span-mean(dat1$tmp.span)  # anomalies 
        dat1$tmp.span <- plotrix::rescale(dat1$tmp.span,c(0,1)) # minmax normalization
        
        dat1$rad.span <- dat1$rad.span-mean(dat1$rad.span)
        dat1$rad.span <- plotrix::rescale(dat1$rad.span,c(0,1)) 
        
        dat1$ppt.span <- dat1$ppt.span-mean(dat1$ppt.span)
        dat1$ppt.span <- plotrix::rescale(dat1$ppt.span,c(0,1)) 
        
        dat1$dfs.site <- dat1$dfs.site-mean(dat1$dfs.site)
        dat1$dfs.site <- plotrix::rescale(dat1$dfs.site,c(0,1)) 
        
        fit1 <- linearRidge(dfs.site~tmp.span+rad.span+ppt.span,data=dat1,nPCs = 3) 
        summary.ridge1 <- summary(fit1)
        
        
        #-----------------------------------------------------
        temp <- c(grididx,gridlat,gridlon,
                   dfsmean,cv.dfs,
                   trend.dfs.lls1[1],trend.dfs.lls1[2],
                   span1,pcor1,pvalue.pcor1,
                   as.numeric(summary.linear1$coefficients[2,c(1,4)]),
                   as.numeric(summary.ridge1$summaries$summary1$coefficients[2,c(1,5)]))
      } else {
        temp <- c(grididx,gridlat,gridlon,
                   dfsmean,cv.dfs,
                   trend.dfs.lls1[1],trend.dfs.lls1[2],
                   span1,pcor1,pvalue.pcor1,
                   rep(NA,4))
      }
      
    }, error = function(e){
      temp <- c(grididx,gridlat,gridlon,
                dfsmean,cv.dfs,
                span1,pcor1,pvalue.pcor1,rep(NA,6))
    }, finally = function(f){
    })
    temp  <- check
  }
  #===========================================================
  stopImplicitCluster()  #stop
  #===========================================================
  #define p value of NaN to NA
  for (ii in 1:dim(result)[1]) {
    result[ii,][is.nan(result[ii,])] <- 1
  }
  result <- as.data.frame(result)
  rownames(result) <- 1:dim(result)[1]
  #add colnames
  colnames(result) <- c("grididx","gridlat","gridlon",
                        "dfsmean","dfscv",
                        "slope_dfs_lls","pvalue_dfs_lls",
                        "optimallength_tmppart","pcor_tmppart","pvalue_pcor_tmppart",
                        "sens_tmp_lm","pvalue_tmp_lm",
                        "sens_tmp_ridge","pvalue_tmp_ridge")
                        
  write.csv(result,paste0(outpath,"sensitivity_preseason_daily_",yysrt,"_",yyend,"_tt_",tt,".csv"))

  time2 <- Sys.time()
  print(time2-time1)
}
