# This supplementary material is hosted by Eurosurveillance as supporting information alongside the \textit{Epidemiological and clinical insights from SARS-CoV-2 RT-PCR crossing threshold values} on behalf of the authors who remain responsible for the accuracy and appropriateness of the content. The same standards for ethics, copyright, attributions and permissions as for the article apply. Supplements are not edited by Eurosurveillance and the journal is not responsible for the maintenance of any links or email addresses provided therein.


rm(list=ls(all=TRUE)) # clears variables1 
def.par<-par(no.readonly=TRUE) # default graphical parameters

library("tidyverse") ; library("dplyr")
library("lme4") ; library('emmeans')
library("lubridate")
library('boot') ; library('gam') ; library("zoo")
library('R0') ; library('EpiEstim') ; library('incidence')
library('gdata')
library("nlme")
library("ggpubr")
library("ggsci")       # for nice color palettes
library("glmnet")
library("nnet")
library("lmerTest")
library("forecast")
select<-dplyr::select


setwd("~/work/research/R0Covid19/Ct")




##prep data
## Données de PCR
load("study_noreplicate.RData")


#filter
Du <- Df %>% dplyr::filter(
  (resultat_qualitatif == 1) # test is positive
  &(Ct>10) # remove Cts smaller than 10
  &(Ct<45) # remove Cts larger than 45
  #  & (date_prelevement>"2020-05-11") #post lock-down
  & (date_prelevement>="2020-07-01") #post summer 2020
  & (gene_cible!="ci") #remove internal control gene
  & (lieu_prelevement=="screening") # from screening
  & (age>5) # not too young
  & (age<81) # not too old
)


# scaling numerical variables
#Du$date_prelevement<-scale(as.Date(Du$date_prelevement))
Du$age<-scale(as.numeric(Du$age))
Du$Rt_scaled<-scale(as.numeric(Du$Rt_noshift)) 


## cleaning variables to drop removed factors
Du$laboratoire_analyse = drop.levels(as.factor(Du$laboratoire_analyse))
Du$technique_PCR = drop.levels(as.factor(Du$technique_PCR))
Du$gene_cible = drop.levels(as.factor(Du$gene_cible))
Du$nature_prelevement = drop.levels(as.factor(Du$nature_prelevement))
Du$lieu_prelevement = drop.levels(as.factor(Du$lieu_prelevement))



Du_original<-Du #save before further changes

####

Du<-Du%>%select(date_prelevement,Rt_shifted9,Rt_shifted5,Rt_noshift,Ct,age,gene_cible,technique_PCR,laboratoire_analyse)%>%na.omit
modele_noR2 = lm(Ct ~ age + gene_cible*technique_PCR + laboratoire_analyse,data = Du)
Du$Ct_resid<-residuals(modele_noR2)





# testing<-read_csv(file="https://covid.ourworldindata.org/data/owid-covid-data.csv")
# saveRDS(testing,"owid_covid.csv")
testing<-readRDS("owid_covid.csv")
testing<-testing%>%
  filter(location=="France"&date>="2020-07-01")%>%
  select(date,
         new_tests, 
         new_tests_per_thousand, 
         new_tests_smoothed, 
         new_tests_smoothed_per_thousand, 
         positive_rate, 
         tests_per_case, 
         tests_units)

Du<-Du%>%left_join(testing%>%
                     rename(date_prelevement=date)%>%
                     select(date_prelevement,positive_rate),by="date_prelevement")

date_range<-tibble(date_prelevement=
                     as.Date(seq(ymd(Du$date_prelevement%>%min),ymd(Du$date_prelevement%>%max), by = '1 day'))
)


Du%>%
  right_join(date_range,by="date_prelevement")%>%
  group_by(date_prelevement)%>%
  summarise(quantile = c(paste0("q",seq(0,1,0.25))),
            Ct = quantile(Ct, seq(0,1,0.25)),
            Ctresid = quantile(Ct_resid, seq(0,1,0.25)),.groups="drop")%>%
  pivot_wider(names_from = quantile,values_from=c(Ct,Ctresid))%>%
  right_join(Du%>%ungroup%>%distinct(date_prelevement,Rt_shifted9,Rt_shifted5,Rt_noshift,positive_rate),by="date_prelevement")%>%
  left_join(Du%>%right_join(date_range,by="date_prelevement")%>%group_by(date_prelevement)%>%
              summarise(Ct_skewness=timeDate::skewness(Ct),Ct_resid_skewness=timeDate::skewness(Ct_resid)),by="date_prelevement")%>%
  arrange(date_prelevement)%>%
  fill(-c("date_prelevement"))%>%
  mutate_at(vars(matches("Ct")), function(x) zoo::rollmean(x,7,align="right",fill=NA))%>%
  na.omit->df_national

#saveRDS(df_national,file="df_national_ts.rds")



###error functions
MAE<-function(error){
  out<-NULL
  for (k in 1:length(error)){
    out<-c(out,mean(abs(error[1:k]))) 
  } 
  return(out)
}

RMSE<-function(error){
  out<-NULL
  for (k in 1:length(error)){
    out<-c(out,sqrt(mean(error[1:k]^2))) 
  } 
  return(out)
}

MAPE<-function(error,test){
  out<-NULL
  # for (k in 1:length(error)){
  #   out<-c(out,mean(abs(100*error[1:k]/test[1:k]))) 
  # }
  out<-100*mean(abs(error)/test)
  return(out)
}


###evaluate prediction precision
evaluate.prediction<-function(Rtshift="Rt_noshift",horizon=7,start="2020-08-01",tune=F,ao=c(p=9,d=2,q=0),plot=F){
  
#  df_national<-readRDS(file="df_national_ts.rds")
  df_national<-df_national%>%filter(date_prelevement>=as.Date(start)-12)
  
  accuracy=NULL
  
  for (b in 10:(nrow(df_national)-horizon)){
    tryCatch({
      ##set up training and test time series
      data_train = df_national[1:b, ]
      data_test = df_national[(b+1):(b+horizon), ]
      rg=sapply(range(data_train$date_prelevement),function(x){as.Date(x)-as.Date('2020-01-01')})
      hts<-ts(data_train%>%pull(Rtshift),start=c(2020,rg[1]),end=c(2020,rg[2]),frequency=365)
      
      if(tune){
        ##tune arima
        hts %>% auto.arima() %>% arimaorder ->ao
      }
      print(ao)
      ##get fit
      fit0<-Arima(y=hts,order=ao,include.drift=T,method="ML")
      ##predict
      frcst0<-fit0%>%forecast(h=nrow(data_test))

      ##accuracy
      error=frcst0$mean-data_test%>%pull(Rtshift)
      
      frcst0<-as_tibble(frcst0)%>%
        mutate(exo=paste0(Rtshift))%>%
        mutate(date=data_test$date_prelevement)
      
      accuracy<-rbind(accuracy,
                      data.frame(
                        train.end=tail(data_train$date_prelevement,1),
                        breakpoint=b,
                        H=c(1:nrow(data_test)),
                        RMSE=RMSE(error),
                        MAE=MAE(error),
                        MAPE=MAPE(error,data_test%>%pull(Rtshift)))
      )
    }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
  }
  
  
  accuracy_all<-accuracy%>%
    mutate(exo=Rtshift)
  
  ##now with exogenous data = Ct_median
  
  accuracy<-NULL
  
  for (b in 10:(nrow(df_national)-horizon)){
    tryCatch({
      ##set up training and test time series
      data_train = df_national[1:b, ]
      data_test = df_national[(b+1):(b+horizon), ]
      rg=sapply(range(data_train$date_prelevement),function(x){as.Date(x)-as.Date('2020-01-01')})
      hts<-ts(data_train%>%pull(Rtshift),start=c(2020,rg[1]),end=c(2020,rg[2]),frequency=365)
      
      exo_train<-data_train%>%ungroup%>%select(Ct_q0,Ct_q0.25,Ct_q0.5,Ct_q0.75,Ct_q1)%>%as.matrix
      exo_test<-data_test%>%ungroup%>%select(Ct_q0,Ct_q0.25,Ct_q0.5,Ct_q0.75,Ct_q1)%>%as.matrix
      
      if(tune){
        ##tune arima
        hts %>% auto.arima() %>% arimaorder ->ao
      }
      print(ao)
      ##get fit
      fit1<-Arima(y=hts,xreg=exo_train,order=ao,include.drift=T,method="ML")
      ##predict
      frcst1<- fit1 %>%forecast(h=nrow(data_test),xreg=exo_test)

      ##accuracy
      error=frcst1$mean-data_test%>%pull(Rtshift) 
      
      frcst1<-as_tibble(frcst1)%>%
        mutate(exo=paste0(Rtshift,'+Ct_quantiles'))%>%
        mutate(date=data_test$date_prelevement)
  
      
      accuracy<-rbind(accuracy,
                      data.frame(
                        train.end=tail(data_train$date_prelevement,1),
                        breakpoint=b,
                        H=c(1:nrow(data_test)),
                        RMSE=RMSE(error),
                        MAE=MAE(error),
                        MAPE=MAPE(error,data_test%>%pull(Rtshift)))
      )
    }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})  
  }
  
  accuracy<-accuracy%>%
    mutate(exo=paste0(Rtshift,'+Ct_quantiles'))
  
  accuracy_all<-bind_rows(accuracy_all,
                          accuracy)
  
  
  ##now with exogenous data = Ct_quantile + positive test rate
  
  accuracy<-NULL
  
  for (b in 10:(nrow(df_national)-horizon)){
    tryCatch({
      ##set up training and test time series
      data_train = df_national[1:b, ]
      data_test = df_national[(b+1):(b+horizon), ]
      rg=sapply(range(data_train$date_prelevement),function(x){as.Date(x)-as.Date('2020-01-01')})
      hts<-ts(data_train%>%pull(Rtshift),start=c(2020,rg[1]),end=c(2020,rg[2]),frequency=365)
      
      exo_train<-data_train%>%ungroup%>%select(Ct_q0,Ct_q0.25,Ct_q0.5,Ct_q0.75,Ct_q1,positive_rate)%>%as.matrix
      exo_test<-data_test%>%ungroup%>%select(Ct_q0,Ct_q0.25,Ct_q0.5,Ct_q0.75,Ct_q1,positive_rate)%>%as.matrix
      
      if(tune){
        ##tune arima
        hts %>% auto.arima() %>% arimaorder ->ao
      }
      print(ao)
      ##get fit
      fit2<-Arima(y=hts,xreg=exo_train,order=ao,include.drift=T,method="ML")
      ##predict
      frcst2<- fit2 %>%forecast(h=nrow(data_test),xreg=exo_test)
      frcst2$exo<-paste0(Rtshift,'+Ct_quantiles+pos_test_rate')
      
      ##accuracy
      error=frcst2$mean-data_test%>%pull(Rtshift) 
      
      frcst2<-as_tibble(frcst2)%>%
        mutate(exo=paste0(Rtshift,'+Ct_quantiles+pos_test_rate'))%>%
        mutate(date=data_test$date_prelevement)
      
      
      accuracy<-rbind(accuracy,
                      data.frame(
                        train.end=tail(data_train$date_prelevement,1),
                        breakpoint=b,
                        H=c(1:nrow(data_test)),
                        RMSE=RMSE(error),
                        MAE=MAE(error),
                        MAPE=MAPE(error,data_test%>%pull(Rtshift)))
      )
    }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})  
  }
  
  accuracy<-accuracy%>%
    mutate(exo=paste0(Rtshift,'+Ct_quantiles+pos_test_rate'))
  
  accuracy_all<-bind_rows(accuracy_all,
                          accuracy)
  
  
  ##now with exogenous data = Ct_res_quantiles + positive test rate
  
  accuracy<-NULL
  
  for (b in 10:(nrow(df_national)-horizon)){
    tryCatch({
      ##set up training and test time series
      data_train = df_national[1:b, ]
      data_test = df_national[(b+1):(b+horizon), ]
      rg=sapply(range(data_train$date_prelevement),function(x){as.Date(x)-as.Date('2020-01-01')})
      hts<-ts(data_train%>%pull(Rtshift),start=c(2020,rg[1]),end=c(2020,rg[2]),frequency=365)
      
      exo_train<-data_train%>%ungroup%>%select(Ctresid_q0,Ctresid_q0.25,Ctresid_q0.5,Ctresid_q0.75,Ctresid_q1,positive_rate)%>%as.matrix
      exo_test<-data_test%>%ungroup%>%select(Ctresid_q0,Ctresid_q0.25,Ctresid_q0.5,Ctresid_q0.75,Ctresid_q1,positive_rate)%>%as.matrix
      
      if(tune){
        ##tune arima
        hts %>% auto.arima() %>% arimaorder ->ao
      }
      print(ao)
      ##get fit
      fit3<-Arima(y=hts,xreg=exo_train,order=ao,include.drift=T,method="ML")
      ##predict
      frcst3<- fit3 %>%forecast(h=nrow(data_test),xreg=exo_test)

      ##accuracy
      error=frcst3$mean-data_test%>%pull(Rtshift) 
      
      frcst3<-as_tibble(frcst3)%>%
        mutate(exo=paste0(Rtshift,'+Ct_resid_quantiles+pos_test_rate'))%>%
        mutate(date=data_test$date_prelevement)
      
      
      accuracy<-rbind(accuracy,
                      data.frame(
                        train.end=tail(data_train$date_prelevement,1),
                        breakpoint=b,
                        H=c(1:nrow(data_test)),
                        RMSE=RMSE(error),
                        MAE=MAE(error),
                        MAPE=MAPE(error,data_test%>%pull(Rtshift)))
      )
    }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})  
  }
  
  accuracy<-accuracy%>%
    mutate(exo=paste0(Rtshift,'+Ct_resid_quantiles+pos_test_rate'))
  
  accuracy_all<-bind_rows(accuracy_all,
                          accuracy)
  
  
  
  ##now with exogenous data = Ct_res_quantile
  
  accuracy<-NULL
  
  for (b in 10:(nrow(df_national)-horizon)){
    tryCatch({
      ##set up training and test time series
      data_train = df_national[1:b, ]
      data_test = df_national[(b+1):(b+horizon), ]
      rg=sapply(range(data_train$date_prelevement),function(x){as.Date(x)-as.Date('2020-01-01')})
      hts<-ts(data_train%>%pull(Rtshift),start=c(2020,rg[1]),end=c(2020,rg[2]),frequency=365)
      
      exo_train<-data_train%>%ungroup%>%select(Ctresid_q0,Ctresid_q0.25,Ctresid_q0.5,Ctresid_q0.75,Ctresid_q1)%>%as.matrix
      exo_test<-data_test%>%ungroup%>%select(Ctresid_q0,Ctresid_q0.25,Ctresid_q0.5,Ctresid_q0.75,Ctresid_q1)%>%as.matrix
      
      if(tune){
        ##tune arima
        hts %>% auto.arima() %>% arimaorder ->ao
      }
      ##get fit
      fit4<-Arima(y=hts,xreg=exo_train,order=ao,include.drift=T,method="ML")
      ##predict
      frcst4<- fit4 %>%forecast(h=nrow(data_test),xreg=exo_test)
      
      ##accuracy
      error=frcst4$mean-data_test%>%pull(Rtshift) 
      
      frcst4<-as_tibble(frcst4)%>%
        mutate(exo=paste0(Rtshift,'+Ct_resid_quantiles'))%>%
        mutate(date=data_test$date_prelevement)
      
      
      accuracy<-rbind(accuracy,
                      data.frame(
                        train.end=tail(data_train$date_prelevement,1),
                        breakpoint=b,
                        H=c(1:nrow(data_test)),
                        RMSE=RMSE(error),
                        MAE=MAE(error),
                        MAPE=MAPE(error,data_test%>%pull(Rtshift)))
      )
    }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})  
  }
  
  accuracy<-accuracy%>%
    mutate(exo=paste0(Rtshift,'+Ct_resid_quantiles'))
  
  accuracy_all<-bind_rows(accuracy_all,
                          accuracy)
  
  
  ##now with exogenous data = positive_rate
  
  accuracy<-NULL
  
  for (b in 10:(nrow(df_national)-horizon)){
    tryCatch({
      ##set up training and test time series
      data_train = df_national[1:b, ]
      data_test = df_national[(b+1):(b+horizon), ]
      rg=sapply(range(data_train$date_prelevement),function(x){as.Date(x)-as.Date('2020-01-01')})
      hts<-ts(data_train%>%pull(Rtshift),start=c(2020,rg[1]),end=c(2020,rg[2]),frequency=365)
      
      exo_train<-data_train%>%ungroup%>%select(positive_rate)%>%as.matrix
      exo_test<-data_test%>%ungroup%>%select(positive_rate)%>%as.matrix
      
      if(tune){
        ##tune arima
        hts %>% auto.arima() %>% arimaorder ->ao
      }
      print(ao)
      ##get fit
      fit5<-Arima(y=hts,xreg=exo_train,order=ao,include.drift=T,method="ML")
      ##predict
      frcst5<- fit5 %>%forecast(h=nrow(data_test),xreg=exo_test)

      ##accuracy
      error=frcst5$mean-data_test%>%pull(Rtshift) 
      
      frcst5<-as_tibble(frcst5)%>%
        mutate(exo=paste0(Rtshift,'+pos_test_rate'))%>%
        mutate(date=data_test$date_prelevement)
      
      
      accuracy<-rbind(accuracy,
                      data.frame(
                        train.end=tail(data_train$date_prelevement,1),
                        breakpoint=b,
                        H=c(1:nrow(data_test)),
                        RMSE=RMSE(error),
                        MAE=MAE(error),
                        MAPE=MAPE(error,data_test%>%pull(Rtshift)))
      )
    }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})  
  }
  
  accuracy<-accuracy%>%
    mutate(exo=paste0(Rtshift,'+pos_test_rate'))
  
  
  accuracy_all<-bind_rows(accuracy_all,
                          accuracy)
  
  
  ##now with exogenous data = Ct_resid_skewness
  
  accuracy<-NULL
  
  for (b in 10:(nrow(df_national)-horizon)){
    tryCatch({
      ##set up training and test time series
      data_train = df_national[1:b, ]
      data_test = df_national[(b+1):(b+horizon), ]
      rg=sapply(range(data_train$date_prelevement),function(x){as.Date(x)-as.Date('2020-01-01')})
      hts<-ts(data_train%>%pull(Rtshift),start=c(2020,rg[1]),end=c(2020,rg[2]),frequency=365)
      
      exo_train<-data_train%>%ungroup%>%select(Ct_resid_skewness)%>%as.matrix
      exo_test<-data_test%>%ungroup%>%select(Ct_resid_skewness)%>%as.matrix
      
      if(tune){
        ##tune arima
        hts %>% auto.arima() %>% arimaorder ->ao
      }
      print(ao)
      ##get fit
      fit6<-Arima(y=hts,xreg=exo_train,order=ao,include.drift=T,method="ML")
      ##predict
      frcst6<- fit6 %>%forecast(h=nrow(data_test),xreg=exo_test)

      ##accuracy
      error=frcst6$mean-data_test%>%pull(Rtshift) 
      
      frcst6<-as_tibble(frcst6)%>%
        mutate(exo=paste0(Rtshift,'+Ct_resid_skewness'))%>%
        mutate(date=data_test$date_prelevement)
      
      
      accuracy<-rbind(accuracy,
                      data.frame(
                        train.end=tail(data_train$date_prelevement,1),
                        breakpoint=b,
                        H=c(1:nrow(data_test)),
                        RMSE=RMSE(error),
                        MAE=MAE(error),
                        MAPE=MAPE(error,data_test%>%pull(Rtshift)))
      )
    }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})  
  }
  
  accuracy<-accuracy%>%
    mutate(exo=paste0(Rtshift,'+Ct_resid_skewness'))
  
  accuracy_all<-bind_rows(accuracy_all,
                          accuracy)
  
  
  ###get models with highest precision
  accuracy_all%>%
    pivot_longer(-c('train.end','H','breakpoint','exo'))%>%
    filter(H%in%c(horizon)&exo%in%c("Rt_noshift","Rt_noshift+Ct_resid_skewness","Rt_noshift+Ct_resid_quantiles","Rt_noshift+pos_test_rate"))%>%
    group_by(train.end,name,H)%>%
    slice_min(value)%>%
    rename(best.exo=exo,best.value=value)%>%
    distinct(train.end,name,H,best.exo,best.value)%>%
    mutate(date_prelevement=train.end)->best
  
  accuracy_plt<-
    accuracy_all%>%
    pivot_longer(-c('train.end','H','breakpoint','exo'))%>%
    filter(H%in%c(horizon)&
             name=="MAPE")%>%
    group_by(train.end,name,H)%>%
    left_join(best,
              by=c("train.end","H","name"))%>%
    filter(exo%in%c("Rt_noshift","Rt_noshift+Ct_resid_skewness","Rt_noshift+Ct_resid_quantiles","Rt_noshift+pos_test_rate"))%>%
    filter(best.exo%in%c("Rt_noshift","Rt_noshift+Ct_resid_skewness","Rt_noshift+Ct_resid_quantiles","Rt_noshift+pos_test_rate"))%>%
    left_join((.)%>%filter(exo==Rtshift)%>%select(train.end,name,H,value),
              by=c("train.end","name","H"))%>%
    rename(value=value.x,Rt.value=value.y)%>%
    mutate(value.relative=Rt.value-best.value)
    
  
  
  df_res<-df_national%>%
    left_join(best%>%filter(name=="MAPE"),by="date_prelevement")%>%
    separate(best.exo,c(NA,"best.exo"),sep=11)
  
  # ####plot best precision and data
   precision_range<-100-df_res%>%pull(best.value)%>%na.omit%>%range%>%round
   
   pred_compare_exo<-ggplot()+
     geom_line(data=df_national,aes(x=date_prelevement,y=(Ctresid_q0.5/30)),color='chartreuse4')+
     geom_line(data=df_national,aes(x=date_prelevement,y=(positive_rate*100/30)),color='darkorange4')+
     geom_line(data=df_national,aes(x=date_prelevement,y=(Ct_resid_skewness*100/30)),color='darkorchid4')+
     geom_line(data=df_res,aes(x=date_prelevement,y=get(Rtshift)),color='grey20',size=1)+
     geom_point(data=df_res,aes(x=date_prelevement,y=get(Rtshift),size=(100-best.value),color=best.exo))+
     scale_color_manual(values=c("grey20","chartreuse4","darkorchid4","darkorange4"),name="data sources (in addition to Rt)")+
     scale_size_continuous(name="best 7 day prediction precision (%)",breaks = c(precision_range[2]+1,90,95,98,precision_range[1]),range = c(0.5,8))+
     scale_y_continuous(sec.axis=sec_axis(~.*30),name=Rtshift)+
     theme_bw()
   
  
  dta_plots<-
  df_res%>%
    select(date_prelevement,Rt_noshift,Ctresid_q0.5,Ct_resid_skewness,positive_rate)%>%
    pivot_longer(-date_prelevement)%>%
    transform(name=factor(name,levels=c("Rt_noshift","Ctresid_q0.5","Ct_resid_skewness","positive_rate")))%>%
    ggplot()+
      geom_line(aes(x=date_prelevement,y=value))+
      theme_bw()+
      xlab("")+
      facet_wrap(~name,scales="free",ncol=1)
  
  
  
  acc_plt<-
    left_join(df_res%>%select(date_prelevement),
    accuracy_plt)%>%
    ungroup%>%
    select(date_prelevement,best.exo,name,value.relative)%>%
    mutate(name="MAPE")%>%
    ggplot()+
      geom_line(aes(x=date_prelevement,y=value.relative))+
      geom_point(aes(x=date_prelevement,y=value.relative,color=best.exo))+
      xlab("date")+
      ylab("prediction error\nimprovement (%)")+
      scale_color_discrete(name="best model")+
      facet_wrap(~name,ncol=1)+
      theme_bw()+
      theme(legend.position = c(0.8, 0.65),
            legend.background = element_rect(fill = "transparent", colour = "transparent"),
            legend.key = element_rect(fill = "transparent", colour = "transparent"),
            legend.text=element_text(size=6),
            legend.key.size = unit(0.4, "cm"),
            legend.title = element_text(size=7))

  acc_plt_Rtonly<-
    left_join(df_res%>%select(date_prelevement),
              accuracy_plt)%>%
    ungroup%>%
    select(date_prelevement,exo,best.exo,name,value)%>%
    mutate(name="MAPE")%>%
    filter(exo%in%c(NA,Rtshift))%>%
    ggplot()+
    geom_line(aes(x=date_prelevement,y=value))+
    geom_point(aes(x=date_prelevement,y=value))+
    xlab("")+
    ylab("prediction error,\n Rt only (%)")+
    facet_wrap(~name,ncol=1)+
    theme_bw()+
    theme(legend.position = c(0.8, 0.65),
          legend.background = element_rect(fill = "transparent", colour = "transparent"),
          legend.key = element_rect(fill = "transparent", colour = "transparent"))
  
  
  


  if(plot==T){
    
    pdf(file=paste0('plots/precision_learning_exo_',Rtshift,"-bestexo-tune",tune,".pdf"),width=14,height=6)
    
    print(pred_compare_exo)
    
    dev.off()
  
      
    pdf(file=paste0('plots/precision_learning_exo_panels',Rtshift,"-bestexo-tune",tune,".pdf"),width=6,height=10)
    
    print(cowplot::plot_grid(dta_plots, acc_plt_Rtonly,acc_plt,align = "v", nrow = 3, rel_heights = c(3.5/6, 1.25/6, 1.25/6)))
    
    dev.off()

  }
  
  return(df_res)
}


evaluate.prediction(plot=T,tune=F)
evaluate.prediction(plot=T,tune=T)

###some example prediction

examplePrediction<-function(Break='2020-08-15',start=NA,K=14,H=7,Rtshift="Rt_noshift",plot=F,tune=F,ao=c(p=9,d=2,q=0))
{
  
  b=which(df_national$date_prelevement==Break)
  if(!is.na(start))
  {
    K=b-which(df_national$date_prelevement==start)+1
  }
  data_test = df_national[(b+1):(b+H), ]
  data_train = df_national[(b-K+1):b, ]
  
  rg=sapply(range(data_train$date_prelevement),function(x){as.Date(x)-as.Date('2020-01-01')})
  hts<-ts(data_train%>%pull(Rtshift),start=c(2020,rg[1]),end=c(2020,rg[2]),frequency=365)
  
  if(tune){
    ##tune arima
    hts %>% auto.arima() %>% arimaorder ->ao
  }
  ##get fit
  fit0<-Arima(y=hts,order=ao,include.drift=F,method="ML")
  ##predict
  frcst0<- fit0 %>%forecast(h=H)
  fit0%>%checkresiduals
  frcst0<-data.frame(frcst0)
  frcst0$date<-data_test$date_prelevement
  frcst0$exo<-Rtshift
  
  
  ##get fit with exo1
  if(tune){
    hts %>% auto.arima(xreg=data_train%>%select(Ct_q0,Ct_q0.25,Ct_q0.5,Ct_q0.75,Ct_q1)%>%as.matrix) %>% arimaorder ->ao
  }
  fit1<-Arima(y=hts,xreg=data_train%>%select(Ct_q0,Ct_q0.25,Ct_q0.5,Ct_q0.75,Ct_q1)%>%as.matrix,order=ao,include.drift=T,method="ML")
  ##predict
  frcst1<- fit1%>%forecast(h=H,xreg=data_test%>%select(Ct_q0,Ct_q0.25,Ct_q0.5,Ct_q0.75,Ct_q1)%>%as.matrix)
  fit1%>%checkresiduals
  frcst1<-data.frame(frcst1)
  frcst1$date<-data_test$date_prelevement
  frcst1$exo<-paste0(Rtshift,'+Ct_quantiles')
  
  
  ##get fit with exo2
  if(tune){
    hts %>% auto.arima(xreg=data_train%>%select(Ct_q0,Ct_q0.25,Ct_q0.5,Ct_q0.75,Ct_q1,positive_rate)%>%as.matrix) %>% arimaorder ->ao
  }
  fit2<-Arima(y=hts,xreg=data_train%>%select(Ct_q0,Ct_q0.25,Ct_q0.5,Ct_q0.75,Ct_q1,positive_rate)%>%as.matrix,order=ao,include.drift=T,method="ML")
  ##predict
  frcst2<- fit2%>%forecast(h=H,xreg=data_test%>%select(Ct_q0,Ct_q0.25,Ct_q0.5,Ct_q0.75,Ct_q1,positive_rate)%>%as.matrix)
  fit2%>%checkresiduals
  frcst2<-data.frame(frcst2)
  frcst2$date<-data_test$date_prelevement
  frcst2$exo<-paste0(Rtshift,'+Ct_quantiles+pos_test_rate')
  
  
  ##get fit with exo3
  if(tune){
    hts %>% auto.arima(xreg=data_train%>%select(Ctresid_q0,Ctresid_q0.25,Ctresid_q0.5,Ctresid_q0.75,Ctresid_q1,positive_rate)%>%as.matrix) %>% arimaorder ->ao
  }
  fit3<-Arima(y=hts,xreg=data_train%>%select(Ctresid_q0,Ctresid_q0.25,Ctresid_q0.5,Ctresid_q0.75,Ctresid_q1,positive_rate)%>%as.matrix,order=ao,include.drift=T,method="ML")
  ##predict
  frcst3<- fit3%>%forecast(h=H,xreg=data_test%>%select(Ctresid_q0,Ctresid_q0.25,Ctresid_q0.5,Ctresid_q0.75,Ctresid_q1,positive_rate)%>%as.matrix)
  fit3%>%checkresiduals
  frcst3<-data.frame(frcst3)
  frcst3$date<-data_test$date_prelevement
  frcst3$exo<-paste0(Rtshift,'+Ct_resid_quantiles+pos_test_rate')
  
  
  ##get fit with exo4
  if(tune){hts %>% auto.arima(xreg=data_train%>%select(Ctresid_q0,Ctresid_q0.25,Ctresid_q0.5,Ctresid_q0.75,Ctresid_q1)%>%as.matrix) %>% arimaorder ->ao
  }
  fit4<-Arima(y=hts,xreg=data_train%>%select(Ctresid_q0,Ctresid_q0.25,Ctresid_q0.5,Ctresid_q0.75,Ctresid_q1)%>%as.matrix,order=ao,include.drift=T,method="ML")
  ##predict
  frcst4<- fit4%>%forecast(h=H,xreg=data_test%>%select(Ctresid_q0,Ctresid_q0.25,Ctresid_q0.5,Ctresid_q0.75,Ctresid_q1)%>%as.matrix)
  fit4%>%checkresiduals
  frcst4<-data.frame(frcst4)
  frcst4$date<-data_test$date_prelevement
  frcst4$exo<-paste0(Rtshift,'+Ct_resid_quantiles')
  
  
  ##get fit with exo5
  if(tune){hts %>% auto.arima(xreg=data_train%>%select(positive_rate)%>%as.matrix) %>% arimaorder ->ao
  }
  fit5<-Arima(y=hts,xreg=data_train%>%select(positive_rate)%>%as.matrix,order=ao,include.drift=T,method="ML")
  ##predict
  frcst5<- fit5%>%forecast(h=H,xreg=data_test%>%select(positive_rate)%>%as.matrix)
  fit5%>%checkresiduals
  frcst5<-data.frame(frcst5)
  frcst5$date<-data_test$date_prelevement
  frcst5$exo<-paste0(Rtshift,'+pos_test_rate')
  
  
  ##get fit with exo6
  if(tune){hts %>% auto.arima(xreg=data_train%>%select(Ct_resid_skewness)%>%as.matrix) %>% arimaorder ->ao
  }
  fit6<-Arima(y=hts,xreg=data_train%>%select(Ct_resid_skewness)%>%as.matrix,order=ao,include.drift=T,method="ML")
  ##predict
  frcst6<- fit6%>%forecast(h=H,xreg=data_test%>%select(Ct_resid_skewness)%>%as.matrix)
  fit6%>%checkresiduals
  frcst6<-data.frame(frcst6)
  frcst6$date<-data_test$date_prelevement
  frcst6$exo<-paste0(Rtshift,'+Ct_resid_skewness')
  
  
  frcst<-bind_rows(frcst0,frcst1,frcst2,frcst3,frcst4,frcst5,frcst6)%>%separate(exo,c(NA,"data_source"),11,remove=F)
  
  
  pred_compare_exo<-ggplot()+
    geom_line(data=df_national,aes(x=date_prelevement,y=get(Rtshift)),color='grey20')+
    geom_line(data=frcst%>%filter(exo%in%c("Rt_noshift","Rt_noshift+Ct_resid_skewness","Rt_noshift+Ct_resid_quantiles","Rt_noshift+pos_test_rate")),
              aes(x=date,y=Point.Forecast,color=data_source))+
    scale_color_manual(values=c("grey20","chartreuse4","darkorchid4","darkorange4"),name="data sources (in addition to Rt)")+
    scale_y_continuous(sec.axis=sec_axis(~.*30),name=Rtshift)+
    theme_bw()
  
  
  if(plot==T){
    
    pdf(file=paste0('plots/example_learning_exo_breakpoint-',Break,'_',Rtshift,"-learning",K,"-tune",tune,'.pdf'),width=14,height=6)
    
    print(pred_compare_exo)
    
    dev.off()
    
  }
  
  
  
}


###
examplePrediction(Break="2020-08-23",start="2020-08-01",Rtshift="Rt_noshift",H=7,plot=T)
examplePrediction(Break="2020-10-01",start="2020-08-01",Rtshift="Rt_noshift",H=7,plot=T)
examplePrediction(Break="2020-10-15",start="2020-08-01",Rtshift="Rt_noshift",H=7,plot=T)







