# This supplementary material is hosted by Eurosurveillance as supporting information alongside the article [Title] on behalf of the authors who remain responsible for the accuracy and appropriateness of the content. The same standards for ethics, copyright, attributions and permissions as for the article apply. Supplements are not edited by Eurosurveillance and the journal is not responsible for the maintenance of any links or email addresses provided therein.

# Start from an empty global environment (hidden objects included).
# The argument is spelled out: `all` only worked through partial matching.
rm(list = ls(all.names = TRUE))

# Snapshot of the current (default) graphical parameters.
def.par <- par(no.readonly = TRUE)

# Use English names when formatting dates. Sys.setlocale() returns "" (and
# only warns) when the requested locale is not installed, so fall back to the
# always-available "C" locale, whose month and day names are English too.
time_locale <- suppressWarnings(Sys.setlocale("LC_TIME", "en_IN"))
if (!isTRUE(nzchar(time_locale))) {
  Sys.setlocale("LC_TIME", "C")
}

#Sys.setlocale("LC_TIME", "fr_FR.utf8") #use French for dates


library('tidyverse')
library('nnet') # for multinomial model

library('mixdist') # for Weibull distribution
library("drc") # for dose response curve model

library("texreg") # conversion to TeX
library("Hmisc") # for summary tables

library("raster")
library("ggmap")
library("sf")
library("maps")
library("mapproj")
library("zoo")

library('ggsci')       # for nice color palettes


#### Reading the data

# NOTE(review): absolute path on the author's machine -- adjust before
# running the script anywhere else.
setwd("/home/samuel/Documents/Hote-parasite/COVID-2019/Ct/VOC/Omicron/")

# Load the sequencing table and convert date_labo to Date
data_sequencing <- read.csv2(file = "data_sequencing_2021_2022.csv")
data_sequencing$date_labo <- as.Date(data_sequencing$date_labo)

# Print the first and last date present in the table
min(data_sequencing$date_labo)
max(data_sequencing$date_labo)

# Recode lineage labels on a character copy, then store them as a factor:
# "B.1.640" is relabelled "20C"; "20A" and "20B" are pooled
lineage <- as.character(data_sequencing$sequence)
lineage[lineage %in% c("B.1.640")] <- "20C"
lineage[lineage %in% c("20A", "20B")] <- "20A or 20B"
data_sequencing$sequence <- as.factor(lineage)

# Keep a copy of the table with all of its columns before subsetting
data_old <- data_sequencing

# Number of non-missing entries in this column (table() drops NA by default)
sum(table(data_old$COV2Q_01...COV.SEQ.INDIC))

# Print the column names; the selection below is made by position
names(data_sequencing)

kept_columns <- c(15, 7, 18:20, 22, 24:28)
data_sequencing <- data_sequencing[, kept_columns]

# Export the reduced table
write.csv2(data_sequencing,
           file = "data_EID_sequencing.csv",
           row.names = FALSE)


# Stacked bars of lineage proportions per date ("other" and "sequin" excluded).
# theme_classic() is a complete theme: it must come BEFORE the theme() tweak,
# otherwise it resets the rotation of the x-axis labels.
ggplot(dplyr::filter(data_sequencing, sequence != "other", sequence != "sequin")) +
  geom_bar(aes(x = date_labo, fill = sequence), position = "fill") +
  labs(x = "", y = "proportion", fill = "lineage") +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month") +
  #  scale_y_continuous(trans = 'log10')+
  scale_fill_nejm() +
  theme_classic() +
  theme(axis.text.x = element_text(angle = -45, hjust = 0))


# Save the plot just built
ggsave("sequences_proportions.pdf",
       width = 16, height = 8, units = "cm")



# Stacked bars of lineage counts per date ("other" and "sequin" excluded).
# The y axis shows counts, so it is labelled accordingly, and theme_classic()
# is applied before the theme() tweak so the label rotation is not reset.
ggplot(dplyr::filter(data_sequencing, sequence != "other", sequence != "sequin")) +
  geom_bar(aes(x = date_labo, fill = sequence)) +
  labs(x = "", y = "number of sequences", fill = "lineage") +
  #  scale_y_continuous(trans = 'log10')+
  scale_fill_nejm() +
  theme_classic() +
  theme(axis.text.x = element_text(angle = -45, hjust = 0))


# Save the plot just built
ggsave("sequences_numbers.pdf",
       width = 14, height = 8, units = "cm")


# Print the number of records per lineage
table(data_sequencing$sequence)




##########################################"
### Transmission advantage calculation




## functions for transmission advantage


# Serial interval distribution: mean and standard deviation from
# Nishiura et al. (2020) IJID https://doi.org/10.1016/j.ijid.2020.02.060
MEAN <- 4.8
SD <- 2.3

# Convert (mean, sd) into Weibull shape and scale parameters
WP.chosen <- weibullpar(MEAN, SD)

# Serial interval distribution: Weibull density at x, with the shape and
# scale stored in WP.chosen.
# NOTE(review): this global definition masks methods::is() for code run
# from the global environment.
is <- function(x) {
  dweibull(x, shape = WP.chosen$shape, scale = WP.chosen$scale)
}

# Euler-Lotka integrand
#   x__ : point(s) at which the integrand is evaluated
#   r__ : exponential rate
#   R__ : reproduction number; a value below 1 is replaced by its reciprocal
# Returns exp(-r__ * x__) * R * is(x__).
EL_integrand <- function(x__, r__, R__) {
  R_used <- if (R__ < 1) 1 / R__ else R__
  discount <- exp(-1 * r__ * x__)
  discount * R_used * is(x__)
}

# Euler-Lotka integral: integrates EL_integrand over [0, Inf) for the given
# rate r_ and reproduction number R_, and subtracts 1 so that a solution of
# the equation corresponds to a zero of this function.
euler_lotka <- function(r_, R_) {
  integral <- integrate(EL_integrand, lower = 0, upper = Inf,
                        r__ = r_, R__ = R_)
  integral$value - 1
}

# Euler-Lotka equation solution
# (estimation of the exponential growth rate given R and the
# serial interval)
# Returns 0 when R equals 1; otherwise the root of euler_lotka searched in
# [0, 100], with its sign flipped when R is below 1.
rate.calc <- function(R) {
  if (R == 1) {
    return(0)
  }
  root <- uniroot(euler_lotka, interval = c(0, 100), R_ = R)$root
  if (R < 1) -root else root
}

# Function whose zero is searched for in overcontagiosityf:
# A minus the growth rate implied by x, plus the growth rate implied by R_0.
overcontagiosityfoo <- function(x, A, R_0) {
  rate_candidate <- rate.calc(x)
  rate_reference <- rate.calc(R_0)
  A - rate_candidate + rate_reference
}

# Final calculation of the overcontagiosity: point value and associated
# confidence bounds, on the reproduction number scale.
#   modl : fitted model providing $coefficients and a confint() method
#   R_0_ : reference reproduction number
# The first coefficient (estimate and its two confint() bounds) is negated;
# for each value the reproduction number cancelling overcontagiosityfoo is
# searched in [0.2, 5]. Results are returned relative to R_0_, in the order:
# estimate, negated third column, negated second column.
overcontagiosityf <- function(modl, R_0_) {
  estimates <- cbind(modl$coefficients, confint(modl))
  growth <- -estimates[1, ]
  solve_for_R <- function(a) {
    uniroot(overcontagiosityfoo, A = a, R_0 = R_0_, interval = c(.2, 5))$root
  }
  roots <- c(solve_for_R(growth[1]),
             solve_for_R(growth[3]),
             solve_for_R(growth[2]))
  roots / R_0_ - 1
}



# Logistic growth function maker
#   lgm       : fitted model providing $coefficients and a confint() method;
#               row 1 of the coefficient table is used as the rate and
#               row 2 as the midpoint of the curve
#   given.tau : if TRUE, the two confidence bounds of the rate are replaced
#               by its point estimate, so only the midpoint varies
# Returns a vectorised function of x giving, for each x, three values of
# 1 / (1 + exp(rate * (x - midpoint))): one from the estimates and one from
# each confint() column. For a vector x the result is a 3-row matrix.
lgfm <- function(lgm, given.tau = FALSE) {
  coefs <- cbind(lgm$coefficients, confint(lgm))
  if (given.tau) {
    # set tau to its mean value
    coefs[1, ] <- coefs[1, 1]
  }
  # unname() keeps the output free of names, as with element-wise indexing
  rates <- unname(coefs[1, ])
  midpoints <- unname(coefs[2, ])
  lgf <- function(x) {
    1 / (1 + exp(rates * (x - midpoints)))
  }
  Vectorize(lgf)
}


min_date <- "2022-01-01"
max_date <- max(as.Date(data_sequencing$date_labo))

# Keep the samples dated on or after min_date
# (alternative 21-day windows: date_labo >= max_date - 21, date_labo <= max_date,
#  or date_labo <= as.Date(min_date) + 21)
Dt <- data_sequencing %>%
  dplyr::filter(date_labo >= as.Date(min_date))

# Number of samples per region. The grouping is dropped straight away so that
# Dt does not stay grouped by REGION for the rest of the script.
Dt <- ungroup(Dt) %>%
  group_by(REGION) %>%
  dplyr::mutate(n_REGION = n()) %>%
  ungroup()

#Dt$COV1K_02<-droplevels(Dt$COV1K_02)


# define relative time (in days since the earliest retained date)
Day0 <- min(Dt$date_labo)
max(Dt$date_labo)

Dt$day <- as.numeric(Dt$date_labo - Day0)


# Binary outcome: 1 for "BA.2", 0 for "Omicron"; any other lineage stays NA
# and is removed by drop_na() below
Dt$souche_bin <- NA
Dt$souche_bin[Dt$sequence %in% "BA.2"] <- 1
Dt$souche_bin[Dt$sequence %in% "Omicron"] <- 0

dim(Dt)

# Keep only the rows with a defined outcome, day, age and region
Dt <- drop_na(Dt, souche_bin, day, age, REGION)



# GLM to generate residuals


nonhosp <- FALSE # set to TRUE to keep only the non-hospital samples


if (nonhosp) {
  Dt <- dplyr::filter(Dt, location_sampling == "non-hospital")
  modr <- glm(souche_bin ~ day + age + REGION,
              data = Dt, family = "binomial")
} else {
  # location_sampling is a covariate here: glm() silently drops the rows
  # where it is missing, which would make residuals(modr) shorter than Dt
  # and break the column assignments below. Drop those rows explicitly.
  Dt <- drop_na(Dt, location_sampling)
  modr <- glm(souche_bin ~ day + age + location_sampling + REGION,
              data = Dt, family = "binomial")
}


# extracting residuals and fitted values (one value per row of Dt)
Dt$res1 <- residuals(modr)
Dt$fit1 <- fitted.values(modr)

# logistic growth fitting and display -------------------------------------

# Daily averages of the observed outcome (ie), of the GLM residuals (ir)
# and of the GLM fitted values (iF)
Df <- Dt %>%
  group_by(day) %>%
  dplyr::summarise(ie = sum(souche_bin) / n(),
                   ir = sum(res1) / n(),
                   iF = sum(fit1) / n())

# Logistic curve fitted to the daily mean fitted values; two parameters are
# left free (NA) and the other three are fixed at 0, 1 and 1
mL <- drm(iF ~ day,
          data = Df,
          fct = logistic(fixed = c(NA, 0, 1, NA, 1)),
          type = "continuous")

# Curve (estimate and bounds), with the rate held at its point estimate
lgf <- lgfm(mL, given.tau = TRUE)

# Transmission advantage in percent (estimate and two bounds),
# for a reference reproduction number of 1, rounded to 3 significant digits
scv <- signif(overcontagiosityf(mL, 1) * 100, 3)

scv


# Figure: daily mean fitted frequency with the fitted logistic curve and
# its band, written to a PDF named after the last sampling date
pdf(paste0("BA2_omicron_France_", max_date, ".pdf"),
    width = 5, height = 6, pointsize = 15)
par(mfrow = c(1, 1))

main.title <- paste0("transmission advantage", "\n +", scv[1], " [", scv[2],
                     " - ", scv[3], "]%")

# x range: observed days extended by 3 days before and 4 days after
time.window <- seq(min(Df$day) - 3, max(Df$day) + 4, by = 1)
Xlim <- range(time.window)
ticks.at <- seq(Xlim[1], Xlim[2], length.out = 10)

# evaluate the curve once: row 1 = estimate, rows 2-3 = bounds
curves <- lgf(time.window)

plot(Df$day, Df$iF, xlim = Xlim, ylim = c(0, 1), axes = FALSE,
     xlab = "", ylab = "frequency of BA.2 vs. BA.1 or BA.1.1",
     main = main.title,
     pch = 17, col = rgb(0, 134, 139, 130, maxColorValue = 255))
# low-level calls add to the existing plot; no par(new = TRUE) overlay needed
lines(time.window, curves[1, ], col = "darkgreen", lwd = 2, lty = 1)
polygon(c(time.window, rev(time.window)),
        c(curves[2, ], rev(curves[3, ])),
        col = rgb(28, 134, 238, 50, maxColorValue = 255), lty = 0)
# (the former vertical line at 2020-07-01 was removed: with data filtered
#  from min_date onwards it fell far to the left of the plotted range)
abline(h = 0.5, lty = 2)
# axis() has no ylim argument; the ticks follow the limits set by plot()
axis(2)
axis(1, at = ticks.at,
     labels = format(Day0 + ticks.at, "%d %b"))

dev.off()



###########################
# map regions
####################

# Print the number of records per lineage
table(data_sequencing$sequence)


# Per region and lineage, for dates from 2022-01-17 onwards: number of
# records (n), regional total (total) and share of the regional total
# (fraction). Only the BA.2 rows are kept.
data_regions_omicron <- ungroup(data_sequencing) %>%
  group_by(REGION, sequence) %>%
  dplyr::filter(date_labo >= "2022-01-17") %>%
  summarise(n = n()) %>%
  ungroup() %>%
  group_by(REGION) %>%
  mutate(total = sum(n)) %>%
  ungroup() %>%
  mutate(fraction = n / total) %>%
  dplyr::rename(region = REGION, lineage = sequence) %>%
  dplyr::filter(lineage == "BA.2")

# Use the accented spelling of this region name, then store region as a factor
region_names <- as.character(data_regions_omicron$region)
region_names[region_names %in% "Ile-de-France"] <- "Île-de-France"
data_regions_omicron$region <- as.factor(region_names)

# GADM level-2 polygons for France; NAME_1 holds, for each polygon, the name
# that is matched against the region names of data_regions_omicron
Reg.Fr <- raster::getData(name = "GADM", country = "FRA", level = 2)

# One row per polygon, in polygon order; left_join() keeps that order, so the
# joined fractions line up with the polygons (NA where no region matches)
regions_target <- data.frame(region = Reg.Fr$NAME_1, stringsAsFactors = FALSE)
values_target <- left_join(regions_target, data_regions_omicron, by = "region")


# attach the BA.2 share (in percent) to each mapped polygon
Reg.Fr$fraction <- 100 * as.numeric(values_target$fraction)


# specifying custom colors
custom.cols <- colorRampPalette(c("red", "blue"))

# spatial plot
plotFr <- raster::spplot(Reg.Fr, "fraction", col.regions = custom.cols(20),
                         main = list(label = "% BA.2"),
                         par.settings = list(fontsize = list(text = 10)),
                         margins = FALSE, frame.plot = FALSE, lwd = 0.2)


# Draw the single map. The original called ggarrange(), which none of the
# attached packages provides; with one panel, printing the plot object is
# sufficient and needs no extra dependency.
print(plotFr)

# ggsave("maps_BA2.pdf",
#        width = 18,height = 6,
#        units = "cm",dpi = 300)


