#Library loading

library(oligo) 
library(FactoMineR)
library(limma)
library(clariomshumanhttranscriptcluster.db)
library(matrixStats)


#Loading and normalisation of the genomic data


# Read every CEL file listed in the raw-data directory (full paths are needed
# because the directory is not the working directory).
data_raw <- read.celfiles(
  list.celfiles("~/E17_080 Raw Data/", full.names = TRUE)
)

# Normalise with the RMA algorithm of the oligo package.
data_rma <- rma(data_raw)

# Visual check for possible outlier samples, one colour per sample.
boxplot(data_rma, col = rainbow(ncol(data_rma)))
hist(data_rma, col = rainbow(ncol(data_rma)))


# Convert probe IDs to gene symbols using the
# clariomshumanhttranscriptcluster.db annotation package.

data_rma_mapped <- exprs(data_rma)

# Keep only probes that have a gene symbol AND are present in the expression
# matrix; indexing by a probe ID absent from the matrix would otherwise stop
# with "subscript out of bounds".
mapped_probes <- intersect(
  mappedkeys(clariomshumanhttranscriptclusterSYMBOL),
  rownames(data_rma_mapped)
)
data_rma_mapped <- data_rma_mapped[mapped_probes, , drop = FALSE]

# mget() returns a list with one entry per probe; flatten it explicitly to a
# character vector (first symbol of each entry) before using it as row names
# instead of relying on implicit list-to-character coercion.
list_gene_names <- mget(mapped_probes, clariomshumanhttranscriptclusterSYMBOL)
rownames(data_rma_mapped) <- vapply(
  list_gene_names,
  function(symbol) as.character(symbol)[1],
  character(1)
)

# In case of several probes for one gene: keep the single probe with the
# highest dispersion score. The score used here is variance / mean (the
# script refers to it as "coefficient of variation").
#
# The selection is vectorised: scores are computed once for all probes, rows
# are grouped by gene symbol (in order of first appearance), and the
# best-scoring row of each group is kept. Ties resolve to the first probe,
# as which.max() does.
gene_symbols <- rownames(data_rma_mapped)
probe_cv <- unname(
  matrixStats::rowVars(data_rma_mapped) / rowMeans(data_rma_mapped)
)

rows_by_gene <- split(
  seq_along(gene_symbols),
  factor(gene_symbols, levels = unique(gene_symbols))
)

# vapply() enforces exactly one selected row per gene and fails loudly if a
# gene has no usable score (e.g. all NA) instead of silently dropping it.
selected_rows <- vapply(
  rows_by_gene,
  function(rows) rows[which.max(probe_cv[rows])],
  integer(1)
)

# drop = FALSE keeps a matrix even if only one gene remains.
data <- data_rma_mapped[selected_rows, , drop = FALSE]
rownames(data) <- names(rows_by_gene)

# Annotation of the samples
#
# 18 samples: within each time point, 3 controls (CTL) followed by
# 3 nanoparticle-treated samples (NPs); time points come in blocks of 6.
Treatment <- rep(rep(c("CTL", "NPs"), each = 3), times = 3)
Time <- rep(c("T1day", "T2weeks", "T2months"), each = 6)

# Condition labels must be "<Time>_<Treatment>" so that they match the levels
# declared below and the contrast names used in the limma analysis. Building
# them as "<Treatment>_<Time>" would turn every element into NA.
Condition <- paste0(Time, "_", Treatment)
Condition <- ordered(
  Condition,
  levels = c(
    "T1day_CTL", "T1day_NPs",
    "T2weeks_CTL", "T2weeks_NPs",
    "T2months_CTL", "T2months_NPs"
  )
)

# Unsupervised analysis with PCA

# Selection of the most variable genes: each gene is scored by
# variance / mean of its expression across samples.
genes_cv <- apply(data, MARGIN = 1, FUN = function(x) var(x) / mean(x))

# Rank genes by decreasing score and keep the top 500 (or all genes when
# fewer than 500 are available, so the indexing below never yields NA).
ranked_cv <- genes_cv[order(genes_cv, decreasing = TRUE)]
n_top <- min(500, length(ranked_cv))

# Score distribution; the dashed red line marks the cut-off of the selection.
# The axis label belongs to hist(): abline() has no use for xlab.
hist(log10(genes_cv), breaks = 100, xlab = "log(corrected variance)")
abline(v = log10(ranked_cv[n_top]), col = "red", lty = 2, lwd = 2)
top_500 <- names(ranked_cv)[seq_len(n_top)]

# Classical PCA using the FactoMineR package (samples as rows, genes as
# columns, variables scaled to unit variance).
PCA_data <- PCA(X = t(data[top_500, ]), scale.unit = TRUE, graph = FALSE)

# One palette per annotation. Point colours and legend colours are both taken
# from the same palette, indexed in the same level order, so they always agree.
treatment_levels <- c("CTL", "NPs")
treatment_palette <- rainbow(length(treatment_levels))
plot.PCA(PCA_data, choix = "ind", label = "none",
         col.ind = treatment_palette[match(Treatment, treatment_levels)],
         cex = 2, cex.lab = 1.5)
legend("topleft", legend = c("CTL", "NPs"),
       col = treatment_palette, pch = 16, cex = 1.5)

time_levels <- c("T1day", "T2weeks", "T2months")
time_palette <- rainbow(length(time_levels))
plot.PCA(PCA_data, choix = "ind", label = "none",
         col.ind = time_palette[match(Time, time_levels)],
         cex = 2, cex.lab = 1.5)
legend("topleft", legend = c("t1day", "t2weeks", "t2months"),
       col = time_palette, pch = 16, cex = 1.5)


## Differential expression analysis using the limma package

# Estimating the time effect on non-stimulated (CTL) samples

# Cell-means design: one column per condition, no intercept, with columns
# renamed to the plain condition labels so contrasts can refer to them.
f <- factor(Condition, levels = unique(Condition))
design <- model.matrix(~ 0 + f)
colnames(design) <- levels(f)
fit <- lmFit(data, design)

# Later control time points compared against the day-1 control.
contrast_matrix_time <- makeContrasts(
  "T2weeks_CTL-T1day_CTL",
  "T2months_CTL-T1day_CTL",
  levels = design
)

fit_time <- eBayes(contrasts.fit(fit, contrast_matrix_time))

# One volcano plot per contrast, labelling the 20 top genes.
volcanoplot(fit_time, coef = 1, highlight = 20, names = rownames(fit_time),
            main = "CTL T2weeks-T1day")
volcanoplot(fit_time, coef = 2, highlight = 20, names = rownames(fit_time),
            main = "CTL T2months-T1day")

# Genes called significant at BH-adjusted p < 0.01, and their overlap
# between the two contrasts.
results_time <- decideTests(fit_time, adjust.method = 'BH', p.value = 0.01)
venn_count_time <- vennCounts(results_time)
vennDiagram(venn_count_time, names = c("T2weeks-T1day", "T2months-T1day"))

# Gold nanoparticles conditions compared to control conditions

# NPs versus CTL within each time point.
contrast_matrix_NPs <- makeContrasts(
  "T1day_NPs-T1day_CTL",
  "T2weeks_NPs-T2weeks_CTL",
  "T2months_NPs-T2months_CTL",
  levels = design
)

# Use the NPs contrast matrix defined just above; the script defines no
# object called `contrast_matrix`.
fit_NPs <- contrasts.fit(fit, contrast_matrix_NPs)
fit_NPs <- eBayes(fit_NPs)

# One volcano plot per time point, labelling the 20 top genes.
volcanoplot(fit_NPs, highlight = 20, names = rownames(fit_NPs), coef = 1,
            main = "T1day")
volcanoplot(fit_NPs, highlight = 20, names = rownames(fit_NPs), coef = 2,
            main = "T2weeks")
volcanoplot(fit_NPs, highlight = 20, names = rownames(fit_NPs), coef = 3,
            main = "T2months")

# Genes called significant at BH-adjusted p < 0.01, and their overlap across
# the three time points.
results_NPs <- decideTests(fit_NPs, adjust.method = "BH", p.value = 0.01)
venn_count_NPs <- vennCounts(results_NPs)
vennDiagram(venn_count_NPs, names = c("T1day", "T2weeks", "T2months"))


