#### 1. loading required libraries ####
library(dplyr)#数据清洗，Data cleaning
library(plyr)#数据清洗，Data cleaning
library(readxl)#读入 excel, read excel
library(stringr)#字符串处理.string manIPulation
library(data.table)
library(tidyr)#数据清洗，Data cleaning
#### 2. Setting working directory path ####
# Every relative file name used further down is resolved against this folder.
wdImport <- "E:/Study/SCI/Soil Micro/SCI/Figures/Data/Data for submit"
setwd(dir = wdImport)
#### 3 Import and process data ####

# Count table: the first column of the file becomes the row names.
ASVCount <- read.table(
  "feature_table_FourStages_DAD2_F295_R205_Filtered1.txt",
  header = TRUE, row.names = 1, na.strings = "NA"
)
# Sample sheet: the first column of the file becomes the row names.
FourStagesSampleData <- read.table(
  "FourStages_SampleData.txt",
  header = TRUE, row.names = 1, na.strings = "NA"
)

# Before running: change "Feature ID" to "Feature_ID" in the first line of the
# taxonomy file generated by the QIIME2 software.
taxonomy <- read.table(
  "taxonomy_FourStages_DAD2_F295_R205.txt",
  header = TRUE, sep = "\t", row.names = 1, fill = TRUE, na.strings = "NA"
)
taxonomy <- subset(taxonomy, select = -Confidence)

# Split the "; "-delimited lineage string into one column per rank.
# fill = "right" pads lineages that stop before Species with NA, which is what
# the default does too, but without emitting a warning for every short row.
Sup_taxonomy <- separate(
  taxonomy, Taxon,
  into = c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"),
  sep = "; ", fill = "right"
)
# The feature IDs live in the row names and are needed later on; re-attach them
# explicitly so they do not depend on how separate() treats row names.
rownames(Sup_taxonomy) <- rownames(taxonomy)

# Missing ranks are represented as empty strings from here on.
Sup_taxonomy[is.na(Sup_taxonomy)] <- ""

# Short sequential label per feature: ASV1, ASV2, ...
# seq_len() keeps this valid even when the table has no rows.
Sup_taxonomy$ASV <- paste0("ASV", seq_len(nrow(Sup_taxonomy)))
Sup_taxonomy$Genus
# Disambiguate "uncultured" labels by appending the label of the parent rank.
# Columns scanned: the third one up to the third-from-last one (the last two
# columns are left untouched).
# For every row whose rank i is exactly "<first letter of rank>__uncultured",
# the label found at rank i - 1 is appended (separated by "_") to rank i and to
# every scanned rank below it.
# NOTE(review): a lower rank that is still empty ("") at this point becomes
# "_<parent label>" and is therefore no longer matched by an `== ""` test later
# in the script - confirm this is intended.
LevelName <- colnames(Sup_taxonomy)
LevelName
LastRankColumn <- ncol(Sup_taxonomy) - 2
for (i in 3:LastRankColumn) {
  RankPrefix <- str_to_lower(str_sub(LevelName[i], 1, 1))
  uncultured <- Sup_taxonomy[, i] == paste0(RankPrefix, "__uncultured")
  # Column i - 1 is never modified by the inner loop, so read it only once.
  ParentLabel <- Sup_taxonomy[, i - 1][uncultured]
  for (j in i:LastRankColumn) {
    Sup_taxonomy[, j][uncultured] <- paste(
      Sup_taxonomy[, j][uncultured], ParentLabel,
      sep = "_"
    )
  }
}


# Replace empty labels with "<first letter of rank>__unclassified".
# Columns covered: the second one up to the third-from-last one.
Sup_taxonomy[, 2]
LevelName <- colnames(Sup_taxonomy)
LevelName
for (k in 2:(ncol(Sup_taxonomy) - 2)) {
  RankPrefix <- str_to_lower(str_sub(LevelName[k], 1, 1))
  Sup_taxonomy[Sup_taxonomy[, k] == "", k] <- paste0(RankPrefix, "__unclassified")
}


# Disambiguate "unclassified" labels by appending the label of the parent rank.
# For every row whose rank l is exactly "<first letter of rank>__unclassified",
# the label found at rank l - 1 is appended (separated by "_") to rank l and to
# every rank below it, up to the third-from-last column.
LevelName <- colnames(Sup_taxonomy)
LastRankColumn <- ncol(Sup_taxonomy) - 2
for (l in 2:LastRankColumn) {
  RankPrefix <- str_to_lower(str_sub(LevelName[l], 1, 1))
  unclassified <- Sup_taxonomy[, l] == paste0(RankPrefix, "__unclassified")
  # Column l - 1 is never modified by the inner loop, so read it only once.
  ParentLabel <- Sup_taxonomy[, l - 1][unclassified]
  for (m in l:LastRankColumn) {
    Sup_taxonomy[, m][unclassified] <- paste(
      Sup_taxonomy[, m][unclassified], ParentLabel,
      sep = "_"
    )
  }
}
# Expose the feature IDs (currently the row names) as a regular column so that
# they survive write.table(row.names = FALSE).
Sup_taxonomy$Feature_ID <- rownames(Sup_taxonomy)
dim(Sup_taxonomy)
# Put the identifiers first, followed by the ranks from Domain to Species.
# Columns are selected by name instead of by position so the layout of the
# exported file does not silently change if the column order ever differs.
Sup_taxonomy <- Sup_taxonomy[, c(
  "Feature_ID", "ASV",
  "Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"
)]
Sup_taxonomy
# Tab-separated, unquoted export.
write.table(
  Sup_taxonomy, "Sup_taxonomy_FourStages_DAD2_F295_R205.txt",
  row.names = FALSE, sep = "\t", quote = FALSE
)

# Re-read the supplementary taxonomy table and restrict it to the ASVs that are
# present in the count table.
# The file is written earlier in this script as a tab-separated, unquoted
# table, so it is read back the same way: sep = "\t" keeps labels containing
# spaces in one field, and quote = "" / comment.char = "" stop apostrophes or
# "#" inside a label from being interpreted. strip.white = TRUE keeps the
# trimming of surrounding blanks that the default whitespace separator did.
Sup_taxonomy_FourStages_DAD2_F295_R205 <- read.table(
  "Sup_taxonomy_FourStages_DAD2_F295_R205.txt",
  header = TRUE, sep = "\t", row.names = 1, fill = TRUE,
  quote = "", comment.char = "", strip.white = TRUE, na.strings = "NA"
)

# Indexing by an unknown row name yields an all-NA row instead of an error, so
# check up front that every ASV of the count table has a taxonomy entry.
MissingFeatureIDs <- setdiff(
  row.names(ASVCount),
  row.names(Sup_taxonomy_FourStages_DAD2_F295_R205)
)
if (length(MissingFeatureIDs) > 0) {
  stop(
    sprintf(
      "%d ASV(s) of the count table have no taxonomy entry, e.g. %s",
      length(MissingFeatureIDs), MissingFeatureIDs[1]
    ),
    call. = FALSE
  )
}

# Same rows, in the same order, as the count table.
Sup_taxonomy_FourStages_DAD2_F295_R205_Filtered1 <-
  Sup_taxonomy_FourStages_DAD2_F295_R205[row.names(ASVCount), ]
Sup_taxonomy_FourStages_DAD2_F295_R205_Filtered1$Feature_ID <-
  row.names(Sup_taxonomy_FourStages_DAD2_F295_R205_Filtered1)
dim(Sup_taxonomy_FourStages_DAD2_F295_R205_Filtered1)
# Column layout: Feature_ID, the ranks from Domain to Species, then ASV.
Sup_taxonomy_FourStages_DAD2_F295_R205_Filtered1 <-
  Sup_taxonomy_FourStages_DAD2_F295_R205_Filtered1[, c(
    "Feature_ID",
    "Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species",
    "ASV"
  )]
all(rownames(Sup_taxonomy_FourStages_DAD2_F295_R205_Filtered1) == rownames(ASVCount))
write.table(
  Sup_taxonomy_FourStages_DAD2_F295_R205_Filtered1,
  "Sup_taxonomy_FourStages_DAD2_F295_R205_Filtered1.txt",
  row.names = FALSE, sep = ",", quote = FALSE
)

#### Relative abundance (%) summed per taxonomic rank ####
# The percentage table and its taxonomy-annotated version are identical for
# every rank, so they are built once and then aggregated rank by rank.
# Files written (";"-separated, unquoted):
#   FourStagesSample<Rank>Percent-Filtered1.txt
#   for Genus, Phylum, Class, Order and Family.

# Sample labels are taken from the sample sheet purely by position, so both
# tables must at least describe the same number of samples.
# NOTE(review): this does not prove that the order is the same - confirm that
# the rows of the sample sheet follow the column order of the count table.
if (ncol(ASVCount) != nrow(FourStagesSampleData)) {
  stop(
    sprintf(
      "Count table has %d sample columns but the sample sheet has %d rows",
      ncol(ASVCount), nrow(FourStagesSampleData)
    ),
    call. = FALSE
  )
}

# Per column: each count as a percentage of that column's total.
# lapply() over the columns always yields one column per sample, even for a
# single-row table, which sapply() does not guarantee.
ASVPercent <- data.frame(
  lapply(ASVCount, function(Counts) 100 * (Counts / sum(Counts))),
  check.names = FALSE
)
colnames(ASVPercent) <- row.names(FourStagesSampleData)
rownames(ASVPercent) <- row.names(Sup_taxonomy_FourStages_DAD2_F295_R205_Filtered1)
TaxonomyASVPercent <- cbind(Sup_taxonomy_FourStages_DAD2_F295_R205_Filtered1, ASVPercent)

# Columns to aggregate: everything from "BInter1_1" to "MP4_3" (inclusive, by
# position), i.e. the same range the script has always summarised.
SampleRange <- match(c("BInter1_1", "MP4_3"), colnames(TaxonomyASVPercent))
if (anyNA(SampleRange)) {
  stop(
    "Sample columns 'BInter1_1' and/or 'MP4_3' not found in the percentage table",
    call. = FALSE
  )
}
SampleColumns <- colnames(TaxonomyASVPercent)[SampleRange[1]:SampleRange[2]]

# Sum the sample percentages within each label of one rank, print the
# per-sample totals as a sanity check and write the table to disk.
#
# TaxonomyPercent: data frame holding the rank columns and the sample columns.
# Rank:            name of the rank column to group by, e.g. "Genus".
# Samples:         character vector with the names of the columns to sum.
# Returns the aggregated data frame (invisibly).
WriteRankPercent <- function(TaxonomyPercent, Rank, Samples) {
  # dplyr:: is spelled out because plyr is attached after dplyr at the top of
  # this script and both packages export verbs with the same names.
  RankPercent <- as.data.frame(
    TaxonomyPercent %>%
      dplyr::group_by_at(Rank) %>%
      dplyr::summarise_at(Samples, sum)
  )
  # Per-sample totals over all labels of this rank; drop = FALSE keeps this
  # working when there is only one sample column.
  print(colSums(RankPercent[, -1, drop = FALSE]))
  write.table(
    RankPercent,
    paste0("FourStagesSample", Rank, "Percent-Filtered1.txt"),
    row.names = FALSE, sep = ";", quote = FALSE
  )
  invisible(RankPercent)
}

#Genus
GenusPercent <- WriteRankPercent(TaxonomyASVPercent, "Genus", SampleColumns)

#Phylum
PhylumPercent <- WriteRankPercent(TaxonomyASVPercent, "Phylum", SampleColumns)

#Class
ClassPercent <- WriteRankPercent(TaxonomyASVPercent, "Class", SampleColumns)

#Order
OrderPercent <- WriteRankPercent(TaxonomyASVPercent, "Order", SampleColumns)

#Family
FamilyPercent <- WriteRankPercent(TaxonomyASVPercent, "Family", SampleColumns)
