#Phyloseq analysis using physeq object
#and R 3.6.3
#subsetting data for downstream analysis
# age
physeq_2d <- subset_samples(physeq, age=="2 day")
sample_names(physeq_2d)#lists sample names which are present
physeq_5d <- subset_samples(physeq, age=="5 day")
# 2d resistance phenotype
physeq_2d_sus <- subset_samples(physeq_2d, status=="susceptible")
physeq_2d_res <- subset_samples(physeq_2d, status=="resistant")
physeq_2d_con <- subset_samples(physeq_2d, status=="control")
#merge samples so all the groups are together
physeq_phenotype <- merge_samples(physeq, "group")

#bray-curtis plot
rarecurve(t(otu_table(physeq)))#this displays a rarefaction curve
bray_curtis <- phyloseq::distance(physeq, method = "bray")
ordination <- ordinate(physeq, method = "PCoA", distance = bray_curtis)                    
plot_ordination(physeq, ordination, color = "group") + theme_classic()
adonis(bray_curtis ~ sample_data(physeq)$group)
#plot
bray_curtis_2d <- phyloseq::distance(physeq_2d, method = "bray")
ordination_2d <- ordinate(physeq_2d, method = "PCoA", distance = bray_curtis_2d)
physeq_2d%>%
  plot_ordination(ordination_2d, color = "status")+
  geom_point(size=2)+
  theme_classic()+
  scale_colour_manual(values = c("control" = "red", "resistant" = "blue", "susceptible" = "green" ))

#heatmap 2-3d mosquitoes
#filter out asvs with an abundance of under 150 
#including them clutters the heatmap
#and doesn't tell us much useful
physeq_2d_abund <- filter_taxa(physeq_2d,function(x) sum(x > 150)>0,TRUE)
physeq_2d_abund%>%
  plot_heatmap(sample.label = "status", taxa.label = "genus", 
               sample.order = "status", low = "skyblue", high = "blue4", na.value = "grey80",
               legend = FALSE)+
  theme_bw()+
  theme(axis.text.y = element_text(size = 20))+
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        strip.text.x = element_text(size = 60))+
  facet_grid(~ status, scales = "free_x", space = "free_x", switch = "x")+
  theme(legend.position = "top")

#stacked bar plot
#using the merged phyloseq object
physeq_phenotype2 <- filter_taxa(physeq_phenotype, function(x) mean(x) > 0.1, TRUE)#this didn't change
physeq_phenotype3 = transform_sample_counts(physeq_phenotype2, function(x) x / sum(x) )
#turn all otus into genus counts
phenotype_glom <- tax_glom(physeq_phenotype3, taxrank = 'genus')
data_phenotype_glom<- psmelt(phenotype_glom) # create dataframe from phyloseq object
data_phenotype_glom$genus <- as.character(data_phenotype_glom$genus) #convert to character
#simple way to rename phyla with < 1% abundance
data_phenotype_glom$genus[data_phenotype_glom$Abundance < 0.01] <- "< 1% abund."#previously was 0.01
data_phenotype_glom
#create a named character vector and then use it as a labeller
#using the as.labeller function
age_names <- c('1'="2 day", '2'="5 day")
data_phenotype_glom%>%
  mutate(group=factor(group,levels = c("8", "4", "1", "6", "3", "5", "2", "7")))%>%
  ggplot(aes(x=group, y=Abundance, fill= fct_reorder(genus, Abundance)))+
  geom_bar(aes(), stat="identity", position="stack")+
  theme_bw()+
  guides(fill=guide_legend(title = "Genus", reverse = T))+
  facet_grid(~ age, ncol(1), scales = "free", labeller = as_labeller(age_names))+
  scale_x_discrete(breaks=c("8", "4", "1", "6", "3", "5", "2", "7"),
                   labels=c("Susceptible", "Res. 5x", "Res. 10x", "Control", "Res. 1x", "Res. 5x", "Res. 10x", "Control"),
                   name="Phenotype")+
  #scale_fill_brewer(palette = "Set3")#this palette doesn't have enough colours
  scale_fill_manual(values=c("grey", "goldenrod1", "seagreen", "blue", 
                             "red", "darkgreen", "deeppink", "khaki2", 
                             "yellow", "darkorange1", "cyan1", "salmon", "skyblue", "yellow",
                             "purple", "orange","blue", "green", "yellowgreen", "lavenderblush3",
                             "magenta3","deepskyblue3", "coral2", "red", 
                             "yellowgreen","blue", "goldenrod1", "seagreen", "yellow", 
                             "lightgreen", "darkgreen", "deepskyblue3", "orchid", 
                             "grey", "darkgreen", "seagreen",
                             "yellow", "darkorange1", "cyan1", "salmon", "skyblue", "yellow",
                             "red", "orange","blue", "green", "yellowgreen", "lavenderblush3"))

#table with the relative abundances displayed per group
ps <- tax_glom(physeq, "genus")
ps0 <- transform_sample_counts(ps, function(x)100* x/ sum(x))
ps1 <- merge_samples(ps0, "group")
ps2 <- transform_sample_counts(ps1, function(x)100* x/sum(x))
ps3 <- ps2
otu_table(ps3) <- t(otu_table(ps3))
OTUg <- otu_table(ps3)
TAXg <- tax_table(ps3)[,"genus"]
GTable <- merge(TAXg, OTUg, by=0, all=TRUE)
GTable$Row.names = NULL
GTable$Mean=rowMeans(GTable[,-c(1)], na.rm = TRUE)
GTable <- GTable[order(desc(GTable$Mean)),]
head(GTable)
write.csv(GTable, "relative_abundance_group.csv")

#make a table with relative abundances displayed by age
#using the transformed phyloseq object ps0
ps1a <- merge_samples(ps0, "age")
ps2a <- transform_sample_counts(ps1a, function(x)100* x/sum(x))
ps3a <- ps2a
otu_table(ps3a) <- t(otu_table(ps3a))
OTUa <- otu_table(ps3a)
TAX.a <- tax_table(ps3a)[,"genus"]
ATable <- merge(TAX.a, OTUa, by=0, all=TRUE)
ATable$Row.names = NULL
ATable$Mean=rowMeans(ATable[,-c(1)], na.rm = TRUE)
ATable <- ATable[order(desc(ATable$Mean)),]
write.csv(ATable, "relative_abundance_age.csv")