library("ggplot2")
library(dplyr)
library(gridExtra)
library(ggpubr)

# Load the hit statistics table. read.delim2() reads tab-separated input
# and treats "," as the decimal mark.
stats <- read.delim2(file = "subsample_vs_ref.tsv")

# Open the PDF graphics device that receives the plots drawn below.
pdf(file = "subsample_vs_ref.pdf")

# Colour-blind-friendly palette, taken from
# http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
cbbPalette <- c(
  "#000000", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)

# Fixed fill colour for each reference database.
db.colors <- c(
  taxalogue = "#000000",
  midori = "#E69F00",
  tidybug = "#009E73"
)

## Adapted from http://www.sthda.com/english/wiki/ggplot2-error-bars-quick-start-guide-r-software-and-data-visualization
#+++++++++++++++++++++++++
# Function to calculate the mean and the standard deviation
# for each group
#+++++++++++++++++++++++++
# data : a data frame
# varname : the name of a column containing the variable
#   to be summarized
# groupnames : vector of column names to be used as
#   grouping variables
#
# Returns a data frame with one row per group holding the grouping
# columns, the group mean (stored under the name given in `varname`) and
# the group standard deviation (column `sd`). NA values are removed before
# both statistics are computed.

data_summary <- function(data, varname, groupnames) {
  # Fail with a clear message when plyr is missing, and call plyr through
  # its namespace instead of attaching it: both plyr and dplyr export
  # rename(), so an unqualified call would depend on the search-path order,
  # and attaching plyr here would mask dplyr verbs for the rest of the
  # session.
  if (!requireNamespace("plyr", quietly = TRUE)) {
    stop("data_summary() requires the 'plyr' package", call. = FALSE)
  }

  summary_func <- function(x, col) {
    c(
      mean = mean(x[[col]], na.rm = TRUE),
      sd = sd(x[[col]], na.rm = TRUE)
    )
  }

  # `varname` is forwarded by ddply() to summary_func() as its `col` argument.
  data_sum <- plyr::ddply(data, groupnames, .fun = summary_func, varname)

  # Store the mean under the name of the summarised variable.
  plyr::rename(data_sum, c("mean" = varname))
}

# Mean and standard deviation of `hits` for every (db, identity) pair,
# computed over the full table.
stats2 <- data_summary(
  stats,
  varname = "hits",
  groupnames = c("db", "identity")
)

# Order in which the databases are laid out in the plot and its legend.
dbs <- c("taxalogue", "midori", "tidybug")

# Drop every row whose database or sample is "porter".
stats.no_porter <- stats[stats$db != "porter", ]
stats.no_porter <- stats.no_porter[stats.no_porter$sample != "porter", ]

# Same per-group summary as above, on the filtered table.
stats.no_porter2 <- data_summary(
  stats.no_porter,
  varname = "hits",
  groupnames = c("db", "identity")
)

# Keep only the groups with an identity above 97.
stats.no_porter2.98 <- stats.no_porter2[stats.no_porter2$identity > 97, ]

# Dodged bars of mean hits per identity, one bar per database, with
# mean +/- sd error bars. The plot is printed explicitly so that it is also
# drawn when the script is run through source(), where top-level values are
# not auto-printed.
# NOTE(review): the PDF device opened earlier in the script is not closed in
# the visible code - confirm dev.off() is called once all plots are drawn.
print(
  ggplot(
    data = stats.no_porter2.98,
    aes(x = identity, y = hits, fill = factor(db, levels = dbs))
  ) +
    geom_col(position = "dodge") +
    geom_errorbar(
      aes(ymin = hits - sd, ymax = hits + sd),
      color = "#707070",
      position = "dodge"
    ) +
    theme_classic() +
    scale_x_reverse() +
    scale_fill_manual(values = db.colors) +
    labs(title = "", fill = "reference database")
)

