#########################  statistical analysis #####

# Column labels "<scenario>_<metric>": the six metrics repeated for each of the
# four scenarios, scenario varying slowest (solid, merge, mergesplit, split).
names_for_columns <- paste(
  rep(c("solid", "merge", "mergesplit", "split"), each = 6L),
  c("deltaR", "richness", "sackin", "skew", "speciationBD", "extinctionBD"),
  sep = "_"
)

# just to remember the parameter combinations
# all_k <- c(rep(2,9),rep(5,9),rep(8,9))
# all_lambda <- rep(c(0.001,0.001,0.001,0.01,0.01,0.01,0.05,0.05,0.05),3)
# all_mu <- rep(c(1,0.5,0.2),9)
# all_gamma <- rep(2,27)
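# but here the levels are recoded as equally spaced values (1, 2, 3), so that
# consecutive levels of each parameter sit at the same distance from each other: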

# Recoded parameter levels for the 27 combinations (3 k x 3 lambda x 3 mu):
# k varies slowest, lambda next, mu fastest (and runs from 3 down to 1).
all_k <- rep(c(1, 2, 3), each = 9)
all_lambda <- rep(rep(c(1, 2, 3), each = 3), times = 3)
all_mu <- rep(c(3, 2, 1), times = 9)

# Stack one copy of the parameter grid per scenario, in the order
# merge, "mergeplit", solid. The lambda coordinate of the second and third
# copies is shifted by +0.2 / -0.2 (presumably so the scenarios do not overlap
# when plotted -- confirm against the plotting code).
# NOTE(review): the label "mergeplit" (no "s") is kept exactly as it was;
# check whether downstream code expects "mergesplit" before renaming it.
table_for_plot <- do.call(
  rbind,
  lapply(
    list(
      list(label = "merge", shift = 0),
      list(label = "mergeplit", shift = 0.2),
      list(label = "solid", shift = -0.2)
    ),
    function(scenario) {
      data.frame(
        lambda = all_lambda + scenario$shift,
        mu = all_mu,
        k = all_k,
        scena = rep(scenario$label, number_parameter_combinations)
      )
    }
  )
)

# Add one empty (NA) column per metric, to be filled in later.
table_for_plot[c("deltaR", "richness", "sackin", "skew",
                 "speciationBD", "extinctionBD")] <- NA

# Per-metric c(lower, upper) ranges, indexed by metric number (1..6).
# Presumably in the order deltaR, richness, sackin, skew, speciationBD,
# extinctionBD -- confirm against the column order of the output files.
limits <- list(
  c(-0.7709386, 1),
  c(3, 695),
  c(-0.5133908, 32.14948),
  c(-1.388695, 8.789484),
  c(0.01793155, 1.433263),
  c(3.737635e-05, 1.49779)
)


#table_manual <-table_for_plot
table_manual <- read.csv("manual.csv")

# For every parameter combination and every metric:
#   1. read the simulation output and take the solid / merge / mergesplit
#      columns of that metric,
#   2. compare the three groups pairwise with two-sided Wilcoxon rank-sum tests,
#   3. collapse the means of groups that do not differ at the 0.05 level,
#   4. place the (collapsed) means on a grid of "small bins" (1/8 of the metric
#      range) so that distinct groups sit on distinct, evenly spaced levels,
#   5. write the three levels into table_manual.
# Row layout written to table_manual: rows 1-27 merge, 28-54 mergesplit,
# 55-81 solid. Metric ii goes to column 5 + ii (assumes the metric columns of
# manual.csv start at column 6 -- confirm against the file).
for (i in seq_len(27)) {
  parameters <- i
  # [-1] drops the first column of the file (presumably the row names written
  # by write.csv -- confirm).
  output <- read.csv(file = paste0("output_pars_", parameters, ".csv"))[-1]
  table_pvalues <- NULL # not used further in this loop

  for (ii in seq_along(limits)) {
    range_metric <- limits[[ii]]

    # Columns are taken by position: metric ii for solid, then +6 for merge and
    # +12 for mergesplit (assumes the layout given by names_for_columns).
    solid_values <- output[, ii]
    merge_values <- output[, ii + 6]
    mergesplit_values <- output[, ii + 12]

    table_to_compare <- data.frame(solid_values, merge_values, mergesplit_values) # not used further in this loop

    # Order of the_means (and of scaled_values below): solid, merge, mergesplit.
    the_means <- c(
      mean(solid_values, na.rm = TRUE),
      mean(merge_values, na.rm = TRUE),
      mean(mergesplit_values, na.rm = TRUE)
    )

    wilcox_solid_merge <- wilcox.test(solid_values, merge_values,
                                      paired = FALSE, alternative = "two.sided")
    wilcox_solid_mergesplit <- wilcox.test(solid_values, mergesplit_values,
                                           paired = FALSE, alternative = "two.sided")
    wilcox_merge_mergesplit <- wilcox.test(merge_values, mergesplit_values,
                                           paired = FALSE, alternative = "two.sided")

    # Collapse the means of groups that are not significantly different.
    # The checks run in sequence on the progressively modified the_means, so
    # their order matters. When all three tests are significant nothing is
    # collapsed and the three means are kept as they are.

    # solid differs from merge, merge ~ mergesplit: merge takes mergesplit's mean.
    if (wilcox_solid_merge$p.value < 0.05 && wilcox_merge_mergesplit$p.value > 0.05) {
      the_means[2] <- the_means[3]
    }

    # solid ~ merge, merge differs from mergesplit: solid takes merge's mean.
    if (wilcox_solid_merge$p.value > 0.05 && wilcox_merge_mergesplit$p.value < 0.05) {
      the_means[1] <- the_means[2]
    }

    # solid ~ mergesplit, merge differs from mergesplit: solid takes mergesplit's mean.
    if (wilcox_solid_mergesplit$p.value > 0.05 && wilcox_merge_mergesplit$p.value < 0.05) {
      the_means[1] <- the_means[3]
    }

    # No pair differs: all three groups share solid's mean.
    if (wilcox_solid_merge$p.value > 0.05 &&
        wilcox_merge_mergesplit$p.value > 0.05 &&
        wilcox_solid_mergesplit$p.value > 0.05) {
      the_means[2] <- the_means[1]
      the_means[3] <- the_means[1]
    }

    #large_bin <- (range_metric[2]-range_metric[1])/5
    small_bin <- (range_metric[2] - range_metric[1]) / 8

    # The lowest mean is rounded up to a multiple of small_bin; the remaining
    # groups are placed one and/or two bins above it.
    number_small_bins <- ceiling(min(the_means) / small_bin)

    # Exact == is safe here: collapsed means are copies of one another.
    lowest_positions <- which(the_means == min(the_means))

    scaled_values <- c(0, 0, 0)
    scaled_values[lowest_positions] <- number_small_bins * small_bin

    # Count the groups still to be placed from the positions themselves, not
    # from how many entries are still 0: the lowest level is legitimately 0
    # whenever number_small_bins is 0, and a zero-count would then leave the
    # other groups stuck at 0 as well.
    number_unplaced <- length(the_means) - length(lowest_positions)

    if (number_unplaced == 2) {
      # Lowest mean is unique: middle goes one bin up, highest two bins up.
      # (If the two remaining means are equal, both end up two bins up.)
      scaled_values[which(the_means == median(the_means))] <- (number_small_bins + 1) * small_bin
      scaled_values[which(the_means == max(the_means))] <- (number_small_bins + 2) * small_bin
    } else if (number_unplaced == 1) {
      # Two groups share the lowest mean: the remaining one goes two bins up.
      scaled_values[which(the_means == max(the_means))] <- (number_small_bins + 2) * small_bin
    }

    # so far the order is solid, merge and mergesplit, but in the final table
    # the row order is merge, mergesplit, solid
    table_manual[parameters, 5 + ii] <- scaled_values[2]
    table_manual[parameters + 54, 5 + ii] <- scaled_values[1]
    table_manual[parameters + 27, 5 + ii] <- scaled_values[3]
  }

  # saveRDS(table_manual,file = "table_manual.RDS")
}



# library(gmodels)
# # NOT RUN {
# # mean and confidence interval
# ci(nonsolid_values )
# ci(solid_values )