library(ggplot2)
library(effsize)

# Directory name for plots (not referenced anywhere else in this script).
plotDir <- "./plots"

# Load the CE data set; the columns used further down are
# aut, size, strategy and coverage.
dataCE <- readRDS("./data/ce/data_augmented.rds")
# dataCE <- dataCE[which(dataCE$strategy=='UET'),]
# dataCS <- readRDS(file="./data/cs/data.rds")

# df <- dataCS

# C-style formatted printing: the arguments are formatted with sprintf() and the
# resulting text is written with cat(). Returns NULL invisibly.
printf <- function(...) {
  text <- sprintf(...)
  invisible(cat(text))
}


# Data set analysed by the rest of the script.
df <- dataCE

# Team sizes that are compared against each other.
sizes <- 1:20

## LOC in place of percentage: every AUT's coverage is multiplied by a fixed
## per-AUT constant. Rows whose AUT is not listed here are left untouched.
## NOTE(review): the constants look like the total LOC of each app -- confirm.
locPerAut <- c(MunchLife = 184, SimplyDo = 1281, TippyTipper = 999, Trolly = 363)
locIdx <- match(as.character(df$aut), names(locPerAut))
hasLoc <- !is.na(locIdx)
df$coverage[hasLoc] <- df$coverage[hasLoc] * unname(locPerAut)[locIdx[hasLoc]]

df$coverage <- round(df$coverage)

# printf() is already defined earlier in the file; the identical second
# definition that used to live here was redundant and has been removed.

# Applications under test that occur in the data.
AUTs <- unique(df$aut)

# Column names of the result table; the same vector is applied to every record
# that is appended to it.
# NOTE(review): "SecondVaShapiro" and "IsSecondNormal" are spelled inconsistently
# with their FirstVar counterparts. They are kept as they are because these
# names end up in the saved RDS file.
names <- c(
  "AUT",
  "FirstVar", "FirstVarShapiro", "IsFirstVarNormal",
  "SecondVar", "SecondVaShapiro", "IsSecondNormal",
  "Test", "TestValue", "Outcome", "EffSize"
)

# Start from an empty (zero-row) table that already carries those columns.
tests <- data.frame(matrix(NA, nrow = 0, ncol = length(names)))
names(tests) <- names

# For every AUT and every pair (UET team size, IET team size) compare the
# coverage of the two strategies:
#   * Shapiro-Wilk decides between a t-test (both samples look normal) and a
#     Wilcoxon rank-sum test (otherwise);
#   * the chosen test is run with both one-sided alternatives;
#   * Cliff's delta is recorded as effect size.
# One row per comparison is appended to `tests`, which is then saved as RDS.

# Shapiro-Wilk p-value of `x`, or 0 (i.e. "treat as non-normal") when the test
# cannot be applied: shapiro.test() needs between 3 and 5000 observations that
# are not all identical.
shapiroPValue <- function(x) {
  if (length(unique(x)) > 1 && length(x) >= 3 && length(x) <= 5000) {
    shapiro.test(x)$p.value
  } else {
    0
  }
}

# Runs `testFun` (t.test or wilcox.test) on x vs. y with alternative = "less"
# and alternative = "greater" and classifies the pair of p-values at the 0.05
# level. Returns list(outcome, testValue):
#   "less" / "greater" -> testValue is the p-value of the significant side
#   "Not refuse"       -> testValue is 0
#   NA                 -> a sample is empty, or both sides are significant
# NOTE(review): a non-significant comparison is stored with TestValue 0 rather
# than its p-value; kept unchanged for compatibility with existing results.
oneSidedOutcome <- function(x, y, testFun) {
  if (length(x) == 0 || length(y) == 0) {
    # Neither test can run on an empty sample.
    return(list(outcome = NA_character_, testValue = NA_real_))
  }
  pLess <- testFun(x, y, alternative = "less")$p.value
  pGreater <- testFun(x, y, alternative = "greater")$p.value
  lessSignificant <- pLess <= 0.05
  greaterSignificant <- pGreater <= 0.05

  if (lessSignificant && !greaterSignificant) {
    list(outcome = "less", testValue = pLess)
  } else if (greaterSignificant && !lessSignificant) {
    list(outcome = "greater", testValue = pGreater)
  } else if (lessSignificant && greaterSignificant) {
    print("SHOULD NEVER HAPPEN!")
    # Record NA instead of silently reusing the previous comparison's values.
    list(outcome = NA_character_, testValue = NA_real_)
  } else {
    list(outcome = "Not refuse", testValue = 0)
  }
}

# Records are collected in a preallocated list and bound once after the loops.
records <- vector("list", length(AUTs) * length(sizes)^2)
nRecords <- 0

for (AUT in AUTs) {
  printf("AUT: %s\n", AUT)
  for (sUET1 in sizes) { # UET team size
    printf("%d %s\n", sUET1, "UET")
    # check if first var is normal
    firstVar <- df[which(df$aut == AUT & df$size == sUET1 & df$strategy == "UET"), ]$coverage
    firstVarShapiro <- shapiroPValue(firstVar)
    isFirstVarNormal <- firstVarShapiro > 0.05

    for (sUET2 in sizes) { # IET team size
      # check if second var is normal
      secondVar <- df[which(df$aut == AUT & df$size == sUET2 & df$strategy == "IET"), ]$coverage
      secondVarShapiro <- shapiroPValue(secondVar)
      isSecondVarNormal <- secondVarShapiro > 0.05

      # Cap both samples at 5000 observations. A subsampled firstVar is kept
      # for the remaining iterations of this inner loop.
      ssize <- 5000
      if (length(firstVar) > ssize) {
        firstVar <- sample(firstVar, ssize)
      }
      if (length(secondVar) > ssize) {
        secondVar <- sample(secondVar, ssize)
      }

      if (isFirstVarNormal && isSecondVarNormal) { # never happens in practice
        printf("T-TEST???\n")
        test <- "T-test"
        comparison <- oneSidedOutcome(firstVar, secondVar, t.test)
      } else {
        test <- "Wilcoxon"
        comparison <- oneSidedOutcome(firstVar, secondVar, wilcox.test)
      }
      outcome <- comparison$outcome
      testValue <- comparison$testValue

      if (length(firstVar) <= 3 || length(secondVar) <= 3) { # cliff's delta not defined
        effSize <- NA
      } else {
        effSize <- cliff.delta(firstVar, secondVar)$estimate
      }

      record <- data.frame(
        AUT,
        sUET1, firstVarShapiro, isFirstVarNormal,
        sUET2, secondVarShapiro, isSecondVarNormal,
        test, testValue, outcome, effSize
      )
      names(record) <- names

      nRecords <- nRecords + 1
      records[[nRecords]] <- record
    }
  }
}

tests <- rbind(tests, do.call(rbind, records[seq_len(nRecords)]))


saveRDS(tests, file = "statistical_tests_across_strategy.rds")








# Reload the saved comparisons and prepare them for plotting:
#   * EffSize becomes its absolute value and is binned into EffSizeCat;
#   * Outcome is relabelled with the legend texts used by the plots;
#   * every comparison involving team size 20 is removed.
tests <- readRDS(file = "statistical_tests_across_strategy.rds")

# Work on plain strings so that new labels can be assigned without running into
# missing factor levels; Outcome is converted to a factor at the end.
tests$Outcome <- as.character(tests$Outcome)

tests$EffSize <- abs(tests$EffSize)

# Bin |Cliff's delta|. Rows with a missing effect size stay NA. Assigning
# through `column[index]` also works when a bin is empty.
tests$EffSizeCat <- rep(NA_character_, nrow(tests))
tests$EffSizeCat[which(tests$EffSize >= 0.474)] <- "large"
tests$EffSizeCat[which(tests$EffSize >= 0.33 & tests$EffSize < 0.474)] <- "medium"
tests$EffSizeCat[which(tests$EffSize >= 0.148 & tests$EffSize < 0.33)] <- "small"
tests$EffSizeCat[which(tests$EffSize < 0.148)] <- "negligible"

tests$EffSizeCat <- factor(tests$EffSizeCat, levels = c("large", "medium", "small", "negligible"))


# fix diagonal sampling error
#tests[which(tests$FirstVar==tests$SecondVar),]$Outcome <- "Not refuse"
#tests[which(tests$FirstVar==tests$SecondVar),]$TestValue <- NA
# NOTE(review): the rows touched here are removed again by the filter at the
# end of this section, so this currently has no effect on the final table.
involvesSize20 <- which(tests$FirstVar == 20 | tests$SecondVar == 20)
tests$Outcome[involvesSize20] <- "Not refuse"
tests$TestValue[involvesSize20] <- NA

# Replace the internal outcome codes with the legend labels.
tests$Outcome[which(tests$Outcome == "Not refuse")] <- "none"
tests$Outcome[which(tests$Outcome == "greater")] <- "UET coverage greater than IET coverage"
tests$Outcome[which(tests$Outcome == "less")] <- "UET coverage smaller than IET coverage"
tests$Outcome <- factor(
  tests$Outcome,
  levels = c(
    "UET coverage greater than IET coverage",
    "UET coverage smaller than IET coverage",
    "none"
  )
)


# Drop every comparison in which either team has size 20.
tests <- tests[which(tests$FirstVar != 20 & tests$SecondVar != 20), ]


# Heat map of the effect-size category (EffSizeCat) for every pair of team
# sizes, one panel per AUT. width/height are constants, so they are set on the
# layer instead of being mapped inside aes(); the former scale_alpha() call was
# dropped because no alpha aesthetic is mapped.
p <- ggplot(tests, aes(x = FirstVar, y = SecondVar)) +
  geom_tile(aes(fill = EffSizeCat), width = 0.9, height = 0.9) +
  scale_fill_viridis_d(name = "Effect size (Cliff's Delta)") +
  theme_bw() +
  facet_wrap(AUT ~ ., ncol = 4) +
  xlab("Number of testers (UET)") +
  ylab("Number of testers (IET)") +
  theme(legend.position = "top")

print(p)



# Heat map of the absolute effect size (EffSize) on a continuous, reversed
# viridis scale for every pair of team sizes, one panel per AUT. width/height
# are constants, so they are set on the layer instead of being mapped inside
# aes(); the former scale_alpha() call was dropped because no alpha aesthetic
# is mapped.
p <- ggplot(tests, aes(x = FirstVar, y = SecondVar)) +
  geom_tile(aes(fill = EffSize), width = 0.9, height = 0.9) +
  scale_fill_viridis_c(name = "Effect size (Cliff's Delta)", direction = -1) +
  theme_bw() +
  facet_wrap(AUT ~ ., ncol = 4) +
  xlab("Number of testers (UET)") +
  ylab("Number of testers (IET)") +
  theme(legend.position = "top")
print(p)



# Heat map of the test outcome for every pair of team sizes, one panel per AUT.
# Only one fill scale is added: the manual scale used to replace a preceding
# scale_fill_viridis_d(), which made ggplot2 print a "scale already present"
# message. Colours are named by Outcome level so that each outcome keeps its
# colour even when a level does not occur in the data.
p <- ggplot(tests, aes(x = FirstVar, y = SecondVar)) +
  geom_tile(aes(fill = Outcome), width = 0.8, height = 0.8, color = "black") +
  scale_fill_manual(
    name = "Accepted Alternative Hypothesis",
    values = c(
      "UET coverage greater than IET coverage" = "#ffa8a8",
      "UET coverage smaller than IET coverage" = "#020873",
      "none" = "white"
    )
  ) +
  theme_bw() +
  facet_wrap(AUT ~ ., ncol = 4) +
  xlab("Number of testers (UET)") +
  ylab("Number of testers (IET)") +
  theme(legend.position = "top")
print(p)

#write.csv(tests,"tests.csv", row.names = FALSE)