library(ggplot2)
library(effsize)

# Folder that receives the figures saved by ggsave() further down.
plotDir <- './plots'

# CS dataset: loaded as-is and used as the first working data frame.
dataCS <- readRDS(file = "./data/cs/data.rds")
df <- dataCS

# CE dataset: only the rows whose strategy is 'IET' are kept.
dataCE <- readRDS(file = "./data/ce/data.rds")
dataCE <- dataCE[which(dataCE$strategy == 'IET'), ]

# C-style printf: format the arguments with sprintf(), write the result with
# cat(), and return NULL invisibly so nothing is echoed at the console.
printf <- function(...) {
  formatted <- sprintf(...)
  cat(formatted)
  invisible(NULL)
}

# Sizes compared against each other, and the applications found in the data.
sizes <- 2:9
AUTs <- unique(df$aut)

# Column names of the result table. The spelling "SecondVaShapiro" is kept
# on purpose: it is the column name that ends up in the saved results.
names <- c(
  "AUT",
  "FirstVar", "FirstVarShapiro", "IsFirstVarNormal",
  "SecondVar", "SecondVaShapiro", "IsSecondNormal",
  "Test", "TestValue", "Outcome", "EffSize"
)

# Empty result table: a one-row frame of NA placeholders carrying the column
# names above, with its single row dropped again.
tests <- as.data.frame(setNames(rep(list(NA), length(names)), names))[FALSE, ]

# Pairwise comparison of UET coverage across sizes, one AUT at a time.
# Every (AUT, first size, second size) combination appends one row to `tests`:
# the Shapiro-Wilk p-value and normality flag of both samples, the test that
# was applied, its p-value, the one-sided outcome at the 5% level and Cliff's
# delta as effect size.

# Shapiro-Wilk p-value of `x`, or 0 (i.e. "not normal") when the test cannot
# be run: shapiro.test() rejects constant samples and sample sizes outside
# the 3..5000 range.
shapiroPValue <- function(x) {
  observed <- x[!is.na(x)]
  n <- length(observed)
  if (n >= 3 && n <= 5000 && length(unique(observed)) > 1) {
    shapiro.test(observed)$p.value
  } else {
    0
  }
}

for (AUT in AUTs) {
  printf("AUT: %s\n", AUT)
  for (sUET1 in sizes) { # size of the first UET sample
    printf("%d UET\n", sUET1)
    firstVar <- df[which(df$aut == AUT & df$size == sUET1 & df$strategy == 'UET'), ]$coverage
    firstVarShapiro <- shapiroPValue(firstVar)
    isFirstVarNormal <- firstVarShapiro > 0.05

    for (sUET2 in sizes) { # size of the second UET sample
      printf("%d UET2\n", sUET2)
      secondVar <- df[which(df$aut == AUT & df$size == sUET2 & df$strategy == 'UET'), ]$coverage
      secondVarShapiro <- shapiroPValue(secondVar)
      isSecondVarNormal <- secondVarShapiro > 0.05

      # Parametric test only when both samples look normal, otherwise the
      # rank-based one. Both branches now actually run a test, so `testValue`
      # and `outcome` are always set for the current pair instead of being
      # undefined or inherited from the previous iteration.
      if (isFirstVarNormal && isSecondVarNormal) { # original note: never happens
        printf("T-TEST???")
        test <- "T-test"
        compare <- t.test
      } else {
        test <- "Wilcoxon"
        compare <- wilcox.test
      }

      # Two one-sided tests of firstVar against secondVar.
      testValue_less <- compare(firstVar, secondVar, alternative = "less")$p.value
      testValue_greater <- compare(firstVar, secondVar, alternative = "greater")$p.value

      if (testValue_less <= 0.05 && testValue_greater > 0.05) {
        outcome <- "less"
        testValue <- testValue_less
      } else if (testValue_less > 0.05 && testValue_greater <= 0.05) {
        outcome <- "greater"
        testValue <- testValue_greater
      } else if (testValue_less <= 0.05 && testValue_greater <= 0.05) {
        print("SHOULD NEVER HAPPEN!")
        # Record the contradiction as missing rather than reusing stale values.
        outcome <- NA_character_
        testValue <- NA_real_
      } else {
        outcome <- "Not refuse"
        # Non-significant pairs are stored with 0 instead of a p-value.
        testValue <- 0
      }

      effSize <- cliff.delta(firstVar, secondVar)$estimate

      record <- data.frame(
        AUT,
        sUET1, firstVarShapiro, isFirstVarNormal,
        sUET2, secondVarShapiro, isSecondVarNormal,
        test, testValue, outcome, effSize
      )
      names(record) <- names

      tests <- rbind(tests, record)
    }
  }
}

# Store the outcome labels as a factor, then persist the CS result table.
tests[["Outcome"]] <- as.factor(tests[["Outcome"]])

saveRDS(object = tests, file = "tests_cs.rds")


# Heatmap of the effect size for every (FirstVar, SecondVar) pair, one facet
# per AUT. The layers are collected in a list and added in the original order.
effSizeLayers <- list(
  geom_tile(aes(fill = EffSize, width = 0.9, height = 0.9)),
  scale_fill_viridis_c(),
  scale_alpha(range = c(1, 0.3)),
  theme_bw(),
  facet_wrap(~AUT),
  scale_x_continuous(breaks = 2:9),
  scale_y_continuous(breaks = 2:9),
  theme(legend.position = "top")
)
p <- ggplot(tests, aes(FirstVar, SecondVar)) + effSizeLayers

print(p)




# Heatmap of the hypothesis-test outcome for every (FirstVar, SecondVar) pair,
# one facet per AUT. Tile colour is the outcome; transparency follows
# TestValue, with the alpha range reversed so that smaller values are drawn
# more opaque.
p <- ggplot(tests, aes(x = FirstVar, y = SecondVar)) +
  geom_tile(aes(fill = Outcome, alpha = TestValue, width = 0.9, height = 0.9)) +
  scale_fill_viridis_d() +
  scale_alpha(range = c(1, 0.3)) +
  theme_bw() +
  facet_wrap(~AUT) +
  scale_x_continuous(breaks = c(2:9)) +
  scale_y_continuous(breaks = c(2:9)) +
  theme(legend.position = "top")

print(p)

# Make sure the output folder exists before saving, and pass the plot
# explicitly instead of relying on ggplot2's "last plot" state.
dir.create(plotDir, showWarnings = FALSE, recursive = TRUE)
ggsave(filename = file.path(plotDir, "hypotheses_cs.png"), plot = p)


#saveRDS(tests, file="tests.rds")
#write.csv(tests,"tests.csv", row.names = FALSE)




####################################################################################
####################################################################################
####################################################################################
# COMPUTER ENGINEERING STUDENTS
####################################################################################
####################################################################################
####################################################################################








# Second half of the analysis works on the CE data.
df <- dataCE

#select only 10 students, not in the following list
#df <- df[-grep("Lazzari|Apicella|Russo|Ragno|Saviano|DAvino|Bosco|Capone|Toscano|Guercia", dataCE$subset),]
sizes <- c(2:19)

##round
#df$coverage <- round(df$coverage,3)

# LOC in place of percentage: coverage is multiplied by a per-application
# constant (presumably the application's size in lines of code -- confirm).
# A lookup table is used so that an application missing from the data is
# simply skipped; assigning through `df[which(...), ]$coverage` fails when no
# row matches. Rows of any other application are left untouched.
locPerAUT <- c(MunchLife = 184, SimplyDo = 1281, TippyTipper = 999, Trolly = 363)
locFactor <- unname(locPerAUT[as.character(df$aut)])
scaledRows <- which(!is.na(locFactor))
df$coverage[scaledRows] <- df$coverage[scaledRows] * locFactor[scaledRows]

df$coverage <- round(df$coverage)

# C-style printf: format the arguments with sprintf(), write the result with
# cat(), and return NULL invisibly so nothing is echoed at the console.
printf <- function(...) {
  formatted <- sprintf(...)
  cat(formatted)
  invisible(NULL)
}

# Applications found in the current working data frame.
AUTs <- unique(df$aut)

# Column names of the result table. The spelling "SecondVaShapiro" is kept
# on purpose: it is the column name that ends up in the saved results.
names <- c(
  "AUT",
  "FirstVar", "FirstVarShapiro", "IsFirstVarNormal",
  "SecondVar", "SecondVaShapiro", "IsSecondNormal",
  "Test", "TestValue", "Outcome", "EffSize"
)

# Empty result table: a one-row frame of NA placeholders carrying the column
# names above, with its single row dropped again.
tests <- as.data.frame(setNames(rep(list(NA), length(names)), names))[FALSE, ]

# Pairwise comparison of IET coverage across sizes, one AUT at a time.
# Every (AUT, first size, second size) combination appends one row to `tests`:
# the Shapiro-Wilk p-value and normality flag of both samples, the test that
# was applied, its p-value, the one-sided outcome at the 5% level and Cliff's
# delta as effect size.

# Shapiro-Wilk p-value of `x`, or 0 (i.e. "not normal") when the test cannot
# be run: shapiro.test() rejects constant samples and sample sizes outside
# the 3..5000 range.
shapiroPValue <- function(x) {
  observed <- x[!is.na(x)]
  n <- length(observed)
  if (n >= 3 && n <= 5000 && length(unique(observed)) > 1) {
    shapiro.test(observed)$p.value
  } else {
    0
  }
}

for (AUT in AUTs) {
  printf("AUT: %s\n", AUT)
  for (sUET1 in sizes) { # size of the first IET sample (variable name is historical)
    printf("%d UET\n", sUET1)
    firstVar <- df[which(df$aut == AUT & df$size == sUET1 & df$strategy == 'IET'), ]$coverage
    firstVarShapiro <- shapiroPValue(firstVar)
    isFirstVarNormal <- firstVarShapiro > 0.05

    for (sUET2 in sizes) { # size of the second IET sample
      printf("%d UET2\n", sUET2)
      secondVar <- df[which(df$aut == AUT & df$size == sUET2 & df$strategy == 'IET'), ]$coverage
      secondVarShapiro <- shapiroPValue(secondVar)
      isSecondVarNormal <- secondVarShapiro > 0.05

      # Parametric test only when both samples look normal, otherwise the
      # rank-based one. Both branches now actually run a test, so `testValue`
      # and `outcome` are always set for the current pair instead of being
      # undefined or inherited from the previous iteration.
      if (isFirstVarNormal && isSecondVarNormal) { # original note: never happens
        printf("T-TEST???")
        test <- "T-test"
        compare <- t.test
      } else {
        test <- "Wilcoxon"
        compare <- wilcox.test
      }

      # Two one-sided tests of firstVar against secondVar, on the full samples.
      testValue_less <- compare(firstVar, secondVar, alternative = "less")$p.value
      testValue_greater <- compare(firstVar, secondVar, alternative = "greater")$p.value

      if (testValue_less <= 0.05 && testValue_greater > 0.05) {
        outcome <- "less"
        testValue <- testValue_less
      } else if (testValue_less > 0.05 && testValue_greater <= 0.05) {
        outcome <- "greater"
        testValue <- testValue_greater
      } else if (testValue_less <= 0.05 && testValue_greater <= 0.05) {
        print("SHOULD NEVER HAPPEN!")
        # Record the contradiction as missing rather than reusing stale values.
        outcome <- NA_character_
        testValue <- NA_real_
      } else {
        outcome <- "Not refuse"
        # Non-significant pairs are stored with 0 instead of a p-value.
        testValue <- 0
      }

      # Cliff's delta is computed on at most `ssize` observations per sample.
      # The subsamples go into their own variables: overwriting `firstVar`
      # here would make every later comparison for this outer size run on the
      # subsample instead of the full data.
      ssize <- 5000
      effFirst <- if (length(firstVar) > ssize) sample(firstVar, ssize) else firstVar
      effSecond <- if (length(secondVar) > ssize) sample(secondVar, ssize) else secondVar

      effSize <- cliff.delta(effFirst, effSecond)$estimate

      record <- data.frame(
        AUT,
        sUET1, firstVarShapiro, isFirstVarNormal,
        sUET2, secondVarShapiro, isSecondVarNormal,
        test, testValue, outcome, effSize
      )
      names(record) <- names

      tests <- rbind(tests, record)
    }
  }
}

#tests <- readRDS(file="tests_ce_effsize_clean_IET.rds")
tests$Outcome <- as.factor(tests$Outcome)

# Only the magnitude of the effect size is used from here on.
tests$EffSize <- abs(tests$EffSize)

# Bin the magnitude into none / small / medium / large using the cut points
# 0.148, 0.33 and 0.474 (each interval is closed on the left). cut() works
# for any number of rows and for categories that happen to be empty, unlike
# filling a fixed-length placeholder column through row subsets.
# Rows with a missing effect size get a missing category.
effSizeBins <- cut(
  tests$EffSize,
  breaks = c(-Inf, 0.148, 0.33, 0.474, Inf),
  labels = c("none", "small", "medium", "large"),
  right = FALSE
)

# The category goes into its own factor column (alphabetical levels, as
# as.factor() yields for labels); EffSize itself stays numeric so that it can
# still be drawn on a continuous colour scale and re-processed with abs().
tests$EffSizeCat <- as.factor(as.character(effSizeBins))



# Heatmap of the effect-size category for every (FirstVar, SecondVar) pair,
# one facet per AUT. Axis breaks are left to ggplot2's defaults here.
effSizeCatLayers <- list(
  geom_tile(aes(fill = EffSizeCat, width = 0.9, height = 0.9)),
  scale_fill_viridis_d(),
  scale_alpha(range = c(1, 0.3)),
  theme_bw(),
  facet_wrap(~AUT),
  theme(legend.position = "top"),
  ggtitle("IET vs IET")
)
p <- ggplot(tests, aes(FirstVar, SecondVar)) + effSizeCatLayers

print(p)



# Heatmap of EffSize for every (FirstVar, SecondVar) pair, one facet per AUT.
# scale_fill_viridis_c() is a continuous scale, so EffSize has to be numeric
# when this plot is rendered. Axis breaks are left to ggplot2's defaults.
effSizeLayers <- list(
  geom_tile(aes(fill = EffSize, width = 0.9, height = 0.9)),
  scale_fill_viridis_c(),
  scale_alpha(range = c(1, 0.3)),
  theme_bw(),
  facet_wrap(~AUT),
  theme(legend.position = "top")
)
p <- ggplot(tests, aes(FirstVar, SecondVar)) + effSizeLayers

print(p)




# Heatmap of the hypothesis-test outcome for every (FirstVar, SecondVar) pair,
# one facet per AUT. Tile colour is the outcome; transparency follows
# TestValue, with the alpha range reversed so that smaller values are drawn
# more opaque. Axis breaks are left to ggplot2's defaults.
p <- ggplot(tests, aes(x = FirstVar, y = SecondVar)) +
  geom_tile(aes(fill = Outcome, alpha = TestValue, width = 0.9, height = 0.9)) +
  scale_fill_viridis_d() +
  scale_alpha(range = c(1, 0.3)) +
  theme_bw() +
  facet_wrap(~AUT) +
  theme(legend.position = "top") +
  ggtitle("IET vs IET Hypotheses")

print(p)

# Make sure the output folder exists before saving, and pass the plot
# explicitly instead of relying on ggplot2's "last plot" state.
dir.create(plotDir, showWarnings = FALSE, recursive = TRUE)
ggsave(filename = file.path(plotDir, "hypotheses_ce.png"), plot = p)

saveRDS(tests, file = "tests_ce.rds")
#write.csv(tests,"tests.csv", row.names = FALSE)



#table(dataCE[which(dataCE$strategy=='IET' & dataCE$aut=='MunchLife' & dataCE$size==14),]$coverage)