#merge multiple csv files

# Root directory holding one sub-directory per repetition.
dataPath <- "./data"

# Read one CSV file, trying ";" as the field separator first and falling back
# to "," when the result has a single column (older files were written with a
# comma separator).
#
# path: path of the CSV file to read.
# Returns the file contents as a data.frame.
read_csv_auto_sep <- function(path) {
  parsed <- read.table(path, sep = ";", dec = ".", header = TRUE)
  if (ncol(parsed) == 1) {
    # it was one of the old files with the comma separator
    parsed <- read.table(path, sep = ",", dec = ".", header = TRUE)
  }
  parsed
}

# Read every file in `paths` and stack the rows into a single data.frame.
#
# paths: character vector with at least one CSV file path.
# Returns the row-wise concatenation of all files, in the order of `paths`.
merge_csv_files <- function(paths) {
  stopifnot(length(paths) > 0)
  # Read everything first and bind once; calling rbind() inside a loop copies
  # the accumulated data.frame on every iteration.
  do.call(rbind, lapply(paths, read_csv_auto_sep))
}

# Normalise the `strategy` labels and make sure `coverage` is numeric.
#
# data: data.frame with `strategy` and `coverage` columns.
# Returns `data` with `strategy` as a factor in which "whiteBox"/"WhiteBox"
# became "IET" and "BlackBox" became "UET", and `coverage` as numeric.
normalize_columns <- function(data) {
  strategy <- as.character(data$strategy)
  strategy[strategy %in% c("whiteBox", "WhiteBox")] <- "IET"
  strategy[strategy == "BlackBox"] <- "UET"
  data$strategy <- as.factor(strategy)
  # Go through character so that a factor column is converted by its labels
  # instead of by its internal integer codes.
  data$coverage <- as.numeric(as.character(data$coverage))
  data
}

# repetition with Computer Science (CS) students and Computer Engineering (CE)
# students
for (repetition in c("cs", "ce")) {

  repetition_dir <- file.path(dataPath, repetition)
  rds_path <- file.path(repetition_dir, "data.rds")

  # skip data merge if data.rds already exists
  if (file.exists(rds_path)) {
    print(paste(rds_path, "already exists"))
    next
  }

  # Only look inside this repetition's directory, so that the CS and CE data
  # sets are not merged into each other. The pattern is a regular expression
  # (not a glob), hence the escaped dot and the end-of-name anchor.
  csv_files <- list.files(
    path = repetition_dir,
    pattern = "\\.csv$",
    recursive = TRUE,
    full.names = TRUE
  )

  if (length(csv_files) == 0) {
    warning("no csv files found in ", repetition_dir, call. = FALSE)
    next
  }

  data <- normalize_columns(merge_csv_files(csv_files))

  saveRDS(data, rds_path)
  print(paste(rds_path, "was created"))

}
  
  # # plot data
  # p <- ggplot(data, aes(y=coverage,x=size,group=size)) +
  #   stat_boxplot(geom ='errorbar') +
  #   geom_boxplot(aes(fill=strategy)) + 
  #   #facet_grid(strategy ~ aut) +
  #   facet_wrap(strategy ~ aut, scales="free_y",ncol=4) +
  #   ylab("LOC Coverage (%)") +
  #   xlab("Size (number of testers)") +
  #   theme(legend.position="top")
  # print(p)