This file contains the data analyses of our empirical evaluation regarding the classification of generated white-box tests. The goal of this document is to provide a detailed description of how we analyzed the data obtained from the study. As we had two studies – an original and a replication – the analyses are divided correspondingly.
Each participant answer is stored in a JSON file. These are read into data frames.
# Reads every participant answer file (JSON) found below `results_folder`
# and stacks the parsed answers into a single data frame.
#
# Args:
#   results_folder: Directory that is searched recursively for JSON files.
#
# Returns:
#   A data frame whose first column `pid` holds the participant identifier,
#   followed by the columns parsed from the JSON files. An empty data frame
#   is returned when no JSON file is found.
read_answers_from_folder <- function(results_folder) {
  # `pattern` is a regular expression (not a glob), so match the extension
  answer_files <- list.files(results_folder, pattern = "\\.json$", recursive = TRUE, full.names = TRUE)
  if (length(answer_files) == 0) {
    return(data.frame())
  }

  read_one <- function(json) {
    # Getting the answers of the participant from the JSON file
    answers <- data.frame(fromJSON(readLines(json), simplifyDataFrame = TRUE), stringsAsFactors = FALSE)
    # The PID is the name of the folder holding the file when that name is
    # numeric (original study), otherwise the name of its parent folder
    # (replication)
    parent_name <- basename(dirname(json))
    if (suppressWarnings(!is.na(as.numeric(parent_name)))) {
      pid_source <- parent_name
    } else {
      pid_source <- basename(dirname(dirname(json)))
    }
    pid <- as.numeric(rep(pid_source, nrow(answers)))
    # Creating the column for the participant identifier
    cbind(pid, answers)
  }

  # Bind all files at once instead of growing the result inside a loop
  do.call(rbind, lapply(answer_files, read_one))
}
# Load the participant answers of both studies; objects prefixed with `r_`
# belong to the replication.
result_answers <- read_answers_from_folder(params$results_folder)
r_result_answers <- read_answers_from_folder(params$r_results_folder)
The answers are extended with project and method information based on the identifiers in the result file.
# The project is derived from the participant id: ids above 40 belong to the
# second project of the respective study
result_answers$Project <- ifelse(result_answers$pid > 40, "MathNet", "NBitcoin")
r_result_answers$Project <- ifelse(r_result_answers$pid > 40, "NetTopologySuite", "NodaTime")

# Every project has five methods with three consecutive tests per method
nbitcoin_methods <- rep(c("CompareTo", "Constructor", "Equals", "Min", "Plus"), each = 3)
mathnet_methods <- rep(
  c("Combinations", "CombinationsWithRepetition", "Permutations", "Variations", "VariationsWithRepetition"),
  each = 3
)
nodatime_methods <- rep(c("AddTo", "Between", "HasDateComponent", "Minus", "ToDuration"), each = 3)
nettopology_methods <- rep(
  c("Extract", "IncreasingDirection", "IndexOf", "IsRing", "MinCoordinate"),
  each = 3
)

# Lookup tables: (test identifier, project) -> method
test_method_mapping <- data.frame(
  Results.Id = rep(0:14, times = 2),
  Method = c(nbitcoin_methods, mathnet_methods),
  Project = rep(c("NBitcoin", "MathNet"), each = 15)
)
r_test_method_mapping <- data.frame(
  Results.Id = rep(0:14, times = 2),
  Method = c(nodatime_methods, nettopology_methods),
  Project = rep(c("NodaTime", "NetTopologySuite"), each = 15)
)

# Attach the method name to every answer row
result_answers <- plyr::join(result_answers, test_method_mapping, by = c("Results.Id", "Project"))
r_result_answers <- plyr::join(r_result_answers, r_test_method_mapping, by = c("Results.Id", "Project"))

# Attach the test identifier "T<test>.<method>" to every answer row; both
# studies use the same recoding
test_id_recodes <- "0='T0.1'; 1='T1.1'; 2='T2.1'; 3='T3.2'; 4='T4.2'; 5='T5.2'; 6='T6.3'; 7='T7.3'; 8='T8.3'; 9='T9.4'; 10='T10.4'; 11='T11.4'; 12='T12.5'; 13='T13.5'; 14='T14.5'"
result_answers$TestId <- car::recode(as.factor(result_answers$Results.Id), test_id_recodes)
r_result_answers$TestId <- car::recode(as.factor(r_result_answers$Results.Id), test_id_recodes)

# Drop the helper objects that are no longer needed
rm(
  nbitcoin_methods, mathnet_methods, nodatime_methods, nettopology_methods,
  test_method_mapping, r_test_method_mapping, test_id_recodes
)
Parsing the golden answers created prior to the study. These are used to evaluate participants’ answers.
# Golden answers of the original study (semicolon separated, empty cells
# are read as NA)
nbitcoin_golden_answers <- read.csv(
  file = file.path(params$golden_folder, "nbitcoin-gold.csv"),
  header = TRUE, sep = ";", na.strings = "", stringsAsFactors = FALSE
)
mathnet_golden_answers <- read.csv(
  file = file.path(params$golden_folder, "mathnet-gold.csv"),
  header = TRUE, sep = ";", na.strings = "", stringsAsFactors = FALSE
)
# Golden answers of the replication
nodatime_golden_answers <- read.csv(
  file = file.path(params$r_golden_folder, "nodatime-gold.csv"),
  header = TRUE, sep = ";", na.strings = "", stringsAsFactors = FALSE
)
nettopology_golden_answers <- read.csv(
  file = file.path(params$r_golden_folder, "nettopology-gold.csv"),
  header = TRUE, sep = ";", na.strings = "", stringsAsFactors = FALSE
)
Before the study session, each participant filled in a background questionnaire. The answers are parsed from a CSV file.
# Reads all background questionnaires (files named "background.csv") found
# below `results_folder` and stacks them into one data frame.
#
# Args:
#   results_folder: Directory that is searched recursively.
#
# Returns:
#   A data frame with one row per questionnaire row; its first column is
#   renamed to "PID". An empty data frame when no file is found.
parse_background_answers <- function(results_folder) {
  # `pattern` is a regular expression, so the dot has to be escaped
  bg_answers <- list.files(results_folder, pattern = "background\\.csv$", recursive = TRUE, full.names = TRUE)
  if (length(bg_answers) == 0) {
    return(data.frame())
  }
  answer_tables <- lapply(bg_answers, function(answer_file) {
    answers <- read.csv(answer_file, header = TRUE, sep = ";", as.is = TRUE)
    colnames(answers)[1] <- "PID"
    answers
  })
  # The result has to be returned explicitly: the value of a `for` loop is
  # NULL, which is what callers received before
  do.call(rbind, answer_tables)
}
# Background questionnaires of the original study and of the replication
result_bg_answers <- parse_background_answers(params$results_folder)
r_result_bg_answers <- parse_background_answers(params$r_results_folder)
After the study, each participant filled a survey, which contained questions about the difficulties of the task performed. The results are parsed from a CSV.
# Reads all exit surveys (files named "exit.csv") found below
# `results_folder` and stacks them into one data frame.
#
# Args:
#   results_folder: Directory that is searched recursively.
#
# Returns:
#   A data frame with one row per survey row; its first column is renamed
#   to "PID". An empty data frame when no file is found.
parse_exit_answers <- function(results_folder) {
  # `pattern` is a regular expression, so the dot has to be escaped
  exit_answers <- list.files(results_folder, pattern = "exit\\.csv$", recursive = TRUE, full.names = TRUE)
  if (length(exit_answers) == 0) {
    return(data.frame())
  }
  answer_tables <- lapply(exit_answers, function(answer_file) {
    answers <- read.csv(answer_file, header = TRUE, sep = ";", as.is = TRUE)
    colnames(answers)[1] <- "PID"
    answers
  })
  # Bind all files at once instead of growing the result inside a loop
  do.call(rbind, answer_tables)
}
# Exit surveys of the original study and of the replication
exit_answers <- parse_exit_answers(params$results_folder)
r_exit_answers <- parse_exit_answers(params$r_results_folder)
The activities of participants in the original study are extracted using coding of the screen recordings. These are saved to CSV files and parsed to data frames.
# Parse the coded screen recordings ("video.csv") of the original study.
# Produces `result_videos` (all annotations, prefixed with a `pid` column)
# and `video_lengths` (one row per distinct pid / media length), plus the
# per-project subsets of the latter.
video_files <- list.files(params$results_folder, pattern = "video\\.csv$", recursive = TRUE, full.names = TRUE)

annotation_tables <- lapply(video_files, function(video) {
  # The annotation table starts at the "Time," header row, which is looked
  # up in the first 20 lines of the file
  head_lines <- readLines(video, n = 20)
  header_line <- grep("^Time,", head_lines)
  if (length(header_line) == 0) {
    stop("No 'Time,' header row in the first 20 lines of ", video, call. = FALSE)
  }
  # Reading annotations
  annotations <- read.csv(video, header = TRUE, skip = header_line[1] - 1, sep = ",", as.is = TRUE)
  # The PID is the second comma separated field of the first line
  pid <- rep(strsplit(head_lines[1], split = ",")[[1]][2], nrow(annotations))
  # Adding the PID column to the annotations
  cbind(pid, annotations)
})

# Bind all recordings at once instead of growing the result inside a loop
result_videos <- if (length(annotation_tables) > 0) do.call(rbind, annotation_tables) else data.frame()

# Video lengths, computed once after all files are read; participants 55 and
# 59 are excluded
video_lengths <- data.frame()
if (length(video_files) > 0) {
  video_lengths <- result_videos %>%
    select(pid, Media.total.length) %>%
    distinct() %>%
    filter(pid != 55 & pid != 59)
}
video_lengths_nbitcoin <- filter(video_lengths, as.numeric(pid) < 41)
video_lengths_mathnet <- filter(video_lengths, as.numeric(pid) > 40)
rm(video_files, annotation_tables)
The activities of the replication study were properly recorded in log files (as opposed to the original study), thus we use these logs to easily obtain activity-related information.
# Reads the activity logs of the replication study.
#
# For every file matching `project_log` the portal log is read, combined
# with the "experiment-vs.log" file located two directory levels above it,
# tagged with the participant id (the name of that directory) and sorted by
# the `Date` column.
#
# Args:
#   project_log: Regular expression selecting the portal log files,
#     e.g. "NodaTime.log".
#   results_folder: Directory that is searched recursively; defaults to the
#     replication results folder of the document parameters.
#
# Returns:
#   A tibble with the columns Date, Action, Location, Mark and PID holding
#   the rows of all participants (empty when no file matches).
parse_logs <- function(project_log, results_folder = params$r_results_folder) {
  activity_files <- list.files(results_folder, pattern = project_log, recursive = TRUE, full.names = TRUE)

  per_participant <- lapply(activity_files, function(file) {
    participant_folder <- dirname(dirname(file))
    # Parse the file with the | separator
    portal_log <- read.table(
      file, fill = TRUE, header = FALSE, sep = "|",
      colClasses = rep("character", 4), stringsAsFactors = FALSE
    )
    names(portal_log) <- c("Date", "Action", "Location", "Mark")
    # Grab the experiment-vs.log file in the corresponding folder
    vs_log <- read.csv(
      file.path(participant_folder, "experiment-vs.log"),
      header = FALSE, sep = "|", na.strings = "", stringsAsFactors = FALSE
    )
    names(vs_log) <- c("Date", "Action", "Location")
    # Merge the activities of both logs
    joined <- bind_rows(portal_log, vs_log)
    joined$PID <- rep(basename(participant_folder), nrow(joined))
    # NOTE(review): `Date` is read as text for the portal log, so this is
    # presumably a lexicographic sort - confirm it matches the timestamp format
    arrange(joined, Date)
  })

  # Starting from an empty tibble keeps the result a tibble, also when no
  # log file was found
  bind_rows(c(list(tibble()), per_participant))
}
# Maps a NodaTime file path to the identifier of the test it belongs to.
#
# Args:
#   filePath: A single file path (or any string containing the file name).
#
# Returns:
#   The test identifier ("T0.1" ... "T14.5"), "CUT" for Period.cs, "PUT" for
#   PeriodTest.cs and "SUT" for every other path.
nodatime_test_to_seq <- function(filePath) {
  # Markers are tried from top to bottom, the first one contained in the
  # path decides the result
  markers <- c(
    "AddToTest322" = "T0.1",
    "AddToTest327" = "T1.1",
    "AddToTestThrowsDivideByZeroException305" = "T2.1",
    "BetweenTest171" = "T3.2",
    "BetweenTest410" = "T4.2",
    "BetweenTestThrowsArgumentException616" = "T5.2",
    "HasDateComponentGetTest386" = "T6.3",
    "HasDateComponentGetTest407" = "T7.3",
    "HasDateComponentGetTest757" = "T8.3",
    "MinusTest479" = "T9.4",
    "MinusTestThrowsArgumentNullException26" = "T10.4",
    "MinusTestThrowsArgumentNullException333" = "T11.4",
    "ToDurationTest01" = "T12.5",
    "ToDurationTest122" = "T13.5",
    "ToDurationTestThrowsInvalidOperationException782" = "T14.5",
    "Period.cs" = "CUT",
    "PeriodTest.cs" = "PUT"
  )
  for (marker in names(markers)) {
    if (grepl(marker, filePath, fixed = TRUE)) {
      return(markers[[marker]])
    }
  }
  # Everything else
  "SUT"
}
# Maps a NetTopologySuite file path to the identifier of the test it
# belongs to.
#
# Args:
#   filePath: A single file path (or any string containing the file name).
#
# Returns:
#   The test identifier ("T0.1" ... "T14.5"), "CUT" for CoordinateArrays.cs,
#   "PUT" for CoordinateArraysTest.cs and "SUT" for every other path.
nettopology_test_to_seq <- function(filePath) {
  # Markers are tried from top to bottom, the first one contained in the
  # path decides the result
  markers <- c(
    "ExtractTest861" = "T0.1",
    "ExtractTestThrowsArgumentException547" = "T1.1",
    "ExtractTestThrowsArgumentException834" = "T2.1",
    "IncreasingDirectionTest112" = "T3.2",
    "IncreasingDirectionTest202" = "T4.2",
    "IncreasingDirectionTest578" = "T5.2",
    "IndexOfTest510" = "T6.3",
    "IndexOfTest618" = "T7.3",
    "IndexOfTest807" = "T8.3",
    "IsRingTest488" = "T9.4",
    "IsRingTest617" = "T10.4",
    "IsRingTest89" = "T11.4",
    "MinCoordinateTest441" = "T12.5",
    "MinCoordinateTest901" = "T13.5",
    "MinCoordinateTest993" = "T14.5",
    "CoordinateArrays.cs" = "CUT",
    "CoordinateArraysTest.cs" = "PUT"
  )
  for (marker in names(markers)) {
    if (grepl(marker, filePath, fixed = TRUE)) {
      return(markers[[marker]])
    }
  }
  # Everything else
  "SUT"
}
# Converts a zero-based test number into its test identifier
# "T<test>.<method>", where three consecutive tests share one method index
# (0-2 -> 1, 3-5 -> 2, ...).
#
# Args:
#   testSeq: Test number(s), numeric or text convertible to integer.
#
# Returns:
#   Character vector of test identifiers, e.g. "T4.2".
testSeq_to_seq <- function(testSeq) {
  method_index <- ceiling((as.integer(testSeq) + 1) / 3)
  paste0("T", testSeq, ".", method_index)
}
# These were the start times for the sessions
# (no `tz` is passed to strptime(), so the timestamps are interpreted in the
# time zone of the running R session)
nodatime_start_time <-strptime("11/30/2017 3:00:00 PM", format = "%m/%d/%Y %I:%M:%S %p")
nettopology_start_time <- strptime("12/07/2017 2:52:00 PM", format="%m/%d/%Y %I:%M:%S %p")
# Parsing the logs
# (all warnings raised while reading the log files are silenced here)
nodatime_log <- suppressWarnings(parse_logs("NodaTime.log"))
nettopology_log <- suppressWarnings(parse_logs("NetTopologySuite.log"))
The answers of each participant are compared with the golden answers. The comparison yields four outcomes using binary classification: TP, TN, FP, FN. We marked a case as positive if the test encoded an error, and as negative if the test was error-free.
# Classifies every participant answer against the golden answers.
#
# A test that encodes an error is the positive case:
#   golden OK    + answered OK    -> "TN"
#   golden OK    + answered WRONG -> "FP"
#   golden WRONG + answered WRONG -> "TP"
#   golden WRONG + answered OK    -> "FN"
#
# Args:
#   result_answers: Data frame with the columns `pid`, `Results.Id` and
#     `Results.IsOK` (further columns are carried along unchanged).
#   golden_answers: Data frame with the columns `id` and `isok`.
#
# Returns:
#   `result_answers`, with the rows of each participant kept together
#   (participants in order of first appearance) and the classification
#   appended as the character column `Check`.
#
# Raises:
#   An error when a test has no golden answer or an answer is missing.
perform_answer_comparison <- function(result_answers, golden_answers) {
  # Group the rows by participant without changing the order inside a group
  participant_rank <- match(result_answers$pid, unique(result_answers$pid))
  participant_results <- result_answers[order(participant_rank, method = "radix"), , drop = FALSE]

  # Look up the golden answer of every row at once
  golden_row <- match(participant_results$Results.Id, golden_answers$id)
  if (anyNA(golden_row)) {
    stop("No golden answer for test id(s): ",
         paste(unique(participant_results$Results.Id[is.na(golden_row)]), collapse = ", "),
         call. = FALSE)
  }
  golden_ok <- golden_answers$isok[golden_row] == TRUE
  answer <- participant_results$Results.IsOK
  if (anyNA(golden_ok) || anyNA(answer)) {
    stop("Missing participant or golden answer, cannot classify", call. = FALSE)
  }

  # The column is added by name, so the result does not depend on how many
  # columns the input has
  participant_results$Check <- as.character(ifelse(
    golden_ok,
    ifelse(answer == TRUE, "TN", "FP"),
    ifelse(answer == FALSE, "TP", "FN")
  ))
  participant_results
}
# Original study: participants with pid < 41 are compared with the NBitcoin
# golden answers, all others with the MathNet golden answers
nbitcoin_participant_results <- perform_answer_comparison(result_answers %>% filter(pid < 41), nbitcoin_golden_answers)
mathnet_participant_results <- perform_answer_comparison(result_answers %>% filter(pid >= 41), mathnet_golden_answers)
participant_results <- nbitcoin_participant_results %>% bind_rows(mathnet_participant_results)
# Replication: the same split, with the NodaTime and NetTopologySuite golden
# answers
nodatime_participant_results <- perform_answer_comparison(r_result_answers %>% filter(pid < 41), nodatime_golden_answers)
nettopology_participant_results <- perform_answer_comparison(r_result_answers %>% filter(pid >= 41), nettopology_golden_answers)
r_participant_results <- nodatime_participant_results %>% bind_rows(nettopology_participant_results)
In the snippet below, we summarise the results of the binary classification for further analyses.
# Counts the classification outcomes (TP, FP, TN, FN) of every participant.
#
# Args:
#   participant_results: Data frame with the columns `pid`, `Project` and
#     `Check` (as produced by perform_answer_comparison()).
#
# Returns:
#   A data frame with the columns PID, Project, TP, FP, TN and FN and one
#   row per participant. Participants are listed in reverse order of their
#   first appearance, which is the order this function has always produced.
#   The project is taken from the first row of the participant. Empty input
#   gives a data frame with the same columns and zero rows.
summarise_results <- function(participant_results) {
  outcome_labels <- c("TP", "FP", "TN", "FN")
  pids <- rev(unique(participant_results$pid))

  if (length(pids) == 0) {
    return(data.frame(
      PID = if (is.null(pids)) numeric(0) else pids,
      Project = character(0),
      TP = integer(0), FP = integer(0), TN = integer(0), FN = integer(0)
    ))
  }

  # One contingency table replaces the per-participant counting loop; rows
  # follow `pids`, missing outcomes are counted as zero
  counts <- table(
    factor(participant_results$pid, levels = pids),
    factor(participant_results$Check, levels = outcome_labels)
  )
  first_row <- match(pids, participant_results$pid)

  data.frame(
    PID = pids,
    Project = participant_results$Project[first_row],
    TP = as.integer(counts[, "TP"]),
    FP = as.integer(counts[, "FP"]),
    TN = as.integer(counts[, "TN"]),
    FN = as.integer(counts[, "FN"])
  )
}
# Per-participant outcome counts of the original study and of the replication
participant_result_summary <- summarise_results(participant_results)
r_participant_result_summary <- summarise_results(r_participant_results)
Based on the binary classification summaries, we calculate the Accuracy, Misclassification rate, Sensitivity, Specificity, False Positive rate and the Matthews Correlation Coefficient statistics for each participant in both studies.
# Classification metrics of the original study.
# NOTE(review): the divisor 15 presumably is the number of tests per
# participant, i.e. TP + TN + FP + FN - confirm that everybody answered all.
prs <- participant_result_summary
participant_result_summary$Accuracy <- with(prs, (TP + TN) / 15)
participant_result_summary$Misclassification <- with(prs, (FP + FN) / 15)
# Sensitivity is the hit rate, specificity the true negative rate
participant_result_summary$Sensitivity <- with(prs, TP / (TP + FN))
participant_result_summary$Specificity <- with(prs, TN / (TN + FP))
# Take a fresh snapshot so that the new Specificity column is visible
prs <- participant_result_summary
participant_result_summary$FalsePositiveRate <- 1 - prs$Specificity
# Matthews Correlation Coefficient
participant_result_summary$MCC <- with(
  prs,
  ((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
)

# The same metrics for the replication
prs <- r_participant_result_summary
r_participant_result_summary$Accuracy <- with(prs, (TP + TN) / 15)
r_participant_result_summary$Misclassification <- with(prs, (FP + FN) / 15)
r_participant_result_summary$Sensitivity <- with(prs, TP / (TP + FN))
r_participant_result_summary$Specificity <- with(prs, TN / (TN + FP))
prs <- r_participant_result_summary
r_participant_result_summary$FalsePositiveRate <- 1 - prs$Specificity
r_participant_result_summary$MCC <- with(
  prs,
  ((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
)
rm(prs)
The following two tables contain the results of the top five participants of the original and the replication study, respectively, ranked by their MCC. Note that there were ties in MCC values, thus more than 5 participants are listed.
PID | Project | TP | FP | TN | FN | Accuracy | Misclassification | Sensitivity | Specificity | FalsePositiveRate | MCC |
---|---|---|---|---|---|---|---|---|---|---|---|
47 | MathNet | 3 | 0 | 12 | 0 | 1.0000000 | 0.0000000 | 1 | 1.0000000 | 0.0000000 | 1.0000000 |
10 | NBitcoin | 3 | 0 | 12 | 0 | 1.0000000 | 0.0000000 | 1 | 1.0000000 | 0.0000000 | 1.0000000 |
64 | MathNet | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
8 | NBitcoin | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
2 | NBitcoin | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
11 | NBitcoin | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
PID | Project | TP | FP | TN | FN | Accuracy | Misclassification | Sensitivity | Specificity | FalsePositiveRate | MCC |
---|---|---|---|---|---|---|---|---|---|---|---|
62 | NetTopologySuite | 3 | 0 | 12 | 0 | 1.0000000 | 0.0000000 | 1 | 1.0000000 | 0.0000000 | 1.0000000 |
22 | NodaTime | 3 | 0 | 12 | 0 | 1.0000000 | 0.0000000 | 1 | 1.0000000 | 0.0000000 | 1.0000000 |
72 | NetTopologySuite | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
52 | NetTopologySuite | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
51 | NetTopologySuite | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
49 | NetTopologySuite | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
45 | NetTopologySuite | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
8 | NodaTime | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
7 | NodaTime | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
32 | NodaTime | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
31 | NodaTime | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
25 | NodaTime | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
18 | NodaTime | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
16 | NodaTime | 3 | 1 | 11 | 0 | 0.9333333 | 0.0666667 | 1 | 0.9166667 | 0.0833333 | 0.8291562 |
The following box plots show different measures of binary classification separated by project.
# Draws one box plot per classification metric, grouped by project.
#
# Args:
#   participant_result_summary: Data frame with the columns Project,
#     Accuracy, Misclassification, FalsePositiveRate, Sensitivity,
#     Specificity and MCC.
#
# Returns:
#   A list of six ggplot objects in the order accuracy, misclassification,
#   false positive rate, sensitivity (TPR), specificity (TNR), MCC.
draw_binary_boxplots <- function(participant_result_summary) {
  # Shared recipe of all six plots; only the mapped metric, the axis label
  # and (for MCC) the axis range differ
  metric_boxplot <- function(mapping, y_label, y_limits = c(0, 1), y_breaks = seq(0, 1, 0.1)) {
    ggplot(participant_result_summary, mapping) +
      geom_boxplot() +
      xlab("Project") +
      ylab(y_label) +
      # guides(fill = FALSE) is deprecated, "none" is the supported spelling
      guides(fill = "none") +
      theme_hc() +
      scale_y_continuous(limits = y_limits, breaks = y_breaks)
  }

  # Accuracy
  acc <- metric_boxplot(aes(x = factor(Project), y = Accuracy), "Accuracy")
  # Misclassification
  misc <- metric_boxplot(aes(x = factor(Project), y = Misclassification), "Misclassification")
  # False positive rate
  fpr <- metric_boxplot(aes(x = factor(Project), y = FalsePositiveRate), "False Positive Rate")
  # Sensitivity (TPR)
  # IN PAPER
  #pdf(file="boxplot-sensitivity.pdf",width=3.5,height=2.5)
  tpr <- metric_boxplot(aes(x = factor(Project), y = Sensitivity), "TPR")
  #dev.off()
  # Specificity (true negative rate)
  # IN PAPER
  #pdf(file="boxplot-tnr.pdf",width=3.5,height=2.5)
  tnr <- metric_boxplot(aes(x = factor(Project), y = Specificity), "TNR")
  #dev.off()
  # Matthews Correlation Coefficient (ranges from -1 to 1)
  # IN PAPER
  #pdf(file="boxplot-matthews.pdf",width=3.5,height=2.5)
  mcc <- metric_boxplot(
    aes(x = factor(Project), y = MCC), "MCC",
    y_limits = c(-1, 1), y_breaks = seq(-1, 1, 0.2)
  )
  #dev.off()
  return(list(acc, misc, fpr, tpr, tnr, mcc))
}
The following plots visualize all of the answer results (TP, FP, TN, FN) given by participants in both studies.
# Draws the participant x test tile maps of the classification outcomes.
#
# Args:
#   participant_results: Data frame with the columns pid, Project, TestId,
#     Results.Id and Check.
#
# Returns:
#   A list of two ggplot objects: the grid of the participants with
#   pid < 41 and the grid of the participants with pid > 40.
draw_classification_grid <- function(participant_results) {
  # Colours are bound to the outcome by name, so a missing outcome level
  # cannot shift the remaining colours
  cb_palette <- c(FN = "#680008", FP = "#e22828", TN = "#2cba5a", TP = "#01703e")

  # Builds the tile map of one group of participants
  outcome_grid <- function(rows) {
    # The title is the project name of the group (a single string rather
    # than the whole Project column)
    projects <- unique(as.character(rows$Project))
    title <- if (length(projects) == 0) NULL else paste(projects, collapse = " / ")
    ggplot(rows, aes(x = factor(pid), y = reorder(factor(TestId), Results.Id), fill = as.factor(Check))) +
      ggtitle(title) +
      geom_tile(alpha = 0.8, width = .9, height = .9) +
      theme(
        axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank()
      ) +
      scale_fill_manual(values = cb_palette) +
      xlab("Participant ID") +
      ylab("Test ID") +
      labs(fill = "Result")
  }

  # IN PAPER
  #pdf(file="a-tile-map.pdf",width=6.5,height = 3.5)
  a_grid <- outcome_grid(participant_results[participant_results$pid < 41, ])
  #dev.off()
  # IN PAPER
  #pdf(file="b-tile-map.pdf",width=5.5,height = 3.5)
  b_grid <- outcome_grid(participant_results[participant_results$pid > 40, ])
  #dev.off()
  return(list(a_grid, b_grid))
}
The following two charts show the participants’ answers to the exit survey. The questions concerned their feelings and impressions after the task.
# Statements of the exit survey, in the order of the agreement1 ...
# agreement11 columns
questions <- c(
  "I had enough time to understand the class.",
  "I had enough time to review the tests.",
  "It was easy to understand the class.",
  "It was easy to understand the tests.",
  "I am certain I chose the right answers.",
  "Tests are difficult to read.",
  "Tests are too long to understand.",
  "Tests are too short to exercise useful behavior.",
  "Tests had too many assertions.",
  "Tests had meaningful assertions.",
  "It was easy to select faulty tests."
)
# Answer levels of the Likert scale, from full agreement to full disagreement
options <- c(
  "Fully\nagree",
  "Partially\nagree",
  "Neither agree\nnor disagree",
  "Partially\ndisagree",
  "Fully\ndisagree"
)
# Converts the raw exit survey answers into Likert-scale factors.
#
# Rows whose `agreement2` answer is empty or missing are dropped. The answer
# codes (fagree, pagree, neither, pdisagree, fdisagree) are replaced by the
# labels of the global `options` vector; anything that is not one of these
# labels afterwards becomes NA.
#
# Args:
#   answers: Data frame with one `agreement<i>` column per entry of the
#     global `questions` vector.
#
# Returns:
#   A data frame with one factor column per question (named after the
#   question text), each with the levels given by `options`.
extract_likert_data <- function(answers) {
  # is.na() keeps missing answers from producing all-NA rows
  answered <- !is.na(answers$agreement2) & answers$agreement2 != ""
  data <- answers[answered, paste0("agreement", seq_along(questions)), drop = FALSE]
  names(data) <- questions

  code_labels <- c(
    fagree = "Fully\nagree",
    pagree = "Partially\nagree",
    neither = "Neither agree\nnor disagree",
    pdisagree = "Partially\ndisagree",
    fdisagree = "Fully\ndisagree"
  )
  for (i in seq_along(questions)) {
    raw <- as.character(data[[i]])
    # %in% never yields NA, so missing answers simply stay NA
    known <- raw %in% names(code_labels)
    raw[known] <- unname(code_labels[raw[known]])
    data[[i]] <- factor(raw, levels = options)
  }
  return(data)
}
# Likert chart of the exit survey of the original study
likert_data <- extract_likert_data(exit_answers)
ldt <- likert(subset(likert_data, select=questions), nlevels = length(options))
# IN PAPER
#pdf(file="c:\\PhD\\Repos\\paper-wbstudy-ist\\figures\\likert.pdf", width=12)
plot(ldt, ordered=FALSE) + theme(legend.title=element_blank(),legend.position = "bottom")
#dev.off()
# Likert chart of the exit survey of the replication (`ldt` is reused)
likert_data_rep <- extract_likert_data(r_exit_answers)
ldt <- likert(subset(likert_data_rep, select=questions), nlevels = length(options))
# IN PAPER
#pdf(file="c:\\PhD\\Repos\\paper-wbstudy-ist\\figures\\likert-rep.pdf", width=12)
plot(ldt, ordered=FALSE) + theme(legend.title=element_blank(),legend.position = "bottom")
#dev.off()
To obtain knowledge about how participants behaved during the studies, we extract their actions from the video logs (in case of the original study), and from the activity logs (in case of the replication study).
As we had technical difficulties with logging the activities in the first study, we reconstructed the logs from the videos by coding each activity to analyze.
# Result tables filled while iterating over the participants.
# NOTE(review): data.frame(row.names = ...) without any column creates a
# frame with one ROW per listed name and zero columns; the names are not
# column names. Presumably these are meant as empty accumulators with the
# listed column layout - confirm against the code that appends to them.
# Annotations extended with the derived state of the participant
extended_video_annotations <- data.frame(row.names = c("PID","Timestamp","Behavior","Modifier","Active","Page","Window","RunCase","DebugLength","DebugCase"),stringsAsFactors = FALSE)
# Per test (T0.1 ... T14.5) plus S, C, P and Total
vs_time_summary <- data.frame(row.names = c("PID","T0.1","T1.1","T2.1","T3.2","T4.2","T5.2","T6.3","T7.3","T8.3","T9.4","T10.4","T11.4","T12.5","T13.5","T14.5","S","C","P","Total"), stringsAsFactors = FALSE)
# The remaining tables list one entry per test (T0.1 ... T14.5)
portal_time_summary <- data.frame(row.names = c("PID","T0.1","T1.1","T2.1","T3.2","T4.2","T5.2","T6.3","T7.3","T8.3","T9.4","T10.4","T11.4","T12.5","T13.5","T14.5"), stringsAsFactors = FALSE)
marked_wrong_summary <- data.frame(row.names = c("PID","T0.1","T1.1","T2.1","T3.2","T4.2","T5.2","T6.3","T7.3","T8.3","T9.4","T10.4","T11.4","T12.5","T13.5","T14.5"), stringsAsFactors = FALSE)
marked_ok_summary <- data.frame(row.names = c("PID","T0.1","T1.1","T2.1","T3.2","T4.2","T5.2","T6.3","T7.3","T8.3","T9.4","T10.4","T11.4","T12.5","T13.5","T14.5"), stringsAsFactors = FALSE)
cut_time_for_test <- data.frame(row.names = c("PID","T0.1","T1.1","T2.1","T3.2","T4.2","T5.2","T6.3","T7.3","T8.3","T9.4","T10.4","T11.4","T12.5","T13.5","T14.5"), stringsAsFactors = FALSE)
sut_time_for_test <- data.frame(row.names = c("PID","T0.1","T1.1","T2.1","T3.2","T4.2","T5.2","T6.3","T7.3","T8.3","T9.4","T10.4","T11.4","T12.5","T13.5","T14.5"), stringsAsFactors = FALSE)
put_time_for_test <- data.frame(row.names = c("PID","T0.1","T1.1","T2.1","T3.2","T4.2","T5.2","T6.3","T7.3","T8.3","T9.4","T10.4","T11.4","T12.5","T13.5","T14.5"), stringsAsFactors = FALSE)
# Iterating through participants
for(pid in unique(result_answers$pid)) {
annotations_for_participant <- result_videos[result_videos$pid == pid,]
vs_time_summary_row <- data.frame(PID = pid, T0.1 = 0, T1.1 = 0, T2.1 = 0, T3.2 = 0, T4.2 = 0, T5.2 = 0, T6.3 = 0, T7.3 = 0, T8.3 = 0, T9.4 = 0, T10.4 = 0, T11.4 = 0, T12.5 = 0, T13.5 = 0, T14.5 = 0, S = 0, C = 0, P = 0, Total = 0, stringsAsFactors = FALSE)
portal_time_summary_row <- data.frame(PID = pid, T0.1 = 0, T1.1 = 0, T2.1 = 0, T3.2 = 0, T4.2 = 0, T5.2 = 0, T6.3 = 0, T7.3 = 0, T8.3 = 0, T9.4 = 0, T10.4 = 0, T11.4 = 0, T12.5 = 0, T13.5 = 0, T14.5 = 0)
marked_wrong_summary_row <- data.frame(PID = pid, T0.1 = 0, T1.1 = 0, T2.1 = 0, T3.2 = 0, T4.2 = 0, T5.2 = 0, T6.3 = 0, T7.3 = 0, T8.3 = 0, T9.4 = 0, T10.4 = 0, T11.4 = 0, T12.5 = 0, T13.5 = 0, T14.5 = 0)
marked_ok_summary_row <- data.frame(PID = pid, T0.1 = 0, T1.1 = 0, T2.1 = 0, T3.2 = 0, T4.2 = 0, T5.2 = 0, T6.3 = 0, T7.3 = 0, T8.3 = 0, T9.4 = 0, T10.4 = 0, T11.4 = 0, T12.5 = 0, T13.5 = 0, T14.5 = 0)
cut_time_for_test_row <- data.frame(PID = pid, T0.1 = 0, T1.1 = 0, T2.1 = 0, T3.2 = 0, T4.2 = 0, T5.2 = 0, T6.3 = 0, T7.3 = 0, T8.3 = 0, T9.4 = 0, T10.4 = 0, T11.4 = 0, T12.5 = 0, T13.5 = 0, T14.5 = 0)
sut_time_for_test_row <- data.frame(PID = pid, T0.1 = 0, T1.1 = 0, T2.1 = 0, T3.2 = 0, T4.2 = 0, T5.2 = 0, T6.3 = 0, T7.3 = 0, T8.3 = 0, T9.4 = 0, T10.4 = 0, T11.4 = 0, T12.5 = 0, T13.5 = 0, T14.5 = 0)
put_time_for_test_row <- data.frame(PID = pid, T0.1 = 0, T1.1 = 0, T2.1 = 0, T3.2 = 0, T4.2 = 0, T5.2 = 0, T6.3 = 0, T7.3 = 0, T8.3 = 0, T9.4 = 0, T10.4 = 0, T11.4 = 0, T12.5 = 0, T13.5 = 0, T14.5 = 0)
# Iterating through the annotations of the participant
currently_active <- NA
vs_window <- NA
portal_page <- NA
debug_start_index <- NA
vs_start_index <- -1 # -1: VS has not been activated yet, otherwise the row index of activation point
window_start_index <- -1 # -1: A window in VS has not been activated yet, otherwise the row index of activation point
page_start_index <- -1 # -1: A page in the portal has not been activated yet, otherwise the row index of activation point
# Iterating through each row of a participant
for(i in 1:nrow(annotations_for_participant)) {
# Getting the corresponding row
row <- annotations_for_participant[i,]
# If the row is na somehow (due to some R bug)
if(is.na(row$pid)) {
break;
}
marked_case <- NA
run_case <- NA
debug_time <- NA
debug_case <- NA
# If the portal has been activated
if(row$Behavior == "Portal activated") {
currently_active <- "Portal" # Setting the currently active variable to Portal
if(is.na(portal_page)) {
# If the portal is activated and there were no previous pages, then the home page (H) will open
portal_page = "H"
}
page_start_index <- i
##### VS full timer summarization #####
# If there was a VS activation before, it must be stopped and added to the summary
if(vs_start_index != -1) {
# The elapsed time is the current row timestamp minus the previous VS activation timestamp
elapsed_vs_time <- row$Time - annotations_for_participant[annotations_for_participant$pid == pid,][vs_start_index,]$Time
# The elapsed time is added to the row
vs_time_summary_row$Total <- vs_time_summary_row$Total + elapsed_vs_time
}
vs_start_index <- NA # There is no start index for VS, waiting for the next
##### VS window timer summarization #####
# If there was a VS window activation before, it must stopped and added to the summary
if(window_start_index != -1) {
# Getting the row of previous window activation in VS
window_start_row <- extended_video_annotations[extended_video_annotations$PID == pid,][window_start_index,]
# Calculating the elapsed time in that particular window
elapsed_window_time <- row$Time - window_start_row$Time
# If the window was not C, S, N, P or was not NA (it is a numbered test case)
if(window_start_row$Window != "C" && window_start_row$Window != "S" && window_start_row$Window != "N" && window_start_row$Window != "P" && !is.na(window_start_row$Window)) {
# e.g., TC0 -> 2. column index
vs_time_summary_row[,as.numeric(window_start_row$Window)+2] <- vs_time_summary_row[,as.numeric(window_start_row$Window)+2] + elapsed_window_time
} else if(window_start_row$Window != "N" && !is.na(window_start_row$Window)) {
vs_time_summary_row[,window_start_row$Window] <- vs_time_summary_row[,window_start_row$Window] + elapsed_window_time
# Summarizing class time for given test
if(!is.na(window_start_row$Page) && window_start_row$Page != "H") {
if(window_start_row$Window == "C") {
cut_time_for_test_row[,as.numeric(window_start_row$Page)+2] <- cut_time_for_test_row[,as.numeric(window_start_row$Page)+2] + elapsed_window_time
}
if(window_start_row$Window == "S") {
sut_time_for_test_row[,as.numeric(window_start_row$Page)+2] <- sut_time_for_test_row[,as.numeric(window_start_row$Page)+2] + elapsed_window_time
}
if(window_start_row$Window == "P") {
put_time_for_test_row[,as.numeric(window_start_row$Page)+2] <- put_time_for_test_row[,as.numeric(window_start_row$Page)+2] + elapsed_window_time
}
}
}
}
window_start_index <- NA
} else if(row$Behavior == "VS activated") {
currently_active <- "VS"
# Start VS timer
vs_start_index <- i
window_start_index <- i
# stopping Portal timers
if(page_start_index != -1) {
page_start_row <- extended_video_annotations[extended_video_annotations$PID == pid,][page_start_index,]
elapsed_page_time <- row$Time - page_start_row$Time
if(page_start_row$Page != "H" && !is.na(page_start_row$Page)) {
# e.g., TC0 -> 2. column index
portal_time_summary_row[,as.numeric(page_start_row$Page)+2] <- portal_time_summary_row[,as.numeric(page_start_row$Page)+2] + elapsed_page_time
}
}
page_start_index <- NA
} else if(row$Behavior == "Changed page in portal") {
portal_page <- row$Modifier.1
if(page_start_index != -1) {
page_start_row <- extended_video_annotations[extended_video_annotations$PID == pid,][page_start_index,]
elapsed_page_time <- row$Time - page_start_row$Time
if(page_start_row$Page != "H" && !is.na(page_start_row$Page)) {
# e.g., TC0 -> 2. column index
portal_time_summary_row[,as.numeric(page_start_row$Page)+2] <- portal_time_summary_row[,as.numeric(page_start_row$Page)+2] + elapsed_page_time
}
}
page_start_index <- i
} else if(row$Behavior == "Changed window in VS") {
vs_window <- row$Modifier.1
if(window_start_index != -1) {
window_start_row <- extended_video_annotations[extended_video_annotations$PID == pid,][window_start_index,]
elapsed_window_time <- row$Time - window_start_row$Time
if(window_start_row$Window != "C" && window_start_row$Window != "S" && window_start_row$Window != "N" && window_start_row$Window != "P" && !is.na(window_start_row$Window)) {
# e.g., TC0 -> 2. column index
vs_time_summary_row[,as.numeric(window_start_row$Window)+2] <- vs_time_summary_row[,as.numeric(window_start_row$Window)+2] + elapsed_window_time
} else if(window_start_row$Window != "N" && !is.na(window_start_row$Window)) {
vs_time_summary_row[,window_start_row$Window] <- vs_time_summary_row[,window_start_row$Window] + elapsed_window_time
}
}
window_start_index <- i
} else if(row$Behavior == "Marked as OK") {
marked_case <- portal_page
marked_ok_summary_row[,as.numeric(marked_case)+2] <- marked_ok_summary_row[,as.numeric(marked_case)+2] +1
} else if(row$Behavior == "Marked as WRONG") {
marked_case <- portal_page
marked_wrong_summary_row[,as.numeric(marked_case)+2] <- marked_wrong_summary_row[,as.numeric(marked_case)+2] + 1
} else if(row$Behavior == "Running test") {
run_case <- vs_window
} else if(row$Behavior == "Submit") {
} else if(row$Behavior == "Other event") {
} else if(row$Behavior == "Debug test") {
if(row$Modifier.1 == "S") {
debug_start_index <- i
debug_case <- vs_window
} else if(row$Modifier.1 == "E") {
debug_case <- extended_video_annotations[extended_video_annotations$PID == pid,][debug_start_index,]$DebugCase
debug_time <- row$Time - annotations_for_participant[annotations_for_participant$pid == pid,][debug_start_index,]$Time
debug_start_index <- NA
}
} else if(row$Behavior == "Remove answer") {
marked_case <- row$Modifier.1
} else if(row$Behavior == "Missing test problem") {
if(row$Modifier.1 == "S") {
window_start_row <- extended_video_annotations[extended_video_annotations$PID == pid,][window_start_index,]
elapsed_window_time <- row$Time - window_start_row$Time
if(window_start_row$Window != "C" && window_start_row$Window != "S" && window_start_row$Window != "N" && window_start_row$Window != "P" && !is.na(window_start_row$Window)) {
# e.g., TC0 -> 2. column index
vs_time_summary_row[,as.numeric(window_start_row$Window)+2] <- vs_time_summary_row[,as.numeric(window_start_row$Window)+2] + elapsed_window_time
} else if(window_start_row$Window != "N" && !is.na(window_start_row$Window)) {
vs_time_summary_row[,window_start_row$Window] <- vs_time_summary_row[,window_start_row$Window] + elapsed_window_time
}
} else if(row$Modifier.1 == "E") {
window_start_index <- i
}
}
extended_row <- data.frame(PID=pid,Timestamp=row$Time,Behavior=row$Behavior,Modifier=row$Modifier.1,Active=currently_active,Page=portal_page,Window=vs_window,RunCase=run_case,DebugLength=debug_time,DebugCase=debug_case,stringsAsFactors = FALSE)
extended_video_annotations <- rbind(extended_video_annotations,extended_row)
}
portal_time_summary <- rbind(portal_time_summary, portal_time_summary_row)
vs_time_summary <- rbind(vs_time_summary, vs_time_summary_row)
marked_ok_summary <- rbind(marked_ok_summary, marked_ok_summary_row)
marked_wrong_summary <- rbind(marked_wrong_summary, marked_wrong_summary_row)
cut_time_for_test <- rbind(cut_time_for_test, cut_time_for_test_row)
sut_time_for_test <- rbind(sut_time_for_test, sut_time_for_test_row)
put_time_for_test <- rbind(put_time_for_test, put_time_for_test_row)
}
# Sanity-check columns: per participant, the total time attributed to the test
# pages (portal) and to the test, "S", "C" and "P" windows (Visual Studio).
portal_time_summary$check_sums <- rowSums(portal_time_summary[, c(
  "T0.1", "T1.1", "T2.1", "T3.2", "T4.2", "T5.2", "T6.3", "T7.3",
  "T8.3", "T9.4", "T10.4", "T11.4", "T12.5", "T13.5", "T14.5"
)])
vs_time_summary$check_sums <- rowSums(vs_time_summary[, c(
  "T0.1", "T1.1", "T2.1", "T3.2", "T4.2", "T5.2", "T6.3", "T7.3",
  "T8.3", "T9.4", "T10.4", "T11.4", "T12.5", "T13.5", "T14.5",
  "S", "C", "P"
)])

# Sort by PID first and only then derive the project from the PID of each
# sorted row. Deriving the labels from the unsorted frame and binding them to
# the sorted one by position would mislabel rows whenever the summary is not
# already ordered by PID. PIDs above 40 belong to MathNet, the rest to NBitcoin.
vs_time_summary_with_projects <- vs_time_summary[order(vs_time_summary$PID), ]
vs_time_summary_with_projects$Project <- ifelse(
  as.numeric(vs_time_summary_with_projects$PID) > 40, "MathNet", "NBitcoin"
)

portal_time_summary_with_projects <- portal_time_summary[order(portal_time_summary$PID), ]
portal_time_summary_with_projects$Project <- ifelse(
  as.numeric(portal_time_summary_with_projects$PID) > 40, "MathNet", "NBitcoin"
)

# Kept so that code relying on this helper frame still finds it; it now matches
# the row order of portal_time_summary_with_projects.
project_column <- data.frame(
  Project = portal_time_summary_with_projects$Project,
  stringsAsFactors = FALSE
)
# Tests individually
# Long format: one row per participant and test, for the VS windows ...
time_spent_tests_vs <- melt(
  vs_time_summary_with_projects,
  id.vars = c("PID", "Project"),
  measure.vars = c(
    "T0.1", "T1.1", "T2.1", "T3.2", "T4.2", "T5.2", "T6.3", "T7.3",
    "T8.3", "T9.4", "T10.4", "T11.4", "T12.5", "T13.5", "T14.5"
  ),
  variable.name = "Window",
  value.name = "Time"
)
time_spent_tests_vs <- time_spent_tests_vs[with(time_spent_tests_vs, order(PID, Window)), ]

# ... and for the portal pages.
time_spent_tests_portal <- melt(
  portal_time_summary_with_projects,
  id.vars = c("PID", "Project"),
  measure.vars = c(
    "T0.1", "T1.1", "T2.1", "T3.2", "T4.2", "T5.2", "T6.3", "T7.3",
    "T8.3", "T9.4", "T10.4", "T11.4", "T12.5", "T13.5", "T14.5"
  ),
  variable.name = "Page",
  value.name = "Time"
)
time_spent_tests_portal <- time_spent_tests_portal[with(time_spent_tests_portal, order(PID, Page)), ]

# Both frames are ordered by (PID, test), so the times are added row by row.
time_spent_tests <- time_spent_tests_vs
time_spent_tests$Time <- time_spent_tests$Time + time_spent_tests_portal$Time

time_spent_others <- melt(
  vs_time_summary_with_projects,
  id.vars = c("PID", "Project"),
  measure.vars = c("S", "C", "P"),
  variable.name = "Window",
  value.name = "Time"
)

# Every test that is not listed as faulty is labelled "Not faulty".
time_spent_tests_nbitcoin <- time_spent_tests[time_spent_tests$Project == "NBitcoin", ]
time_spent_tests_nbitcoin$TestCategory <- with(
  time_spent_tests_nbitcoin,
  ifelse(Window == "T5.2" | Window == "T7.3" | Window == "T10.4", "Faulty", "Not faulty")
)

time_spent_tests_mathnet <- time_spent_tests[time_spent_tests$Project == "MathNet", ]
time_spent_tests_mathnet$TestCategory <- with(
  time_spent_tests_mathnet,
  ifelse(Window == "T2.1" | Window == "T4.2" | Window == "T11.4", "Faulty", "Not faulty")
)
# Tests grouped by methods
# Both summaries are sorted by PID so that their rows line up by position.
vs_time_summary_with_projects <- vs_time_summary_with_projects[order(vs_time_summary_with_projects$PID), ]
portal_time_summary_with_projects <- portal_time_summary_with_projects[order(portal_time_summary_with_projects$PID), ]
pts <- portal_time_summary_with_projects
vts <- vs_time_summary_with_projects

# One column per method (three tests each): portal time plus VS time.
# The frame is built directly from the sums, so its size follows the number of
# participants instead of relying on a fixed 54-row placeholder.
time_spent_tests_grouped <- data.frame(
  M1 = (pts$T0.1 + pts$T1.1 + pts$T2.1) + (vts$T0.1 + vts$T1.1 + vts$T2.1),
  M2 = (pts$T3.2 + pts$T4.2 + pts$T5.2) + (vts$T3.2 + vts$T4.2 + vts$T5.2),
  M3 = (pts$T6.3 + pts$T7.3 + pts$T8.3) + (vts$T6.3 + vts$T7.3 + vts$T8.3),
  M4 = (pts$T9.4 + pts$T10.4 + pts$T11.4) + (vts$T9.4 + vts$T10.4 + vts$T11.4),
  M5 = (pts$T12.5 + pts$T13.5 + pts$T14.5) + (vts$T12.5 + vts$T13.5 + vts$T14.5)
)
# Other windows - SUT, CUT and PUT
time_spent_others_nbitcoin <- time_spent_others[time_spent_others$Project == "NBitcoin", ]
time_spent_others_mathnet <- time_spent_others[time_spent_others$Project == "MathNet", ]

# Joining the dataset
# Total test time per participant in the portal and in VS. The location label
# is added with mutate() so it always matches the number of summarised rows
# (no hard-coded participant count).
summarized_test_times_portal <- time_spent_tests_portal %>%
  group_by(PID, Project) %>%
  summarize(Time = sum(Time)) %>%
  mutate(Location = "Portal")
summarized_test_time_vs <- time_spent_tests_vs %>%
  group_by(PID, Project) %>%
  summarize(Time = sum(Time)) %>%
  mutate(Location = "VS")

# The third column of time_spent_others holds the window code ("S", "C", "P");
# it is renamed so that it stacks with the Location labels above.
names(time_spent_others)[3] <- "Location"
full_summarized_times <- summarized_test_times_portal %>%
  bind_rows(summarized_test_time_vs) %>%
  bind_rows(time_spent_others)
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
# Fix the display order of the locations and give them readable labels.
full_summarized_times$Location <- factor(
  full_summarized_times$Location,
  levels = c("Portal", "VS", "C", "S", "P"),
  labels = c("Portal", "Test code", "CUT", "SUT", "PUT")
)

# Drop the temporaries left behind by the annotation-processing loop.
rm(list = c(
  "cut_time_for_test_row", "extended_row", "marked_ok_summary_row",
  "marked_wrong_summary_row", "page_start_row", "portal_time_summary_row",
  "put_time_for_test_row", "row", "sut_time_for_test_row",
  "vs_time_summary_row", "window_start_row", "currently_active",
  "debug_case", "debug_start_index", "debug_time", "elapsed_page_time",
  "elapsed_vs_time", "elapsed_window_time", "i", "marked_case",
  "page_start_index", "pid", "portal_page", "run_case", "vs_start_index",
  "vs_window", "window_start_index"
))
In the replication study, the participants' activities were successfully recorded, so we extract them directly from the log files.
# Perform the same analytics as in the original study
#
# Summarises, per participant, the time in seconds spent at each location.
#
# Arguments:
#   log                    Data frame of logged events; the columns Date, PID,
#                          Action and Location are read.
#   start_time             Only events whose parsed Date is later than this
#                          are kept.
#   testFile_test_mapping  Function called with the Location of an
#                          "ActivateWindow" event. Its result is used as a
#                          column name of the VS summary (a test id, "SUT",
#                          "CUT" or "PUT"); "" means the window is not counted.
#                          Assumed to return a single string - TODO confirm.
#
# Returns an unnamed list of six tibbles, in this order: VS time, portal time,
# SUT time per test, CUT time per test, PUT time per test, and work length
# (time between the first and the last kept event of each participant).
#
# NOTE(review): portal pages are resolved with testSeq_to_seq(), which is not
# a parameter and must be defined by the caller's environment.
# NOTE: time is credited only when the active location changes, so the span
# after a participant's last change is not counted.
summarize_logs <- function(log, start_time, testFile_test_mapping) {
  date_format <- "%m/%d/%Y %I:%M:%S %p"
  test_ids <- c(
    "T0.1", "T1.1", "T2.1", "T3.2", "T4.2", "T5.2", "T6.3", "T7.3",
    "T8.3", "T9.4", "T10.4", "T11.4", "T12.5", "T13.5", "T14.5"
  )

  # One-row accumulator: PID, optional leading columns, tests, trailing columns.
  new_summary_row <- function(pid, leading = character(0), trailing = character(0)) {
    columns <- c(leading, test_ids, trailing)
    zeros <- as.list(setNames(rep(0, length(columns)), columns))
    do.call(data.frame, c(list(PID = pid), zeros, list(stringsAsFactors = FALSE)))
  }

  # Adds `seconds` to the named column of a one-row accumulator.
  add_seconds <- function(summary_row, column, seconds) {
    summary_row[column] <- summary_row[column] + seconds
    summary_row
  }

  # Sets Total to the sum of the given columns (added left to right).
  with_total <- function(summary, columns) {
    summary$Total <- Reduce(`+`, summary[columns])
    summary
  }

  series <- log %>% filter(strptime(Date, format = date_format) > start_time)

  work_length <- tibble()
  vs_time_summary_rep <- tibble()
  portal_time_summary_rep <- tibble()
  sut_time_summary_rep <- tibble()
  cut_time_summary_rep <- tibble()
  put_time_summary_rep <- tibble()

  for (pid in unique(series$PID)) {
    pid_activities <- series %>% filter(PID == pid)
    event_count <- nrow(pid_activities)
    event_times <- as.POSIXct(strptime(pid_activities$Date, format = date_format))
    actions <- as.character(pid_activities$Action)

    # Explicit units: subtracting date-times picks "auto" units, which differ
    # between participants. Minutes is what "auto" yields for sessions between
    # one minute and one hour.
    work_length <- work_length %>% bind_rows(tibble(
      PID = pid,
      Time = difftime(event_times[event_count], event_times[1], units = "mins")
    ))

    # Replay the events to know, after each one, which application was active
    # and which VS window / portal page was shown last.
    active <- character(event_count)
    active_vs <- character(event_count)
    active_portal <- character(event_count)
    current_active <- if (actions[1] == "ActivateWindow") "VS" else "Portal"
    current_vs <- ""
    current_portal <- "Index"
    for (k in seq_len(event_count)) {
      action <- actions[k]
      if (action %in% c("ActivateWindow", "BrowserFocusLost")) {
        current_active <- "VS"
      } else if (action %in% c("BrowserFocused", "Index", "Code", "Marked", "Submit")) {
        current_active <- "Portal"
      }
      if (action == "ActivateWindow") {
        current_vs <- testFile_test_mapping(pid_activities$Location[k])
      }
      if (action == "Index") {
        current_portal <- "Index"
      } else if (action == "Code") {
        current_portal <- testSeq_to_seq(pid_activities$Location[k])
      }
      active[k] <- current_active
      active_vs[k] <- current_vs
      active_portal[k] <- current_portal
    }

    # Summarizing each location time
    vs_time_summary_row <- new_summary_row(pid, trailing = c("SUT", "CUT", "PUT", "Total"))
    portal_time_summary_row <- new_summary_row(pid, leading = "Index")
    sut_time_summary_row <- new_summary_row(pid, leading = "Index")
    cut_time_summary_row <- new_summary_row(pid, leading = "Index")
    put_time_summary_row <- new_summary_row(pid, leading = "Index")

    # The last change time is the first event at start
    last_change_time <- event_times[1]
    for (k in seq_len(event_count)[-1]) {
      left_active <- active[k - 1]
      left_vs <- active_vs[k - 1]
      left_page <- active_portal[k - 1]

      # A change is either a switch between portal and VS, or a switch of the
      # page / window inside the application that stays active.
      if (left_active != active[k]) {
        location_changed <- TRUE
      } else if (left_active == "Portal") {
        location_changed <- left_page != active_portal[k]
      } else {
        location_changed <- left_vs != active_vs[k]
      }
      if (!location_changed) {
        next
      }

      # Always in seconds: as.numeric() on an auto-unit difftime would return
      # minutes or hours for longer spans.
      elapsed <- as.numeric(difftime(event_times[k], last_change_time, units = "secs"))

      if (left_active == "Portal") {
        portal_time_summary_row <- add_seconds(portal_time_summary_row, left_page, elapsed)
      } else if (left_vs != "") {
        # SUT/PUT/CUT time is additionally attributed to the test page that
        # was last shown in the portal.
        if (left_vs == "SUT") {
          sut_time_summary_row <- add_seconds(sut_time_summary_row, left_page, elapsed)
        } else if (left_vs == "PUT") {
          put_time_summary_row <- add_seconds(put_time_summary_row, left_page, elapsed)
        } else if (left_vs == "CUT") {
          cut_time_summary_row <- add_seconds(cut_time_summary_row, left_page, elapsed)
        }
        vs_time_summary_row <- add_seconds(vs_time_summary_row, left_vs, elapsed)
      }
      last_change_time <- event_times[k]
    }

    vs_time_summary_rep <- vs_time_summary_rep %>% bind_rows(vs_time_summary_row)
    portal_time_summary_rep <- portal_time_summary_rep %>% bind_rows(portal_time_summary_row)
    sut_time_summary_rep <- sut_time_summary_rep %>% bind_rows(sut_time_summary_row)
    cut_time_summary_rep <- cut_time_summary_rep %>% bind_rows(cut_time_summary_row)
    put_time_summary_rep <- put_time_summary_rep %>% bind_rows(put_time_summary_row)
  }

  # Totals cover the test columns only (plus CUT/PUT/SUT for VS); the portal
  # "Index" page is not part of any total.
  vs_time_summary_rep <- with_total(vs_time_summary_rep, c(test_ids, "CUT", "PUT", "SUT"))
  portal_time_summary_rep <- with_total(portal_time_summary_rep, test_ids)
  sut_time_summary_rep <- with_total(sut_time_summary_rep, test_ids)
  cut_time_summary_rep <- with_total(cut_time_summary_rep, test_ids)
  put_time_summary_rep <- with_total(put_time_summary_rep, test_ids)

  list(
    vs_time_summary_rep,
    portal_time_summary_rep,
    sut_time_summary_rep,
    cut_time_summary_rep,
    put_time_summary_rep,
    work_length
  )
}
# NodaTime: run the log summary and tag each of the six result tables
# (VS, portal, SUT, CUT, PUT, work length) with the project name.
output_nodatime <- summarize_logs(
  log = nodatime_log,
  start_time = nodatime_start_time,
  testFile_test_mapping = nodatime_test_to_seq
)
nodatime_vs <- mutate(output_nodatime[[1]], Project = "NodaTime")
nodatime_portal <- mutate(output_nodatime[[2]], Project = "NodaTime")
nodatime_sut_tests <- mutate(output_nodatime[[3]], Project = "NodaTime")
nodatime_cut_tests <- mutate(output_nodatime[[4]], Project = "NodaTime")
nodatime_put_tests <- mutate(output_nodatime[[5]], Project = "NodaTime")
nodatime_work_length <- mutate(output_nodatime[[6]], Project = "NodaTime")

# NetTopology: same steps.
output_nettopology <- summarize_logs(
  log = nettopology_log,
  start_time = nettopology_start_time,
  testFile_test_mapping = nettopology_test_to_seq
)
nettopology_vs <- mutate(output_nettopology[[1]], Project = "NetTopology")
nettopology_portal <- mutate(output_nettopology[[2]], Project = "NetTopology")
nettopology_sut_tests <- mutate(output_nettopology[[3]], Project = "NetTopology")
nettopology_cut_tests <- mutate(output_nettopology[[4]], Project = "NetTopology")
nettopology_put_tests <- mutate(output_nettopology[[5]], Project = "NetTopology")
nettopology_work_length <- mutate(output_nettopology[[6]], Project = "NetTopology")
# Calculating work length statistics
nodatime_work_stats <- summarize(
  nodatime_work_length,
  mean = mean(Time),
  median = median(Time),
  sd = sd(Time),
  min = min(Time),
  max = max(Time)
)
nettopology_work_stats <- summarize(
  nettopology_work_length,
  mean = mean(Time),
  median = median(Time),
  sd = sd(Time),
  min = min(Time),
  max = max(Time)
)

# Binding the two projects
rep_portal <- bind_rows(nodatime_portal, nettopology_portal)
rep_vs <- bind_rows(nodatime_vs, nettopology_vs)
rep_sut_tests <- bind_rows(nodatime_sut_tests, nettopology_sut_tests)
rep_cut_tests <- bind_rows(nodatime_cut_tests, nettopology_cut_tests)
rep_put_tests <- bind_rows(nodatime_put_tests, nettopology_put_tests)
# Melting
# Long format: one row per participant and test page / test window, plus one
# row per participant for each of the SUT, CUT and PUT windows.
rep_portal_time <- melt(
  rep_portal,
  id.vars = c("PID", "Project"),
  measure.vars = c(
    "T0.1", "T1.1", "T2.1", "T3.2", "T4.2", "T5.2", "T6.3", "T7.3",
    "T8.3", "T9.4", "T10.4", "T11.4", "T12.5", "T13.5", "T14.5"
  ),
  variable.name = "Page",
  value.name = "Time"
)
rep_vs_time <- melt(
  rep_vs,
  id.vars = c("PID", "Project"),
  measure.vars = c(
    "T0.1", "T1.1", "T2.1", "T3.2", "T4.2", "T5.2", "T6.3", "T7.3",
    "T8.3", "T9.4", "T10.4", "T11.4", "T12.5", "T13.5", "T14.5"
  ),
  variable.name = "Window",
  value.name = "Time"
)
rep_others_time <- melt(
  rep_vs,
  id.vars = c("PID", "Project"),
  measure.vars = c("SUT", "CUT", "PUT"),
  variable.name = "Location",
  value.name = "Time"
)

# Mark faulty tests
# Computed in one vectorised step: assigning through a row subset fails when
# no row matches the condition.
rep_portal_time <- rep_portal_time %>% mutate(
  Faulty = (Project == "NodaTime" & Page %in% c("T2.1", "T6.3", "T12.5")) |
    (Project == "NetTopology" & Page %in% c("T0.1", "T4.2", "T10.4"))
)
rep_vs_time <- rep_vs_time %>% mutate(
  Faulty = (Project == "NodaTime" & Window %in% c("T2.1", "T6.3", "T12.5")) |
    (Project == "NetTopology" & Window %in% c("T0.1", "T4.2", "T10.4"))
)
# Joining the dataset
# Total test time per participant in the portal and in VS. The location label
# is added with mutate() so it always matches the number of summarised rows
# (no hard-coded participant count).
summarized_rep_portal <- rep_portal_time %>%
  group_by(PID, Project) %>%
  summarize(Time = sum(Time)) %>%
  mutate(Location = "Portal")
summarized_rep_vs <- rep_vs_time %>%
  group_by(PID, Project) %>%
  summarize(Time = sum(Time)) %>%
  mutate(Location = "VS")
full_summarized_times_rep <- summarized_rep_portal %>%
  bind_rows(summarized_rep_vs) %>%
  bind_rows(rep_others_time)
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
# Fix the display order of the locations and show "VS" as "Test code".
# Only the level that actually occurs is renamed; mapping a level that is not
# present makes revalue() report it as missing.
full_summarized_times_rep$Location <- factor(full_summarized_times_rep$Location, levels = c("Portal", "VS", "CUT", "SUT", "PUT"))
full_summarized_times_rep$Location <- plyr::revalue(full_summarized_times_rep$Location, c("VS" = "Test code"))
## The following `from` values were not present in `x`: gamma
The following boxplots visualize the time spent by participants at each possible location.
# IN PAPER
# Time per location, NBitcoin (original study).
#pdf(file="full-time-spent-nbitcoin.pdf",width=4,height = 2.5)
full_summarized_times %>%
  filter(Project == "NBitcoin") %>%
  ggplot(aes(x = Location, y = Time)) +
  geom_boxplot() +
  theme_hc() +
  scale_y_continuous(limits = c(0, 1600), breaks = seq(0, 1500, 250)) +
  ylab("Time [s]")
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
#dev.off()
# IN PAPER
# Time per location, MathNet (original study).
#pdf(file="full-time-spent-mathnet.pdf",width=4,height = 2.5)
full_summarized_times %>%
  filter(Project == "MathNet") %>%
  ggplot(aes(x = Location, y = Time)) +
  geom_boxplot() +
  theme_hc() +
  scale_y_continuous(limits = c(0, 1600), breaks = seq(0, 1500, 250)) +
  ylab("Time [s]")
#dev.off()
# IN PAPER
# Time per location, NodaTime (replication).
#pdf(file="C:\\PhD\\Repos\\paper-wbstudy-ist\\figures\\full-time-spent-nodatime.pdf",width=4,height = 2.5)
full_summarized_times_rep %>%
  filter(Project == "NodaTime") %>%
  ggplot(aes(x = Location, y = Time)) +
  geom_boxplot() +
  theme_hc() +
  scale_y_continuous(limits = c(0, 1600), breaks = seq(0, 1500, 250)) +
  ylab("Time [s]")
#dev.off()
# IN PAPER
# Time per location, NetTopology (replication).
#pdf(file="C:\\PhD\\Repos\\paper-wbstudy-ist\\figures\\full-time-spent-nettopology.pdf",width=4,height = 2.5)
full_summarized_times_rep %>%
  filter(Project == "NetTopology") %>%
  ggplot(aes(x = Location, y = Time)) +
  geom_boxplot() +
  theme_hc() +
  scale_y_continuous(limits = c(0, 1600), breaks = seq(0, 1500, 250)) +
  ylab("Time [s]")
#dev.off()
The following boxplots show the total time invested in each test case, obtained by summing the time spent at every possible location.
# Builds the per-participant, per-test total time used by the test boxplots.
#
# Arguments:
#   first_project, second_project  Project names; PIDs below 41 are assigned
#                                  to the first one, the others to the second.
#   first_faulty_tests,
#   second_faulty_tests            Test ids that are faulty in each project.
#   p_cut_tests, p_put_tests,
#   p_sut_tests                    Wide frames (PID plus one column per test)
#                                  with the CUT / PUT / SUT time per test.
#   p_vs_time                      Long frame with columns PID, Window, Time.
#   p_portal_time                  Long frame with columns PID, Page, Time.
#   p_result_answers               Answers; joined on pid, TestId and Project,
#                                  and expected to provide a Method column.
#
# Returns a tibble with PID, Test, Project, Method, Time (summed over all
# locations) and Faulty ("YES"/"NO"); Test is a factor in test order.
time_summary_data <- function(first_project,
                              second_project,
                              first_faulty_tests,
                              second_faulty_tests,
                              p_cut_tests,
                              p_put_tests,
                              p_sut_tests,
                              p_vs_time,
                              p_portal_time,
                              p_result_answers) {
  test_ids <- c(
    "T0.1", "T1.1", "T2.1", "T3.2", "T4.2", "T5.2", "T6.3", "T7.3",
    "T8.3", "T9.4", "T10.4", "T11.4", "T12.5", "T13.5", "T14.5"
  )

  # Wide per-test frame -> long (PID, Test, Time).
  melt_tests <- function(per_test_times) {
    melt(per_test_times, id.vars = c("PID"), measure.vars = test_ids,
         variable.name = "Test", value.name = "Time")
  }

  cut_times <- melt_tests(p_cut_tests)
  put_times <- melt_tests(p_put_tests)
  sut_times <- melt_tests(p_sut_tests)
  vs_time_for_tests <- subset(p_vs_time, select = c(PID, Window, Time))
  names(vs_time_for_tests) <- c("PID", "Test", "Time")
  portal_time_for_tests <- subset(p_portal_time, select = c(PID, Page, Time))
  names(portal_time_for_tests) <- c("PID", "Test", "Time")

  # Stack all locations and remember where each row came from.
  all_times_for_tests <- rbind(cut_times, put_times, sut_times,
                               vs_time_for_tests, portal_time_for_tests)
  all_times_for_tests$Location <- c(
    rep("CUT", nrow(cut_times)),
    rep("PUT", nrow(put_times)),
    rep("SUT", nrow(sut_times)),
    rep("VS", nrow(vs_time_for_tests)),
    rep("Portal", nrow(portal_time_for_tests))
  )

  all_times_for_tests$PID <- as.numeric(all_times_for_tests$PID)
  all_times_for_tests$Project <- ifelse(all_times_for_tests$PID < 41,
                                        first_project, second_project)

  # Original study only: the first two tests of participants 55 and 59 are
  # excluded.
  if (first_project == "NBitcoin" && second_project == "MathNet") {
    all_times_for_tests <- dplyr::filter(
      all_times_for_tests,
      !((PID == 59 & (Test == "T0.1" | Test == "T1.1")) |
          (PID == 55 & (Test == "T0.1" | Test == "T1.1")))
    )
  }

  jj <- inner_join(x = all_times_for_tests, y = p_result_answers,
                   by = c("PID" = "pid", "Test" = "TestId", "Project" = "Project"))

  # Element-wise & and | are required inside mutate(): the scalar && / || only
  # look at one element and are an error for longer vectors in R >= 4.3.
  time_summary_plot <- jj %>%
    group_by(PID, Test, Project, Method) %>%
    summarise(Time = sum(Time)) %>%
    mutate(Faulty = ifelse(
      (Project == first_project & Test %in% first_faulty_tests) |
        (Project == second_project & Test %in% second_faulty_tests),
      "YES", "NO"
    ))

  time_summary_plot$Test <- factor(time_summary_plot$Test, levels = test_ids)
  time_summary_plot
}
# Fill colours by Faulty level: "NO" is white, "YES" is orange.
palette <- c("#FFFFFF","#ff821c")
time_summary_plot <- time_summary_data("NBitcoin", "MathNet", c("T5.2","T7.3","T10.4"), c("T2.1","T4.2","T11.4"), cut_time_for_test, put_time_for_test, sut_time_for_test, time_spent_tests_vs, time_spent_tests_portal, result_answers)
## Warning: Column `Test`/`TestId` joining factors with different levels,
## coercing to character vector
# IN PAPER
# The fill legend is suppressed with guide = "none" (guide = FALSE is deprecated).
#pdf(file="mathnet-time-spent-tests.pdf",width=7,height = 3.5)
ggplot(data = filter(time_summary_plot, Project == "MathNet"), aes(x = Test, y = Time, fill = factor(Faulty))) +
  geom_boxplot() +
  facet_grid(~Method, scales = "free_x") +
  theme_hc() +
  ylab("Time [s]") +
  xlab("Test ID") +
  scale_fill_manual(values = palette, guide = "none") +
  scale_y_continuous(limits = c(0, 900), breaks = seq(0, 900, 100))
#dev.off()
# IN PAPER
#pdf(file="nbitcoin-time-spent-tests.pdf",width=7,height = 3.5)
ggplot(data = filter(time_summary_plot, Project == "NBitcoin"), aes(x = Test, y = Time, fill = factor(Faulty))) +
  geom_boxplot() +
  facet_grid(~Method, scales = "free_x") +
  theme_hc() +
  ylab("Time [s]") +
  xlab("Test ID") +
  scale_fill_manual(values = palette, guide = "none") +
  scale_y_continuous(limits = c(0, 900), breaks = seq(0, 900, 100))
#dev.off()
# Per-test total time for the replication (NodaTime and NetTopologySuite).
time_summary_plot_rep <- time_summary_data("NodaTime", "NetTopologySuite", c("T2.1","T6.3","T12.5"), c("T0.1","T4.2","T10.4"), rep_cut_tests, rep_put_tests, rep_sut_tests, rep_vs_time, rep_portal_time, r_result_answers)
## Warning: Column `Test`/`TestId` joining factors with different levels,
## coercing to character vector
# IN PAPER
# The fill legend is suppressed with guide = "none" (guide = FALSE is deprecated).
#pdf(file="C:\\PhD\\Repos\\paper-wbstudy-ist\\figures\\nettopology-time-spent-tests.pdf",width=7,height = 3.5)
ggplot(data = filter(time_summary_plot_rep, Project == "NetTopologySuite"), aes(x = Test, y = Time, fill = factor(Faulty))) +
  geom_boxplot() +
  facet_grid(~Method, scales = "free_x") +
  theme_hc() +
  ylab("Time [s]") +
  xlab("Test ID") +
  scale_fill_manual(values = palette, guide = "none") +
  scale_y_continuous(limits = c(0, 900), breaks = seq(0, 900, 100))
#dev.off()
# IN PAPER
#pdf(file="C:\\PhD\\Repos\\paper-wbstudy-ist\\figures\\nodatime-time-spent-tests.pdf",width=7,height = 3.5)
ggplot(data = filter(time_summary_plot_rep, Project == "NodaTime"), aes(x = Test, y = Time, fill = factor(Faulty))) +
  geom_boxplot() +
  facet_grid(~Method, scales = "free_x") +
  theme_hc() +
  ylab("Time [s]") +
  xlab("Test ID") +
  scale_fill_manual(values = palette, guide = "none") +
  scale_y_continuous(limits = c(0, 900), breaks = seq(0, 900, 100))
#dev.off()