This is a supplementary report for the paper “A Preliminary Analysis on the Effect of Randomness in a CEGAR Framework” by Akos Hajdu and Zoltan Micskei, presented at the 25th PhD Mini-Symposium (2018), organized by the Department of Measurement and Information Systems at the Budapest University of Technology and Economics.

1 Initialization

# Required packages: tidyverse (dplyr/ggplot2/tidyr/readr), stringr,
# gridExtra (grid.arrange) and GGally (ggpairs, ggparcoord).
library(tidyverse)
library(stringr)
library(gridExtra)
library(GGally)

# Reusable plot tweaks shared by the figures below:
# - theme_rotate_x: vertical x-axis labels (long model/config names),
# - theme_noticks: hide axis tick marks (used on the heatmaps),
# - fill_heat: green-to-red fill gradient, NA drawn as white.
theme_rotate_x <- theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
theme_noticks <- theme(axis.ticks = element_blank())
fill_heat <- scale_fill_gradient(low = "green", high = "red", na.value = "white")
# Percentage of 'actual' out of 'total', rounded to 'round' digits (default 0).
percent <- function(actual, total, round = 0) { round(actual / total * 100, digits = round) }

1.1 Load data

# Load the per-run measurement results. The CSV path comes from the knitr
# 'params' list, so this is presumably an R Markdown parameterized report —
# confirm params$csv_path is supplied when rendering.
# Explicit col_types prevent read_csv from silently guessing column types.
d <- read_csv(
  params$csv_path,
  col_types = cols(
    Model = col_character(),
    Domain = col_character(),
    Refinement = col_character(),
    Search = col_character(),
    RandomVars = col_character(),
    Result = col_character(),
    TimeMs = col_integer(),
    Iterations = col_integer(),
    ArgSize = col_integer(),
    ArgDepth = col_integer(),
    ArgMeanBranchFactor = col_double(),
    CexLen = col_integer()
  )
)

# Load static model attributes: hardware metrics (Inputs/Latches/AndGates)
# for AIGER models, and program metrics (Locations/Edges/...) for
# software/PLC models; the metrics not applicable to a model are NA.
d_model_attrs <- read_csv(
  params$models_csv_path,
  col_types = cols(
    Model = col_character(),
    Inputs = col_integer(),
    Latches = col_integer(),
    AndGates = col_integer(),
    Vars = col_integer(),
    Locations = col_integer(),
    Edges = col_integer(),
    Assigns = col_integer(),
    Assumes = col_integer(),
    Havocs = col_integer()
  )
)

1.2 Clean data

# Normalize column types and shorten the refinement names.
# fixed = TRUE: the patterns are literal strings, not regular expressions.
d$Domain <- factor(d$Domain)
d$Refinement <- gsub("BW_BIN_ITP", "BIN", d$Refinement, fixed = TRUE)
d$Refinement <- gsub("SEQ_ITP", "SEQ", d$Refinement, fixed = TRUE)
d$Refinement <- factor(d$Refinement)
d$RandomVars <- as.logical(d$RandomVars)

# Combine RandomVars and Search into a single column:
# VARS = randomized variable names, SEARCH = randomized search, DET = deterministic.
d <- d %>% mutate(Randomized = ifelse(RandomVars, "VARS", ifelse(Search == "RND", "SEARCH", "DET")))
d <- d %>% select(-Search, -RandomVars)
d$Randomized <- factor(d$Randomized)

# Create 'Config' column, e.g. "EB-D" = EXPL domain, BIN refinement, DET.
d <- d %>% mutate(Config = paste0(
  substr(Domain, 1, 1),
  substr(Refinement, 1, 1),
  "-",
  substr(Randomized, 1, 1)
))
d$Config <- factor(d$Config)

# The algorithm parameters used repeatedly for grouping below.
all_params <- c("Randomized", "Domain", "Refinement")

# Convert 'Result' to logical; empty results and "[EX]" exception markers
# become NA. Count both kinds of problems before the conversion discards them.
n_rows_empty <- sum(is.na(d$Result))
n_rows_exception <- sum(grepl("[EX]", d$Result, fixed = TRUE))
d$Result <- as.logical(d$Result)

# Shorten model names by removing irrelevant details
# Shorten a model name by stripping directory prefixes, file extensions and
# benchmark-suite boilerplate, e.g.
#   "../models/cfa/eca/Problem01_label15_true-unreach-call.c" -> "eca/prob1_lab15".
# Vectorized: accepts and returns a character vector.
simplify_name <- function(name) {
  # Literal fragments use fixed = TRUE so '.' is not treated as a regex
  # wildcard (the original "../models/cfa/" pattern would also have matched
  # e.g. "xx/models/cfa/").
  name <- gsub("../models/cfa/", "", name, fixed = TRUE)
  name <- gsub("../models/sts/", "", name, fixed = TRUE)
  # Extensions stay as regexes; ".cfa" must be removed before the shorter ".c".
  name <- gsub("\\.cfa", "", name)
  name <- gsub("\\.aag", "", name)
  name <- gsub("hwmcc", "hw", name, fixed = TRUE)
  name <- gsub("-unreach-call", "", name, fixed = TRUE)
  name <- gsub("_true", "", name, fixed = TRUE)
  name <- gsub("_false", "", name, fixed = TRUE)
  name <- gsub("\\.c", "", name)
  name <- gsub("Problem0", "prob", name, fixed = TRUE)
  name <- gsub("label", "lab", name, fixed = TRUE)
  # Drop the trailing "_0" counter from ssh/eca model names.
  name <- gsub("(ssh/.*)_0", "\\1", name)
  name <- gsub("(eca/.*)_0", "\\1", name)
  name
}
# Apply the name simplification to both data sets.
d$Model <- simplify_name(d$Model)
d_model_attrs$Model <- simplify_name(d_model_attrs$Model)

# Determine category: the directory part of the model name (text before the
# last '/'). Referencing the column as 'Model' rather than 'd$Model' keeps
# mutate() correct even if the pipeline input were filtered or grouped.
d <- d %>% mutate(Category = sub("/([^/]*)$", "", Model))
d$Model <- as.factor(d$Model)

str(d)
## Classes 'tbl_df', 'tbl' and 'data.frame':    10800 obs. of  13 variables:
##  $ Model              : Factor w/ 30 levels "eca/prob1_lab15",..: 14 14 14 14 14 14 14 14 14 14 ...
##  $ Domain             : Factor w/ 2 levels "EXPL","PRED": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Refinement         : Factor w/ 2 levels "BIN","SEQ": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Result             : logi  TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ TimeMs             : int  28290 27241 27859 27581 27146 27307 27271 27633 27208 27318 ...
##  $ Iterations         : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ ArgSize            : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ ArgDepth           : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ ArgMeanBranchFactor: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ CexLen             : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Randomized         : Factor w/ 3 levels "DET","SEARCH",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Config             : Factor w/ 12 levels "EB-D","EB-S",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Category           : chr  "hw" "hw" "hw" "hw" ...

2 Overview

2.1 Summary

# Global counts over all runs.
n_meas_rep_total <- nrow(d)
n_meas_rep_succ <- nrow(d %>% filter(!is.na(Result)))
n_models_total <- length(unique(d$Model))
n_configs_total <- length(unique(d$Config))
categories <- unique(d$Category)
n_categories <- length(categories)
n_rows_problem <- n_rows_empty + n_rows_exception

# Aggregate the repeated runs of each (Config, Model) measurement.
# RSD = relative standard deviation (SD / mean); defined as 0 when there is
# only a single successful run, NA (for CexLen) when there is no data at all.
# group_by(across(all_of(...))) replaces the deprecated group_by_(.dots = ...).
d_model_config <- d %>%
  group_by(across(all_of(c("Config", "Model", "Category", all_params)))) %>%
  summarise(
    Reps = n(),
    SuccCount = sum(!is.na(Result)),
    SuccSD = sd(!is.na(Result)),
    Succ = SuccCount > 0,
    TimeMsAvg = mean(TimeMs, na.rm = TRUE),
    TimeMsRSD = ifelse(SuccCount == 1, 0, sd(TimeMs, na.rm = TRUE) / TimeMsAvg),
    IterationsAvg = mean(Iterations, na.rm = TRUE),
    IterationsRSD = ifelse(SuccCount == 1, 0, sd(Iterations, na.rm = TRUE) / IterationsAvg),
    ArgSizeAvg = mean(ArgSize, na.rm = TRUE),
    ArgSizeRSD = ifelse(SuccCount == 1, 0, sd(ArgSize, na.rm = TRUE) / ArgSizeAvg),
    ArgDepthAvg = mean(ArgDepth, na.rm = TRUE),
    ArgDepthRSD = ifelse(SuccCount == 1, 0, sd(ArgDepth, na.rm = TRUE) / ArgDepthAvg),
    CexLenRSD = ifelse(sum(!is.na(CexLen)) == 0, NA,
                       ifelse(sum(!is.na(CexLen)) == 1, 0,
                              sd(CexLen, na.rm = TRUE) / mean(CexLen, na.rm = TRUE)))
    )

n_meas_total <- nrow(d_model_config)
n_meas_succ <- nrow(d_model_config %>% filter(Succ))

# Sanity check: exactly one measurement per (model, config) pair is expected.
if (n_meas_total !=  n_models_total * n_configs_total) {
  warning(sprintf("%d models * %d configs = %d expected measurements, but there are %d actual measurements",
                  n_models_total, n_configs_total, n_models_total * n_configs_total, n_meas_total))
}
# Per-model aggregation: total successful runs and number of verifying configs.
# (SuccCount is recomputed after SuccRunCount consumed the old value;
# summarise evaluates its expressions sequentially.)
d_model <- d_model_config %>% group_by(Model, Category) %>%
  summarise(
    SuccRunCount = sum(SuccCount),
    SuccCount = sum(Succ),
    Succ = SuccCount > 0)

n_models_succ <- nrow(d_model %>% filter(Succ))

# Per-configuration aggregation.
d_config <- d_model_config %>%
  group_by(across(all_of(c("Config", all_params)))) %>%
  summarise(
    SuccRunCount = sum(SuccCount),
    SuccCount = sum(Succ))

Terminology

  • A run is a single execution of the algorithm configuration on an input model.
    • A run is successful if it terminates within the time limit.
  • A measurement is the collection of all repeated runs of a configuration on a model.
    • A measurement is successful if it includes at least one successful run. In this case we also say that the configuration verified the model.

Results

  • There are 30 models (from 5 categories) and 12 configurations, giving 360 measurements.
    • Each measurement was repeated 30 times, giving a total number of 10800 runs.
  • 7080 / 10800 runs (66%) are successful with a timeout of 180 s.
    • 261 / 360 measurements (72%) are successful.
    • There are 389 problems (see details later).
  • 28 / 30 models (93%) were verified by at least one configuration.

2.2 Problems

  • There are 389 empty results (no timeout, no exception). This usually happens when the SMT solver crashes.

  • There are 0 exceptions.

# Tabulate exception types per configuration, if any occurred.
# NOTE(review): d_exceptions is not defined anywhere in this file as shown —
# presumably it is built in an earlier chunk; confirm before reusing this code.
if (n_rows_exception > 0) {
  knitr::kable(d_exceptions %>% group_by(Type = Result) %>% summarise(Configs = toString(unique(Config)), Count = n()))
}

2.3 Models

The 30 models are distributed in the 5 categories in the following way.

# Bar chart: number of models per category (one row per model in d_model).
# guides(fill = "none") is the supported way to drop the legend; the original
# guides(fill = F) relied on the reassignable F and a deprecated logical API.
ggplot(d_model) +
  geom_bar(aes(x = Category, fill = Category)) +
  coord_flip() + guides(fill = "none") +
  scale_fill_brewer(palette = "Set1") +
  ggtitle("Distribution of models among the categories")

The following plots show for each model the number of successful runs and the number of configurations that verified them.

# Side by side: successful runs per model (left) and the number of
# configurations that verified each model (right), both sorted descending.
grid.arrange(
  ncol = 2,
  ggplot(d_model) +
    geom_point(aes(x = reorder(Model, -SuccRunCount), y = SuccRunCount, color = Category), size = 2) +
    scale_color_brewer(palette = "Set1") + theme_rotate_x +
    labs(x = "Model", y = "Successful runs", title = "Number of successful runs for the models"),
  ggplot(d_model) +
    geom_point(aes(x = reorder(Model, -SuccCount), y = SuccCount, color = Category), size = 2) +
    scale_color_brewer(palette = "Set1") + theme_rotate_x +
    coord_cartesian(ylim = c(0, n_configs_total)) +
    labs(x = "Model", y = "Verified by", title = "Number of configs that verified the models"))

The following table summarizes the attributes of the models. Hardware models can be described by the number of hardware elements, while software and PLC models can be described by the number of control locations and statements.

knitr::kable(d_model_attrs %>% select(Model, Inputs, Latches, AndGates) %>% filter(!is.na(Inputs)))
Model Inputs Latches AndGates
hw/139444p22 244 322 5549
hw/6s282b01 44 637 3185
hw/beemadd3b1 60 56 876
hw/bj08amba4g5 11 33 13585
hw/bobtuint12neg 212 207 1937
hw/intel001 31 23 240
hw/mentorbm1p04 100 2373 17508
hw/oski15a14b16s 1023 3451 33367
hw/pdtvistwoall1 6 31 725
hw/texaspimainp08 14 42 1955
knitr::kable(d_model_attrs %>% select(Model, Locations, Edges, Assigns, Assumes, Havocs) %>% filter(!is.na(Locations)))
Model Locations Edges Assigns Assumes Havocs
eca/prob1_lab15 317 393 236 156 1
eca/prob1_lab19 322 403 236 166 1
eca/prob2_lab57 312 408 211 196 1
eca/prob3_lab03 1261 1436 1081 354 1
eca/prob3_lab48 1280 1474 1081 392 1
locks/locks_10_2 26 36 10 24 2
locks/locks_11_8 16 21 5 14 2
locks/locks_14_1 9 10 2 6 2
locks/locks_15_0 9 10 2 6 2
locks/locks_5_4 13 16 4 10 2
plc/plc1 66 70 51 11 8
plc/plc2 175 196 135 40 21
plc/plc3 175 196 135 40 21
plc/plc4a 175 196 135 40 21
plc/plc4b 175 196 135 40 21
ssh/s3_clnt_1 187 262 79 154 29
ssh/s3_clnt_3 193 268 85 154 29
ssh/s3_srvr_1 233 323 102 184 37
ssh/s3_srvr_3 230 320 100 184 36
ssh/s3_srvr_4 230 320 100 184 36

2.4 Configurations

Configurations are listed in the following table with their abbreviation, parameters, successful runs and verified models.

knitr::kable(d_config)
Config Randomized Domain Refinement SuccRunCount SuccCount
EB-D DET EXPL BIN 622 21
EB-S SEARCH EXPL BIN 618 22
EB-V VARS EXPL BIN 573 21
ES-D DET EXPL SEQ 570 19
ES-S SEARCH EXPL SEQ 629 22
ES-V VARS EXPL SEQ 582 21
PB-D DET PRED BIN 656 22
PB-S SEARCH PRED BIN 518 24
PB-V VARS PRED BIN 685 25
PS-D DET PRED SEQ 544 19
PS-S SEARCH PRED SEQ 534 22
PS-V VARS PRED SEQ 549 23

The following plots show the number of models verified and the successful runs by each config. The number of models verified ranges between 19 and 25, while the number of successful runs is between 518 and 685.

# Side by side: models verified (left) and successful runs (right) per config,
# sorted ascending; geom_col() is shorthand for geom_bar(stat = "identity").
grid.arrange(
  ncol = 2,
  ggplot(d_config) +
    geom_col(aes(x = reorder(Config, SuccCount), y = SuccCount, fill = Randomized)) +
    scale_fill_brewer(palette = "Set1") + theme_rotate_x +
    labs(x = "Config", y = "Models verified", title = "Number of models verified by each config"),
  ggplot(d_config) +
    geom_col(aes(x = reorder(Config, SuccRunCount), y = SuccRunCount, fill = Randomized)) +
    scale_fill_brewer(palette = "Set1") + theme_rotate_x +
    labs(x = "Config", y = "Succ. runs", title = "Number of successful runs by each config"))

The following plot compares the number of verified models to the number of successful runs for each configuration.

# Verified models vs. successful runs, one point per configuration.
ggplot(d_config, aes(x = SuccCount, y = SuccRunCount, color = Randomized)) +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  labs(x = "Number of models verified", y = "Number of succ. runs")

2.5 Output metrics

The following histograms show an overview on the range of the output metrics colored by the category.

# Overview of each output metric (log-scaled x), colored by category.
# na.rm = TRUE drops runs where the metric is undefined (e.g. unfinished runs);
# spelled out instead of the reassignable shorthand T.
grid.arrange(
  ggplot(d) + geom_bar(aes(!is.na(Result), fill = Category)) + xlab("Succ") + scale_fill_brewer(palette = "Set1"),
  ggplot(d) + geom_histogram(aes(TimeMs, fill = Category), na.rm = TRUE) + scale_x_log10() + scale_fill_brewer(palette = "Set1"),
  ggplot(d) + geom_histogram(aes(Iterations, fill = Category), na.rm = TRUE) + scale_x_log10() + scale_fill_brewer(palette = "Set1"),
  ggplot(d) + geom_histogram(aes(ArgSize, fill = Category), na.rm = TRUE) + scale_x_log10() + scale_fill_brewer(palette = "Set1"),
  ggplot(d) + geom_histogram(aes(ArgDepth, fill = Category), na.rm = TRUE) + scale_x_log10() + scale_fill_brewer(palette = "Set1"),
  ggplot(d) + geom_histogram(aes(CexLen, fill = Category), na.rm = TRUE) + scale_x_log10() + scale_fill_brewer(palette = "Set1"),
  ncol = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

The following scatterplot matrix shows the numerical output metrics.

ggpairs(d %>% filter(!is.na(Result)) %>% select(TimeMs, Iterations, ArgSize, ArgDepth, CexLen))
## Warning in (function (data, mapping, alignPercent = 0.6, method =
## "pearson", : Removed 4546 rows containing missing values
## Warning in (function (data, mapping, alignPercent = 0.6, method =
## "pearson", : Removed 4546 rows containing missing values
## Warning in (function (data, mapping, alignPercent = 0.6, method =
## "pearson", : Removed 4546 rows containing missing values
## Warning in (function (data, mapping, alignPercent = 0.6, method =
## "pearson", : Removed 4546 rows containing missing values
## Warning: Removed 4546 rows containing missing values (geom_point).

## Warning: Removed 4546 rows containing missing values (geom_point).

## Warning: Removed 4546 rows containing missing values (geom_point).

## Warning: Removed 4546 rows containing missing values (geom_point).
## Warning: Removed 4546 rows containing non-finite values (stat_density).

3 Success rates (RQ1)

The following heatmaps show for each measurement whether it was successful (at least one successful run) and the number of successful executions (if there were any). In the latter plot, those measurements are marked (with circles for randomized search and triangles for randomized variable names) where a randomized configuration verified a model that its deterministic counterpart could not.

# Mark measurements where a randomized configuration verified a model that
# its deterministic counterpart (same Domain and Refinement) could not:
# Marked = "V" (random variable names), "S" (random search), otherwise NA.
d_model_config2 <- d_model_config %>% ungroup() %>%
  select(Config, Model, Randomized, Domain, Refinement, SuccCount)
# Join every row (suffix .x) with its deterministic counterpart (suffix .y).
d_model_config2 <- inner_join(
  d_model_config2,
  d_model_config2 %>% filter(Randomized == "DET"),
  by = c("Model", "Domain", "Refinement")) %>%
  mutate(Marked = ifelse((SuccCount.x > 0) & (SuccCount.y == 0), ifelse(Randomized.x == "VARS", "V", "S"), NA))

# NOTE(review): this copies by position and assumes inner_join() kept the row
# order of d_model_config — verify; a reordering join would misalign Marked.
d_model_config$Marked <- d_model_config2$Marked

# Heatmaps over Config x Model: success (left) and number of successful runs
# (right). Points on the right flag measurements where a randomized config
# verified a model its deterministic counterpart could not.
# guides(shape = "none") and na.rm = TRUE replace the deprecated F/T shorthands.
grid.arrange(
  ggplot(d_model_config, aes(Config, Model)) +
    geom_tile(aes(fill = Succ), color = "black") +
    theme_noticks + theme_rotate_x +
    ggtitle("At least one successful run"),
  ggplot(d_model_config, aes(Config, Model)) +
    # Map zero counts to NA so they render white instead of the gradient floor.
    geom_tile(aes(fill = ifelse(SuccCount > 0, SuccCount, NA)), color = "black") +
    geom_point(aes(shape = Marked), na.rm = TRUE) + guides(shape = "none") +
    theme_noticks + theme_rotate_x +
    scale_fill_gradient(low = "red", high = "green", na.value = "white", limits = c(1, NA), breaks = c(1, 10, 20, 30)) +
    labs(title = "Number of successful runs", fill = "Count"),
  ncol = 2)

# Models verified by at least one deterministic / randomized configuration,
# and the set differences between the two groups.
det_succ <- unique(d$Model[which(!is.na(d$Result) & d$Randomized == "DET")])
rnd_succ <- unique(d$Model[which(!is.na(d$Result) & d$Randomized != "DET")])
det_only <- setdiff(det_succ, rnd_succ)
rnd_only <- setdiff(rnd_succ, det_succ)

There are 0 models that were only verified by deterministic configs.

There are 3 models that were only verified by randomized configs: hw/139444p22, hw/intel001, ssh/s3_srvr_4.

4 Variance of the output metrics (RQ2)

For each measurement, there are multiple runs, forming a distribution over the output metrics.

4.1 Individual distributions

The following plots show the distributions of the output metrics for each measurement individually.

# Per-measurement distribution of successful runs (faceted Model x Config).
ggplot(d) +
  geom_bar(aes(!is.na(Result), fill = Randomized)) +
  facet_grid(Model ~ Config) +
  theme_bw() + theme(strip.text.y = element_text(angle = 0)) + theme_rotate_x +
  scale_fill_brewer(palette = "Set1") +
  labs(x = "Succ", title = "Distributions of successful executions")

# Helper: faceted log-scale histogram of one output metric per measurement.
# Factored out because the five metric plots below were identical copies.
metric_hist <- function(metric, title) {
  ggplot(d) +
    geom_histogram(aes(.data[[metric]], fill = Randomized), bins = 15, na.rm = TRUE) +
    facet_grid(Model ~ Config) +
    theme_bw() + theme(strip.text.y = element_text(angle = 0)) + theme_rotate_x +
    scale_x_log10() + scale_fill_brewer(palette = "Set1") +
    xlab(metric) +
    ggtitle(title)
}

metric_hist("TimeMs", "Distributions of execution time")

metric_hist("Iterations", "Distributions of iterations")

metric_hist("ArgSize", "Distributions of ARG size")

metric_hist("ArgDepth", "Distributions of ARG depth")

metric_hist("CexLen", "Distributions of cex. length")

4.2 Individual deviations

In this research question we are mainly interested in the deviations of the distributions. The following heatmaps summarize the deviations for each measurement and each output metric. The SuccSD variable shows the standard deviation of the success of the verification (0: false, 1: true). Other metrics correspond to the relative standard deviation (RSD = SD / mean) of the output metrics.

# Helper: Config x Model heatmap of one deviation metric. Factored out
# because the six plots below were identical except for the column and title.
deviation_heatmap <- function(metric, title) {
  ggplot(d_model_config, aes(Config, Model)) +
    geom_tile(aes(fill = .data[[metric]]), color = "black") +
    theme_noticks + theme_rotate_x +
    fill_heat + labs(fill = metric) + ggtitle(title)
}

# Deviations of every output metric for each measurement.
grid.arrange(
  deviation_heatmap("SuccSD", "Success SD"),
  deviation_heatmap("TimeMsRSD", "Time RSD"),
  deviation_heatmap("IterationsRSD", "Iterations RSD"),
  deviation_heatmap("ArgSizeRSD", "ARG size RSD"),
  deviation_heatmap("ArgDepthRSD", "ARG depth RSD"),
  deviation_heatmap("CexLenRSD", "Counterexample length RSD"),
  ncol = 2)

4.3 Summarized deviations

The following plots summarize the distribution of the deviations, grouped by the parameter that is randomized.

# Helper: boxplot of one deviation metric grouped by the randomized parameter.
# Factored out to avoid six near-identical plot definitions; na.rm = TRUE
# replaces the reassignable shorthand T.
deviation_boxplot <- function(metric) {
  ggplot(d_model_config) +
    geom_boxplot(aes(x = Randomized, y = .data[[metric]]), na.rm = TRUE) +
    ylab(metric) + theme_rotate_x
}

grid.arrange(
  deviation_boxplot("SuccSD"),
  deviation_boxplot("TimeMsRSD"),
  deviation_boxplot("IterationsRSD"),
  deviation_boxplot("ArgSizeRSD"),
  deviation_boxplot("ArgDepthRSD"),
  deviation_boxplot("CexLenRSD"),
  ncol = 6)

The following plot shows the same data, but using a uniform y axis scale.

# Long format: one row per (measurement, metric) with its deviation value.
# pivot_longer() supersedes the retired gather().
d_model_config2 <- d_model_config %>%
  pivot_longer(
    c(SuccSD, TimeMsRSD, IterationsRSD, ArgSizeRSD, ArgDepthRSD, CexLenRSD),
    names_to = "Metric", values_to = "Deviation"
  )
# All metrics on one shared y scale, colored by the randomized parameter.
ggplot(d_model_config2) +
  geom_boxplot(aes(Metric, Deviation, color = Randomized), na.rm = TRUE)

The following plot shows the same data, but outliers above 1.2 are trimmed (there are 5 of them).

# Same boxplot with outlier deviations (>= 1.2) trimmed; note the filter
# also drops rows whose Deviation is NA (filter removes NA conditions).
ggplot(d_model_config2 %>% filter(Deviation < 1.2)) +
  geom_boxplot(aes(Metric, Deviation, color = Randomized), na.rm = T)

The following parallel coordinate plots show the deviations of each output metric, where each line corresponds to a measurement.

# Parallel coordinate plots of the RSD metrics: one line per measurement,
# split by which parameter was randomized.
grid.arrange(
  ncol = 2,
  (d_model_config %>% ungroup() %>%
     filter(Randomized == "SEARCH") %>%
     select(TimeMsRSD, IterationsRSD, ArgSizeRSD, ArgDepthRSD, CexLenRSD) %>%
     ggparcoord()) +
    ggtitle("RSDs for randomized search"),
  (d_model_config %>% ungroup() %>%
     filter(Randomized == "VARS") %>%
     select(TimeMsRSD, IterationsRSD, ArgSizeRSD, ArgDepthRSD, CexLenRSD) %>%
     ggparcoord()) +
    ggtitle("RSDs for randomized variables"))

The following table shows the mean values of the deviations for each output metric grouped by the parameter that is randomized.

# Mean deviation per output metric, grouped by the randomized parameter.
# na.rm = TRUE (spelled out, not the reassignable T) skips measurements
# where a deviation is undefined (e.g. CexLenRSD with no counterexamples).
knitr::kable(d_model_config %>% group_by(Randomized) %>% summarize(
  Succ = mean(SuccSD, na.rm = TRUE),
  TimeMs = mean(TimeMsRSD, na.rm = TRUE),
  Iterations = mean(IterationsRSD, na.rm = TRUE),
  ArgSize = mean(ArgSizeRSD, na.rm = TRUE),
  ArgDepth = mean(ArgDepthRSD, na.rm = TRUE),
  CexLen = mean(CexLenRSD, na.rm = TRUE)
), caption = "Mean value of the deviations")
Mean value of the deviations
Randomized Succ TimeMs Iterations ArgSize ArgDepth CexLen
DET 0.0132587 0.0326082 0.0000000 0.0000000 0.0000000 0.0000000
SEARCH 0.0652476 0.1948332 0.0856263 0.1641061 0.0757643 0.0098402
VARS 0.0551681 0.1447746 0.0282938 0.1409662 0.0409520 0.0081250

The following table shows the mean values of the deviations for each output metric for each configuration.

# Mean deviation per output metric for each configuration.
# na.rm = TRUE (spelled out, not the reassignable T) skips undefined deviations.
knitr::kable(d_model_config %>% group_by(Config) %>% summarize(
  Succ = mean(SuccSD, na.rm = TRUE),
  TimeMs = mean(TimeMsRSD, na.rm = TRUE),
  Iterations = mean(IterationsRSD, na.rm = TRUE),
  ArgSize = mean(ArgSizeRSD, na.rm = TRUE),
  ArgDepth = mean(ArgDepthRSD, na.rm = TRUE),
  CexLen = mean(CexLenRSD, na.rm = TRUE)
), caption = "Mean value of the deviations")
Mean value of the deviations
Config Succ TimeMs Iterations ArgSize ArgDepth CexLen
EB-D 0.0149925 0.0318736 0.0000000 0.0000000 0.0000000 0.0000000
EB-S 0.0309747 0.1130352 0.1113018 0.1039979 0.0685456 0.0112486
EB-V 0.0683914 0.1843390 0.0394242 0.2805611 0.0975525 0.0000000
ES-D 0.0000000 0.0330502 0.0000000 0.0000000 0.0000000 0.0000000
ES-S 0.0182574 0.1560224 0.0769143 0.1800610 0.1006171 0.0040703
ES-V 0.0313430 0.0641824 0.0043733 0.0381368 0.0138147 0.0304686
PB-D 0.0115249 0.0318854 0.0000000 0.0000000 0.0000000 0.0000000
PB-S 0.0878830 0.1376548 0.0476057 0.0979967 0.0533533 0.0070693
PB-V 0.0496457 0.1849910 0.0470859 0.1368538 0.0277514 0.0000000
PS-D 0.0265174 0.0338148 0.0000000 0.0000000 0.0000000 0.0000000
PS-S 0.1238753 0.3778186 0.1101400 0.2803788 0.0825784 0.0160288
PS-V 0.0712922 0.1385212 0.0195455 0.1118677 0.0283993 0.0000000