
# ----------------------------------------------------------------------------------------------------------
# global options

options(stringsAsFactors = FALSE)

# ----------------------------------------------------------------------------------------------------------
# load and install binary packages

ipak <- function(pkg){
    new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
    if(length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
    sapply(pkg, require, character.only = TRUE)
}

packages <- c("data.table", "lme4", "car", "effects", "lsmeans", "ResourceSelection",
                       "ggplot2", "ggthemes", "scales", "reshape2", "plyr")
ipak(packages)

# ----------------------------------------------------------------------------------------------------------
# set working directory

data_path <- "my_data_filepath"
results_path <- "my_results_filepath"
setwd(results_path)

# ------------------------------------------------------------------------------------
# set global ggplot theme

theme_publication <- function(base_size = 12, base_family = "Helvetica", ...) {
      require(grid)
      require(ggthemes)
      (theme_foundation(base_size = base_size, base_family = base_family)
       + theme(plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
               text = element_text(),
               panel.background = element_rect(color = NA),
               plot.background = element_rect(color = NA),
               panel.border = element_rect(color = "black", size = 1),
               axis.title = element_text(face = "plain", size = rel(1)),
               axis.title.x = element_text(size = 8, margin = margin(t=10)),
               axis.title.y = element_text(size = 8, angle = 90, margin = margin(r=10)),
               axis.text.x = element_text(size = 7.5, angle = 0, margin = margin(t=2)),
               axis.text.y = element_text(size = 7.5, hjust = 1, margin = margin(r=2)),
               axis.ticks = element_line(),
               strip.text = element_text(size = 7.5),
               strip.background = element_blank(),
               plot.margin = unit(c(10,5,5,5),"mm"),
               panel.grid.minor = element_blank(),
               panel.grid.major.y = element_line(size=.5, color="#f0f0f0"),
               panel.grid.major.x = element_blank(),
               legend.key = element_rect(color = NA),
               legend.spacing = unit(0.1, "cm"),
               legend.margin = margin(t=3,r=3,b=3,l=3, unit="pt"),
               legend.title = element_text(size = 8, face = "plain"),
               legend.key.size = unit(.6, units = "line"),
               legend.text = element_text(size = 7),
               legend.background = element_rect(fill = "gray95", color = "gray20",
                                                size = 0.5, linetype = "dotted")
          ))
}

# ----------------------------------------------------------------------------------------------------------          
# load data

rawEmpInt <- read.csv(file.path(data_path, "empirical_interval_individual_lme_data.csv"))
catEmpInt <- read.csv(file.path(data_path, "empirical_summary_interval_lme_data.csv"))

# ---------------------------------------------------------------------------------------------------------- 
# omit Cuscuta (only used for supplementary materials)

# rawEmpInt <- rawEmpInt[!(rawEmpInt$group %in% "Cuscuta"), ]
# catEmpInt <- catEmpInt[!(catEmpInt$group %in% "Cuscuta"), ]

# ---------------------------------------------------------------------------------------------------------- 
# clean and reshape raw data for counts

rawEmpInt_long <- melt(rawEmpInt, 
                       id.vars = c("main", "group", "type", "dating", "constraint", "community_ID"), 
                       measure.vars = c("error_type.MPD_beta", "error_type.MNND_beta", "error_type.MPD_alpha",
                                        "error_type.MNND_alpha", "error_type.PD_alpha"), 
                       variable.name = "metric", 
                       value.name = "error_type")
                       
rawEmpInt_long$metric_ab <- gsub(".+\\.(.+)", "\\1", rawEmpInt_long$metric)    
rawEmpInt_long$metric <- gsub("(.+)\\_.+", "\\1", rawEmpInt_long$metric_ab)
rawEmpInt_long$ab <- gsub(".+\\_(.+)", "\\1", rawEmpInt_long$metric_ab)         

# aggregate counts
rawEmpInt_count <- ddply(rawEmpInt_long, .(main, group, type, dating, constraint, metric, ab, error_type), summarise, count = length(error_type))
rawEmpInt_count <- ddply(rawEmpInt_count, .(main, group, type, dating, constraint, metric, ab), transform, total = sum(count))
rawEmpInt_count <- na.omit(rawEmpInt_count)

# exclude RAxML dating method
rawEmpInt_count <- rawEmpInt_count[!(rawEmpInt_count$dating %in% "RAxML"), ]

# convert to factors
facvars <- c("error_type", "main", "group", "type", "dating", "constraint", "metric", "ab")
rawEmpInt_count[, facvars] <- lapply(rawEmpInt_count[, facvars], factor)

# ---------------------------------------------------------------------------------------------------------- 
# clean summary data

# exclude RAxML dating method
catEmpInt <- catEmpInt[!(catEmpInt$dating %in% "RAxML"), ]

# convert to factors
facvars <- c("tree", "type", "dating", "group", "metric", "ab", "constraint")
catEmpInt[, facvars] <- lapply(catEmpInt[, facvars], factor)


########################################################################
########## Comparisons of error by community type and dating method ############
########################################################################

model1_a_emp <- glm(cbind(error, 1800 - error) ~ dating * metric * type + constraint + group,      
          family = binomial(link = "logit"),
          data = catEmpInt[with(catEmpInt, ab %in% "alpha"), ])  
summary(model1_a_emp)
Anova(model1_a_emp, type = "III")

model1_b_emp <- glm(cbind(error, 1800 - error) ~ dating * metric * type + constraint + group,      
          family = binomial(link = "logit"),
          data = catEmpInt[with(catEmpInt, ab %in% "beta"), ])          
summary(model1_b_emp)
Anova(model1_b_emp, type = "III")

# goodness of fit
hoslem.test(model1_a_emp$y, fitted(model1_a_emp), g = length(coef(model1_a_emp)) + 1)
hoslem.test(model1_b_emp$y, fitted(model1_b_emp), g = length(coef(model1_b_emp)) + 1)

# contrasts
datingMeans_a_emp <- pmmeans(model1_a_emp, specs = ~ dating | metric + type) # highest error is "BLADJ", then "r8"
datingContr_a_emp <- contrast(datingMeans_a_emp, method = "pairwise", adjust = "holm")
datingContr_a_emp_df <- summary(datingContr_a_emp) 
datingContr_a_emp_ci <- confint(datingContr_a_emp)

datingMeans_b_emp <- pmmeans(model1_b_emp, specs = ~ dating | metric + type) # highest error is "BLADJ", then "r8"
datingContr_b_emp <- contrast(datingMeans_b_emp, method = "pairwise", adjust = "holm")
datingContr_b_emp_df <- summary(datingContr_b_emp) 
datingContr_b_emp_ci <- confint(datingContr_b_emp)

# marginal effects
eff1_a <- allEffects(model1_a_emp)
eff1_a_df <- as.data.frame(eff1_a[["dating:metric:type"]])
eff1_a_df$ab <- "Alpha" 

eff1_b <- allEffects(model1_b_emp)
eff1_b_df <- as.data.frame(eff1_b[["dating:metric:type"]])
eff1_b_df$ab <- "Beta"

eff1_df_emp <- rbind.data.frame(eff1_a_df, eff1_b_df)
eff1_df_emp$analysis <- "Emp"

save(eff1_df_emp, file = "eff1_df_emp.Rdata")
# this file is combined with the simulated data model in the simulation analysis script and together they form Figure_4



########################################################################
############### Comparisons of error type (Sign, type 1, type 2) #################
########################################################################

model2_emp <- glm(cbind(count, 1800 - count) ~ error_type * metric * ab + main + group + type + dating + constraint,     
          family = binomial(link = "logit"),
          data = rawEmpInt_count)  
summary(model2_emp)

# goodness of fit
hoslem.test(model2_emp$y, fitted(model2_emp), g = length(coef(model2_emp)) + 1)

# contrasts
Means_2_emp <- pmmeans(model2_emp, specs = ~ error_type | metric + ab) 
Means_2_emp_contr <- contrast(Means_2_emp, method = "pairwise", adjust = "holm")
Means_2_emp_contr_df <- summary(Means_2_emp_contr) 
Means_2_emp_contr_ci <- confint(Means_2_emp_contr)

# marginal effects
eff2_emp <- allEffects(model2_emp)
eff2_emp_df <- as.data.frame(eff2_emp[["error_type:metric:ab"]])
eff2_emp_df <- na.omit(eff2_emp_df)

eff2_emp_df_stacked <- ddply(eff2_emp_df, .(metric, ab), transform, total = sum(fit))
eff2_emp_df_stacked <- within(eff2_emp_df_stacked, prop <- fit / total)
eff2_emp_df_stacked <- droplevels(eff2_emp_df_stacked)
eff2_emp_df_stacked$error_type <- factor(eff2_emp_df_stacked$error_type, levels = rev(levels(eff2_emp_df_stacked$error_type)))

figS1b <- ggplot(eff2_emp_df_stacked, aes(x = metric, y = prop, fill = error_type)) +
    geom_bar(stat = "identity", color = "black", size = 0.3) +
    facet_grid(. ~ ab, scales = "free_x", space = "free_x") +
    scale_fill_tableau(palette = "tableau10", name = "Error Type", guide = guide_legend(reverse = TRUE)) +
    scale_y_continuous(breaks = seq(0, 1, by = 0.2), labels = percent) +
    labs(x = "Metric", y = "Probability of error") +
    theme_publication() +
    theme(legend.position = "none")
ggsave(figS1b, file = "Figure_S1b.pdf", height = 2.93, width = 3.25)






