## Task: Read in data and reproduce crucial data preparation steps
## Author: MA (lead), LB small additions
## Date: January 2021

################################################################################
## setup
################################################################################

# Make the replication folder the working directory. The path is built with
# here(); the relative paths used further down (e.g. "0_Data") are read
# relative to this directory.
library(here)
setwd(here("ReplicationFiles_AngstBrandenberger2021/"))

## load libraries
library("statnet")
library("stringr")
library("GGally")
library("ggplot2")
library("RColorBrewer")
library("proxy")
library("mice")
library("cluster")
library("factoextra")
library("DiagrammeR")
library("colourvalues")
library("tibble")
library("dplyr")
library("forcats")
library("Bergm")
library("reshape2")
library("scales")
library("xtable")
library("factoextra")

## set universal seed
# Seeds R's global random number generator once for the whole script.
# Note: the mice() imputation call further down is additionally given its own
# `seed = 42` argument.
set.seed(808)

## package not on cran:
# library("devtools")
# devtools::install_github("rmcelreath/rethinking")
library("rethinking")

## install custom ERGM terms via:
#install.packages("/1_Analysis/ergm.userterms_3.9.0.tar.gz", type = "source")
library(ergm.userterms) 


################################################################################
## read in anonymized data
################################################################################

# Read every file found in "0_Data" with readRDS() and bind the result to a
# variable named exactly like the file (as returned by list.files()).
for (data_file in list.files("0_Data")) {
  assign(x = data_file, value = readRDS(file.path("0_Data", data_file)))
}
# the loop variable is only a helper; do not leave it in the workspace
rm(data_file)

# provisional (marked as temporary by the authors):
# NOTE(review): this repeats the library(here) / setwd() calls from the setup
# section at the top of the script and looks redundant -- confirm before
# removing.

library(here)
setwd(here("ReplicationFiles_AngstBrandenberger2021/"))

################################################################################
## prepare policy belief data
################################################################################

# recode belief data (likert scale) to numerical

# Translate a German Likert answer into its numeric agreement score:
#   "Stimme voll und ganz zu" -> 4
#   "Stimme eher zu"          -> 3
#   "Lehne eher ab"           -> 2
#   "Lehne voll und ganz ab"  -> 1
# Missing values and every other answer give NA. The scalar recoder is wrapped
# in Vectorize() so it can be called on a whole vector of answers.
recode_policyvar.fun <-
  Vectorize(
    function(entry) {
      # missing answers stay missing
      if (is.na(entry)) {
        return(NA)
      }
      likert_labels <- c(
        "Stimme voll und ganz zu",
        "Stimme eher zu",
        "Lehne eher ab",
        "Lehne voll und ganz ab"
      )
      likert_scores <- c(4, 3, 2, 1)
      # position of the first label equal to the answer (NA if none matches)
      hit <- match(TRUE, entry == likert_labels)
      if (is.na(hit)) {
        return(NA)
      }
      likert_scores[hit]
    }
  )

# Recode every cell of the belief items to its numeric score, then turn each
# recoded column into a factor (the imputation step below works on factors).
policybeliefvars_num <- data.frame(
  lapply(
    data.frame(
      apply(X = policybeliefvars[, ], MARGIN = c(1, 2), FUN = recode_policyvar.fun)
    ),
    factor
  )
)

# ----- imputation of policybeliefs -----

# Multiple imputation of the factor-coded belief items: m = 5 completed data
# sets, with a dedicated seed passed to mice(). Per the original author's note,
# factors are imputed by polytomous logistic regression.
imputed_policybeliefvars_num <- mice::mice(policybeliefvars_num, m = 5, seed = 42)

# Extract each completed data set and convert the factor columns back to the
# numeric Likert scores.
# The conversion must go through as.character(): as.numeric() applied directly
# to a factor returns the internal level codes (1, 2, 3, ...), which only
# coincide with the scores if every score occurs in that column. A column in
# which e.g. score 2 never appears would otherwise be silently recoded.
imputed_policybeliefvars_num_list <- lapply(
  seq_len(imputed_policybeliefvars_num$m),
  function(imp_i) {
    completed <- mice::complete(imputed_policybeliefvars_num, action = imp_i)
    data.frame(lapply(completed, function(col) as.numeric(as.character(col))))
  }
)

# label the rows of every completed data set with the row names of the
# original belief data
for (mat_i in seq_along(imputed_policybeliefvars_num_list)) {
  rownames(imputed_policybeliefvars_num_list[[mat_i]]) <- rownames(policybeliefvars)
}

# ---------- belief distance computation using gower  --------

# Append the opponent and ally columns to each imputed belief data set
belief_opps_allies_mat_list <- lapply(
  imputed_policybeliefvars_num_list,
  function(beliefs) cbind(beliefs, occ_opps, occ_allies)
)

# Gower dissimilarities, one matrix per imputed data set.
# - The leading columns (as many as there are belief items) are declared
#   "ordratio"; all remaining columns are declared "asymm".
# - Weights: every belief column gets ncol / (number of belief columns) and
#   every other column gets ncol / (number of other columns). Both groups
#   therefore sum to the same total weight, i.e. all belief variables combined
#   count as much as allies and opponents combined.
combined_gowerdist_list <- lapply(belief_opps_allies_mat_list, function(combined) {
  n_all <- ncol(combined)
  n_belief <- ncol(imputed_policybeliefvars_num_list[[1]])
  n_other <- n_all - n_belief
  var_names <- colnames(combined)

  gower <- daisy(
    combined,
    metric = "gower",
    type = list(
      "ordratio" = var_names[1:n_belief],
      "asymm" = var_names[(n_belief + 1):n_all]
    ),
    weights = c(
      rep(n_all / n_belief, times = n_belief),
      rep(n_all / n_other, times = n_other)
    )
  )
  as.matrix(gower)
})

# ---- set as poldist, subset for network participants
# and make sure order matches network object adjacency ----

# pick rows and columns by the row names of survey_only_mat, in that order
poldist_list <- lapply(
  combined_gowerdist_list,
  function(dist_mat) dist_mat[rownames(survey_only_mat), rownames(survey_only_mat)]
)

## rescale every distance matrix to the interval [0, 1]

poldist_list <- lapply(
  poldist_list,
  function(dist_mat) scales::rescale(dist_mat, to = c(0, 1))
)

## keep a second copy of the list under a different name
# (for bergm) the same object name cannot be passed to both edgecov and
# twopathedgecov, else edgeprob will struggle
# (gets converted to a non-conformable type in edgecov)
poldist_mat_list <- poldist_list

# ---- all-ones matrix for a closing twopath statistic without attributes ---

ones_mat <- matrix(
  data = 1,
  nrow = nrow(poldist_mat_list[[1]]),
  ncol = ncol(poldist_mat_list[[1]])
)


# ---- compute expertise similarity -----

# pairwise Manhattan distances computed from expertise_df, as a full matrix
dissimilmat_exp_manhattan <- as.matrix(
  proxy::dist(x = expertise_df, method = "Manhattan")
)
# select rows/columns by the row names of survey_only_mat so the order
# matches the network
dissimilmat_exp_manhattan_directed <- dissimilmat_exp_manhattan[
  rownames(survey_only_mat),
  rownames(survey_only_mat)
]

# ---- number of shared topics/issues between each pair of actors -----

# actor-by-actor matrix, labelled with the coded organization names;
# the diagonal stays 0
sametopicsmat <- matrix(0, nrow = nrow(atts_surveyonly), ncol = nrow(atts_surveyonly))
rownames(sametopicsmat) <- atts_surveyonly$organization_coded
colnames(sametopicsmat) <- atts_surveyonly$organization_coded

# Split each actor's comma-separated issue string once, up front (instead of
# re-splitting inside the double loop). The literal string "NA" marks a
# missing entry. Missing entries become an EMPTY set: keeping them as NA would
# make intersect(NA, NA) return one element, so two actors without any issues
# would wrongly be counted as sharing one topic.
issue_strings <- as.character(atts_surveyonly$issues_adjacency)
issue_strings[issue_strings %in% "NA"] <- NA_character_
issue_sets <- lapply(
  str_split(issue_strings, ","),
  function(tokens) tokens[!is.na(tokens)]
)

# seq_len() keeps the loops empty (rather than running over c(1, 0)) if
# there are no actors
for (i in seq_len(nrow(sametopicsmat))) {
  for (j in seq_len(ncol(sametopicsmat))) {
    if (i != j) {
      sametopicsmat[i, j] <- length(intersect(issue_sets[[i]], issue_sets[[j]]))
    }
  }
}
rm(issue_strings, issue_sets)

# ensure order matches
sametopicsmat_directed <- sametopicsmat[rownames(survey_only_mat), rownames(survey_only_mat)]

# ----- number of shared fora between each pair of actors -----

# actor-by-actor matrix, labelled with the coded organization names;
# the diagonal stays 0
forummat <- matrix(0, nrow = nrow(atts_surveyonly), ncol = nrow(atts_surveyonly))
rownames(forummat) <- atts_surveyonly$organization_coded
colnames(forummat) <- atts_surveyonly$organization_coded

# Split each actor's comma-separated forum string once, up front (instead of
# re-splitting inside the double loop). The literal string "NA" marks a
# missing entry. Missing entries become an EMPTY set: keeping them as NA would
# make intersect(NA, NA) return one element, so two actors without any forum
# would wrongly be counted as sharing one forum.
forum_strings <- as.character(atts_surveyonly$forums_adjacency)
forum_strings[forum_strings %in% "NA"] <- NA_character_
forum_sets <- lapply(
  str_split(forum_strings, ","),
  function(tokens) tokens[!is.na(tokens)]
)

# seq_len() keeps the loops empty (rather than running over c(1, 0)) if
# there are no actors
for (i in seq_len(nrow(forummat))) {
  for (j in seq_len(ncol(forummat))) {
    if (i != j) {
      forummat[i, j] <- length(intersect(forum_sets[[i]], forum_sets[[j]]))
    }
  }
}
rm(forum_strings, forum_sets)

# select rows/columns by the row names of survey_only_mat so the order
# matches the network
forummat_directed <- forummat[rownames(survey_only_mat), rownames(survey_only_mat)]

# --- number of fora per actor ----

# Collapse the forum count into categories stored as character labels:
#   0 -> "no forums", 2 or 3 -> "2-3 forums", 4 and above -> "more than 4";
#   any other count (e.g. 1) keeps its value as a string.
# NOTE(review): the top category is selected with `>= 4` although its label
# reads "more than 4"; the label is left untouched because later code may
# refer to it.
atts_surveyonly$nr_forums_ordinal <- ifelse(
  test = atts_surveyonly$nr_forums >= 4,
  yes = "more than 4",
  no = as.character(atts_surveyonly$nr_forums)
)
atts_surveyonly$nr_forums_ordinal[
  atts_surveyonly$nr_forums_ordinal %in% c("2", "3")
] <- "2-3 forums"
atts_surveyonly$nr_forums_ordinal[
  atts_surveyonly$nr_forums_ordinal %in% "0"
] <- "no forums"

