## Task: Analyzing information exchange network and information brokers
## Author: MA (lead), LB small additions
## Date: January 2021

################################################################################
################################################################################
################################################################################

## specify global theme option for publication-ready graphs using ggplot2
publicationtheme <- theme(plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
                          panel.background =  element_blank(), #element_rect(colour = NA),
                          plot.background = element_rect(colour = NA, fill = NA),
                          axis.title = element_text(face = "bold",size = rel(1)),
                          axis.title.y = element_text(angle=90,vjust =2),
                          axis.title.x = element_text(vjust = -0.2),
                          axis.text = element_text(), 
                          axis.line = element_line(colour="black"),
                          axis.ticks = element_line(),
                          panel.grid.major.y = element_line(colour="#f0f0f0"),
                          panel.grid.major.x = element_line(colour="#f0f0f0"),
                          panel.grid.minor = element_blank(),
                          legend.position = "bottom",
                          legend.key = element_blank(),
                          legend.key.size= unit(1, "cm"),
                          legend.text = element_text(face = "bold",size = rel(1)),
                          legend.background = element_blank(),
                          plot.margin=unit(c(10,5,5,5),"mm"),
                          strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
                          strip.text = element_text(face="bold") )

################################################################################
## Summary statistics and Clustering
################################################################################

##############
## Determining number of clusters
## Figure S2 in the supplementary materials
##############

# use gower dist based on first mice imputation run
distmat <- combined_gowerdist_list[[1]]

# finding number of clusters (silhouette and wss)

sil_p <- fviz_nbclust(x = as.matrix(distmat),
             FUNcluster = pam,
             method = "silhouette", diss = distmat,
             k.max = 20)
sil_p
ggsave("1_Analysis/output/FigureS2a_silhouette.pdf", width = 24, height = 10, units = 'cm')
#output: Figure 2a in the Supplementary Materials

wss_p <- fviz_nbclust(x = as.matrix(distmat),
             FUNcluster = pam,
             method = "wss", diss = distmat,
             k.max = 20)
wss_p
ggsave("1_Analysis/output/FigureS2b_wss.pdf", width = 24, height = 10, units = 'cm')
#output: Figure 2b in the Supplementary Materials

# ----- 3 cluster solution ------

grouping_kmedoids <- pam(x = distmat, 3, diss = T, keep.diss = T) # 3 cluster solution

cluster_solution <- factor(grouping_kmedoids$clustering[agg_atts$organization_coded]) # order according to aggregated atts
agg_atts$belief_cluster <- cluster_solution # attach to agg_atts

# actor types per cluster
table(agg_atts$belief_cluster,agg_atts$type_reduced)

##############
## Cluster solution details
## Figure S3 in the supplementary materials
##############

cluster_solution <- factor(grouping_kmedoids$clustering[rownames(policybeliefvars)])

plot_df <- data.frame(imputed_policybeliefvars_num_list[[1]], 
                      cluster = cluster_solution,
                      type = agg_atts$type_reduced)

colnames(plot_df) <- c("Fertilizer input \n into streams \n should be reduced",
                       "Landscape protection \n more important \n than hydropower",
                       "Agriculture \n more important \n than revitalisation",
                       "Support subsidies \n for large \n hydropower plants",
                       "Small-scale hydropower \n  has great potential",
                       "Support regionalization \n of water supply",
                       "Municipal input into \n hydropower project \n planning important",
                       "Flood protection too \n encumbered by nature \n protection laws",
                       "cluster",
                       "type")

jittered_plot <- function(data, mapping, width = 0.2, height = 0.2, alpha = 0.85, ...){
  ggplot(data = data, mapping = mapping) +
    geom_jitter(...)
}

ggpairs(plot_df, mapping = aes(color = cluster),
        legend = c(1,1), switch = "both",
        lower = (list(continuous = jittered_plot)), 
        columns = c(1:8),
        diag = list(continuous = wrap("barDiag", binwidth = 1)),
        upper = "blank") + theme_minimal(base_size = 8)
ggsave("1_Analysis/output/FigureS3_clustering_overview.pdf", width = 24, height = 24, units = 'cm')
##output: Figure 3 in the Supplementary Materials

##############
## Median summary for cluster solutions
## Table A1 in the article appendix
##############

median_summary <-
  plot_df[,!(colnames(plot_df) %in% "type")] %>% 
  group_by(cluster) %>%
  summarize_all(median)

# output latex table
xtable::xtable(median_summary, digits = 0)

##############
## Cluster-cluster information flows
## Figure 2 in the article
##############

## prepare cluster-cluster edgelist
cluster_cluster_el <- apply(info_el_directed[,c(1,2)], c(1,2), function(x) 
  as.numeric(agg_atts$belief_cluster[agg_atts$organization_coded == x]))

# occurence of every tie type
cl_cl_el <- data.frame(table(apply(cluster_cluster_el, 1, paste, collapse = "->")))
splits <- unlist(strsplit(as.character(cl_cl_el$Var1),split = "->"))
cl_cl_el$sender <- splits[seq.default(from = 1,to = length(splits)-1,by = 2)]
cl_cl_el$receiver <- splits[seq.default(from = 2,to = length(splits),by = 2)]

## plot
cl_cl_graph <- 
  create_graph() %>%
  add_nodes_from_table(
    table = data.frame(nodes = unique(c(cl_cl_el$sender,cl_cl_el$receiver)),
                       lab = c(
                         "Pro-Ecology",
                         "Neutral/Adminstrative",
                         "Pro-Development"
                       )),
    label_col = lab) %>%
  add_edges_from_table(
    table = cl_cl_el[,c(2:4)],
    from_col = sender,
    to_col = receiver,
    from_to_map = nodes)
cl_cl_graph <-
  cl_cl_graph %>%
  select_edges() %>%
  set_edge_attrs(edge_attr = penwidth,values = (cl_cl_graph$edges_df$Freq)/10) #%>%
  # set_edge_attrs(edge_attr = label,values = as.character(cl_cl_graph$edges_df$Freq))
## render graph
DiagrammeR::render_graph(cl_cl_graph)
## create new function because diagrammR is faulty
save_dot <- function(graphv, filename){
  dot_output <- generate_dot(graphv)
  dot_output <- gsub("\'","\"",dot_output) #because diagrammR does this wrong
  cat(dot_output,file = paste(filename))
}
## save plot in .dot format (Figure was then rendered via .dot)
save_dot(cl_cl_graph, filename = "1_Analysis/output/Figure2_cluster_cluster_dot.dot")
##output: Figure 2 in the article

##############
# Brokering information exchange
# Policy stance differences among two nodes (i, j) that are brokered by a third node k.
# Political distance each two-path brokered by different actor types, spans.
# Figure 3 in the article
##############

## use first mice run
poldist_imp1 <- poldist_list[[1]]

## manipulate data
dtpoldist_ij_forallpaths <- NULL
for(k in 1:nrow(atts_surveyonly)){
  # who gave info to k? = i
  i_gaveinfotok <- names(which(survey_only_mat[,atts_surveyonly$organization_coded[k]] == 1))
  # who received info from k? = j
  j_receivedinfofromk <- names(which(survey_only_mat[atts_surveyonly$organization_coded[k],] == 1))
  
  ## for each pair i-j => get poldist_imp1
  if(length(i_gaveinfotok) != 0 & length(j_receivedinfofromk)!= 0){
    for(i in i_gaveinfotok){
      for(j in j_receivedinfofromk){
        # 
        if(is.null(dtpoldist_ij_forallpaths)){
          dtpoldist_ij_forallpaths <- data.frame(k = atts_surveyonly$organization_coded[k], 
                                                 ktype = atts_surveyonly$type_reduced[k],
                                                 i = i, 
                                                 j = j,
                                                 poldistij = poldist_imp1[which(rownames(poldist_imp1) == i), which(rownames(poldist_imp1)==j)])
        }else{
          dtpoldist_ij_forallpaths <- rbind(dtpoldist_ij_forallpaths, data.frame(k = atts_surveyonly$organization_coded[k], 
                                                                                 ktype = atts_surveyonly$type_reduced[k],
                                                                                 i = i, 
                                                                                 j = j,
                                                                                 poldistij = poldist_imp1[which(rownames(poldist_imp1) == i), which(rownames(poldist_imp1)==j)]))
        }
      }
    }
  }
}

## create labels
table(atts_surveyonly$type_reduced)
dtpoldist_ij_forallpaths$ktype_label <- as.character(dtpoldist_ij_forallpaths$ktype)
dtpoldist_ij_forallpaths$ktype_label[dtpoldist_ij_forallpaths$ktype_label == 'Federal administration'] <- 'Federal administration (N=16)'
dtpoldist_ij_forallpaths$ktype_label[dtpoldist_ij_forallpaths$ktype_label == 'State administration'] <- 'State administration (N=16)'
dtpoldist_ij_forallpaths$ktype_label[dtpoldist_ij_forallpaths$ktype_label == 'Local Administration'] <- 'Local administration (N=28)'
dtpoldist_ij_forallpaths$ktype_label[dtpoldist_ij_forallpaths$ktype_label == 'Service Providers'] <- 'Service providers (N=24)'
dtpoldist_ij_forallpaths$ktype_label[dtpoldist_ij_forallpaths$ktype_label == 'Private sector'] <- 'Private sector (N=31)'
dtpoldist_ij_forallpaths$ktype_label[dtpoldist_ij_forallpaths$ktype_label == 'Science'] <- 'Science (N=9)'
dtpoldist_ij_forallpaths$ktype_label[dtpoldist_ij_forallpaths$ktype_label == 'Interest Groups'] <- 'Interest groups (N=39)'
dtpoldist_ij_forallpaths$ktype_label[dtpoldist_ij_forallpaths$ktype_label == 'Other actors'] <- 'Other actors (N=18)'
dtpoldist_ij_forallpaths$ktype_label <- factor(dtpoldist_ij_forallpaths$ktype_label, 
                                               levels = c('Other actors (N=18)',
                                                          'Interest groups (N=39)',
                                                          'Science (N=9)', 
                                                          'Private sector (N=31)', 
                                                          'Service providers (N=24)', 
                                                          'Local administration (N=28)', 
                                                          'State administration (N=16)', 
                                                          'Federal administration (N=16)'))
##
dtpoldist_ij_forallpaths$nr_twopaths <- NA
give.n <- function(x){
  y = .95
  label = paste0("(n=",length(x),")")
  return(data.frame(y, label))
}

## plot and save
ggplot(dtpoldist_ij_forallpaths, aes(x = ktype_label, y = poldistij))+
  geom_boxplot() +
  geom_jitter(alpha = .1, size = 1, height  = 0.05) +
  stat_summary(fun.data = give.n, geom = "text") +
  coord_flip() +
  publicationtheme +
  #xlab("Actor types who broker two-paths") +
  xlab("Broker k by actor types") +
  ylab("Difference in policy stances between i,j\n 0 = max. similar, 1 = max. dissimilar") +
  ggtitle("Difference in policy stances between i and j that broker k bridges", 
          subtitle = "Observations correspond to two-paths") +
  theme(
    plot.title = element_text(hjust = 0),
    plot.subtitle = element_text(hjust = 0)
  )
ggsave("1_Analysis/output/Figure3_brokering_distanceij_boxplot.pdf", width = 24, height = 10, units = 'cm')
##output: Figure 3 in the article

##############
## Summary table of variables
## Table 1 in the Supplementary Materials
##############

## nr issues
#agg_atts$issues_adjacency
agg_atts$nrissues <- stringr::str_count(agg_atts$issues_adjacency, ",")
missues <- data.frame(organization_coded = agg_atts$organization_coded, 
                      variable = rep("nIssues", nrow(agg_atts)), value = agg_atts$nrissues)

## nr projects
#agg_atts$projects_adjacency
agg_atts$nrprojects <- stringr::str_count(agg_atts$projects_adjacency, ",")
mprojects <- data.frame(organization_coded = agg_atts$organization_coded, 
                        variable = rep("nProjects", nrow(agg_atts)), value = agg_atts$nrprojects)

## actor types
#agg_atts$type_reduced
mtypes <- data.frame(organization_coded = agg_atts$organization_coded, 
                     variable = agg_atts$type_reduced, value = rep(1, nrow(agg_atts)))

## policy beliefs
#head(policybeliefvars)
#head(policybeliefvars_num)
policybeliefvars_num$organization_coded <- rownames(policybeliefvars)
mpolicy <- reshape2::melt(policybeliefvars_num, id.vars = 'organization_coded')
mpolicy$value <- as.numeric(mpolicy$value)

## expertise 
mexpert <- reshape2::melt(expertise_df)
names(mexpert) <- c("organization_coded", "variable", "value")

## participation in forums
#agg_atts$forums_adjacency
agg_atts$forumsnr <- stringr::str_count(agg_atts$forums_adjacency, ",")
mforums <- data.frame(organization_coded = agg_atts$organization_coded, 
                      variable = rep("nrForums"), value = agg_atts$forumsnr )

## combine all data sets: 
msummarydt <- rbind(missues, mprojects, mtypes, mpolicy, mexpert, mforums)

## summary table
actors <- rownames(forummat)
dtsum <- msummarydt[msummarydt$organization_coded %in% actors,] %>%
  group_by(variable) %>%
  summarise(n = n(), 
            mean = mean(value, na.rm = TRUE), 
            sd = sd(value, na.rm = TRUE),
            min = min(value, na.rm = TRUE), 
            quart1 = quantile(value, na.rm = TRUE)[2],
            median = median(value, na.rm = TRUE),
            quart3 = quantile(value, na.rm = TRUE)[4],
            max = max(value, na.rm = TRUE),
            missing = sum(ifelse(is.na(value), 1, 0))
  ) 
dtsum         
##Latex table: print(xtable::xtable(dtsum), include.rownames = FALSE)
## output: Table S1 in the Supplementary Materials

##############
## Summary bar chart of actor types
## Figure S1 in the Supplementary Materials
##############

##
mtypes$variable <- factor(mtypes$variable, 
                          levels = c("Federal administration",
                                     "State administration",
                                     "Local Administration",
                                     "Interest Groups" ,
                                     "Private sector" ,
                                     "Other actors" ,
                                     "Service Providers"  ,
                                     "Science"         ,
                                     "Politics" ), 
)

## plot
ggplot(mtypes[mtypes$organization_coded %in% actors,], aes(x = fct_rev(variable)))+
  geom_bar(stat = 'count')+
  coord_flip() +
  publicationtheme +
  ylab("Number of respondents") + xlab("") +
  geom_text(stat='count', aes(label=..count..), hjust=-.5)
ggsave(file = "1_Analysis/output/FigureS1_summary_actortype.pdf", width = 20, height = 10, units = 'cm')
## output: Figure 1 in Supplementary Materials

##############
## List of governance issues
## Table 2 in the Supplementary Materials
##############

dtiss <- el.issues[el.issues$sender_coded %in% actors,] %>%
  group_by(issue = issue_coded) %>%
  summarise(n = n(), 
            perc = round(n()/length(actors)*100,1))
dtiss
#Latex table: print(xtable::xtable(dtiss), include.rownames = FALSE)
# output: Table 2 in the Supplementary Materials

################################################################################
## BERGM
################################################################################

##############
## Preparations for the BERGM
##############

## load custom functions
source("1_Analysis/custom_bergm_functions.R")

## choice of actor types
table(atts_surveyonly$type_reduced)
type_levels_chosen <- c('Science', 
                        'Service Providers', 
                        'Interest Groups',
                        'Private sector',
                        'State administration',
                        'Federal administration')

##############
## Model 0 - whole network for priors for next model
## Figure A2 in the article appendix
## Goodness-of-fit: Figure S4 in the Supplementary Materials
##############

# specify priors
prior.meanma0miss <- c(-4, #assuming: negative density
                       rep(0,2*length(type_levels_chosen)),#let's be unsure about nodefactor type for the moment
                       0, #unsure indegree
                       0, #unsure outdegree
                       1, #positive gwesp
                       rep(1,length(type_levels_chosen))#positive type homophily
)
prior.sigmama0miss <- diag(c(1, #assuming: negative density
                             rep(3,2*length(type_levels_chosen)),#let's be unsure about nodefactor type for the moment
                             3, #unsure indegree
                             3, #unsure outdegree
                             1, #positive gewsp
                             rep(2,length(type_levels_chosen))#positive type homophily
),
nrow = length(prior.meanma0miss), 
ncol = length(prior.meanma0miss))

## run bergm
## This model takes a while to run, if you want, you can read in the result instead.
m0b <- readRDS("1_Analysis/output/bergm_fit_total_missing.RDS")
m0b <- Bergm::bergmM(info_nw_total_missing~
                       edges
                     + nodeofactor("type_reduced",
                                   levels = type_levels_chosen)
                     + nodeifactor("type_reduced",
                                   levels = type_levels_chosen)
                     + gwidegree(0.1, fixed = TRUE)
                     + gwodegree(1, fixed = TRUE)
                     + gwesp(0.2, fixed = TRUE)
                     + nodematch("type_reduced", diff = TRUE,
                                 levels = c(type_levels_chosen)) ,
                     prior.mean  = prior.meanma0miss,
                     prior.sigma = prior.sigmama0miss,
                     burn.in     = 100,
                     main.iters  = 2000,
                     aux.iters   = 2000,
                     nchains     = 4,
                     gamma       = 0.5,
                     missingUpdate = 500,
                     seed = 808)

## save for priors
post_m0b <- as.data.frame(m0b$Theta)
colnames(post_m0b) <- m0b$specs

# name coefficients
modterms.m0b <- c(
  'Edges (=intercept)',
  'Activity: Science', 
  'Activity: Service provider',
  'Activity: Interest group',
  'Activity: Private sector',
  'Activity: State admin', 
  'Activity: Federal admin', 
  'Popularity: Science', 
  'Popularity: Service provider',
  'Popularity: Interest group',
  'Popularity: Private sector',
  'Popularity: State admin', 
  'Popularity: Federal admin', 
  'Indegree distribution (gwindeg)',
  'Outdegree distribution (gwodeg)',
  'Triadic closure (gwesp)',
  'Actor type homophily: Science',
  'Actor type homophily: Service Providers',
  'Actor type homophily: Interest Groups',
  'Actor type homophily: Private sector',
  'Actor type homophily: State admin',
  'Actor type homophily: Federal admin')

# check summary of model and plot outputs
summary(m0b)
p0 <- plot_fun_bergm(m0b, modelterms = modterms.m0b)
ggsave(p0, file="1_Analysis/output/FigureA2_bergm_m0b.pdf", width = 28, heigh = 14, units = 'cm')
##output: Figure A2 in the article appendix. 

## run goodness-of-fit statistics
par(mfrow = c(1,1))
pdf(file = "1_Analysis/output/FigureS4_bergm_m0b_gof.pdf", width = 6, height = 7)
my_d_bgof(m0b,
          sample.size = 100,
          n.odeg = 10,
          n.ideg = 10,
          n.dist = 20,
          n.esp = 15,
          n.dsp = 10)
dev.off()
##output: Figure 2 in the Supplementary Materials.

##############
## Model 1 - presented in the article
## Figure A3 in the article appendix
## Goodness-of-fit: Figure S5 in the Supplementary Materials
##############

## specify priors
# specify priors (in part based on m0 :)
prior.meanma1 <- c(mean(post_m0b$edges), #based on m0
                   apply(post_m0b[,grepl("nodeofactor",
                                         colnames(post_m0b))],2,mean),# nodefactor based on m0
                   apply(post_m0b[,grepl("nodeifactor",
                                         colnames(post_m0b))],2,mean),# nodefactor based on m0
                   rep(0,2*(length(unique(atts_surveyonly$nr_forums_ordinal))-1)),#let's be unsure about nodefactor forum for the moment
                   mean(post_m0b[,grepl("gwideg",
                                        colnames(post_m0b))]), #gwideg based on m0
                   mean(post_m0b[,grepl("gwodeg",
                                        colnames(post_m0b))]), #gwodeg based on m0
                   mean(post_m0b[,grepl("gwesp",
                                        colnames(post_m0b))]), #gwesp based on m0
                   apply(post_m0b[,grepl("nodematch",
                                         colnames(post_m0b))],2,mean),# homophily based on m0
                   0, #unsure same forums
                   0, #unsure same issues
                   0, #unsure expertise
                   -1, #negative belief distance
                   rep(0, 6 + (length(unique(atts_surveyonly$nr_forums_ordinal))-1)), #unsure all twopaths interactions (6 org types + n forums)
                   -1, # negative general effect of poldist i-j on closing twopaths
                   -1 # negative general closing twopaths
)
prior.sigmama1 <- diag( c(0.1, #based on m0
                          rep(0.1,sum(grepl("nodeofactor", colnames(post_m0b)))),# nodefactor based on m0
                          rep(0.1,sum(grepl("nodeifactor", colnames(post_m0b)))),# nodefactor based
                          rep(3,2*(length(unique(atts_surveyonly$nr_forums_ordinal))-1)),#let's be unsure about nodefactor forum for the moment
                          sd(post_m0b[,grepl("gwideg",
                                             colnames(post_m0b))]), #gwideg based on m0
                          sd(post_m0b[,grepl("gwodeg",
                                             colnames(post_m0b))]), #gwodeg based on m0
                          sd(post_m0b[,grepl("gwesp",
                                             colnames(post_m0b))]), #gwesp based on m0
                          rep(0.1,
                              sum(grepl("nodematch", colnames(post_m0b)))),# homophily based on m0
                          3, #unsure same forums
                          3, #unsure same issues
                          3, #unsure expertise
                          1, #negative belief distance
                          rep(3, 6 + (length(unique(atts_surveyonly$nr_forums_ordinal))-1)
                          ), #unsure all twopaths interactions (6 org types + n forums)
                          1, #negative general effect of belief distance for closing twopaths
                          1 #negative general closing twopaths
),
nrow = length(prior.meanma1), 
ncol = length(prior.meanma1))
# run bergm

m1_posterior_list <-
  lapply(c(1:length(poldist_list)), function(mat_index){
    m1 <- bergm(info_nw_directed~
                  edges
                + nodeofactor("type_reduced",
                              levels = type_levels_chosen)
                + nodeifactor("type_reduced",
                              levels = type_levels_chosen)
                + nodeofactor("nrforums_ordinal",
                              levels = unique(
                                atts_surveyonly$nr_forums_ordinal)[unique(atts_surveyonly$nr_forums_ordinal) != 
                                                                     "no forums"])
                + nodeifactor("nrforums_ordinal",
                              levels = unique(
                                atts_surveyonly$nr_forums_ordinal)[unique(atts_surveyonly$nr_forums_ordinal) != 
                                                                     "no forums"])
                + gwidegree(0.1, fixed = TRUE)
                + gwodegree(1, fixed = TRUE)
                + gwesp(0.2, fixed = TRUE)
                + nodematch("type_reduced", diff = TRUE,
                            levels = c(type_levels_chosen))
                + edgecov(forummat_directed)
                + edgecov(sametopicsmat_directed)
                + edgecov(dissimilmat_exp_manhattan_directed)
                + edgecov(poldist_list[[mat_index]])
                + twopathedgcovby(poldist_mat_list[[mat_index]],
                                  "poldist",
                                  "nrforums_ordinal",
                                  # reference category: no forums
                                  unique(atts_surveyonly$nr_forums_ordinal)[unique(atts_surveyonly$nr_forums_ordinal) != 
                                                                              "no forums"]
                )
                + twopathedgcovby(poldist_mat_list[[mat_index]],
                                  "poldist",
                                  "type_reduced",
                                  type_levels_chosen)
                + twopathedgcov(poldist_mat_list[[mat_index]])
                + twopathedgcov(ones_mat),
                prior.mean  = prior.meanma1,
                prior.sigma = prior.sigmama1,
                burn.in     = 2000,
                main.iters  = 4000,
                aux.iters   = 10000,
                nchains     = 8,
                gamma       = 0.5,
                seed = 808)
    posterior <- as.matrix(m1$Theta)
    colnames(posterior) <- m1$specs
    return(list(m1,posterior))
  }
  )

m1_posterior_combined <- do.call("rbind",lapply(m1_posterior_list,"[[",2))

## summary stats
# plot_fun_bergm(m1)

## model names
modterms.dep_by_simple <- c(
  'Edges (=intercept)',
  'Activity: Science', 
  'Activity: Service provider',
  'Activity: Interest group',
  'Activity: Private sector',
  'Activity: State admin', 
  'Activity: Federal admin', 
  'Popularity: Science', 
  'Popularity: Service provider',
  'Popularity: Interest group',
  'Popularity: Private sector',
  'Popularity: State admin', 
  'Popularity: Federal admin', 
  'Activity: 4+ forums', 
  'Activity: 1 forum', 
  'Activity: 2 -3 forums', 
  'Popularity: 4+ forums', 
  'Popularity: 1 forum', 
  'Popularity: 2-3 forums', 
  'Indegree distribution (gwindeg)',
  'Outdegree distribution (gwodeg)',
  'Triadic closure (gwesp)',
  'Actor type homophily: Science',
  'Actor type homophily: Service Providers',
  'Actor type homophily: Interest Groups',
  'Actor type homophily: Private sector',
  'Actor type homophily: State admin',
  'Actor type homophily: Federal admin',
  'Attending same forums',
  'Working on same policy issues',
  'Similar expertise', 
  'Dissimilar policy stances',
  'Two-path interaction: broker = 4+ forums',
  'Two-path interaction: broker = 1 forum',
  'Two-path interaction: broker = 2-3 forums',
  'Two-path interaction: broker = Science', 
  'Two-path interaction: broker = Service provider',
  'Two-path interaction: broker = Interest group',
  'Two-path interaction: broker = Private sector',
  'Two-path interaction: broker = State admin', 
  'Two-path interaction: broker = Federal admin', 
  'Two-paths main effect: bridging i-j differences',
  'Two-paths main effect: general two-path closing')

## create coefficient plot
p1 <- plot_fun_bergm(m1_posterior_combined, modelterms = modterms.dep_by_simple, raw_posterior = TRUE)
ggsave(p1, file="1_Analysis/output/FigureA3_bergm_final.pdf", width = 28, 
       height = 30, units = 'cm')
## output: Figure A3 in the article appendix

## Goodness of fit for Model 1 (exemplary - only first of the 5 imputation models)
par(mfrow = c(1,1))
pdf(file = "1_Analysis/output/FigureS5_bergm_final_gof.pdf", width = 6, height = 7)
my_d_bgof(m1_posterior_list[[1]][[1]],
          sample.size = 1000,
          n.odeg = 50,
          n.ideg = 10,
          n.dist = 20,
          n.esp = 30,
          n.dsp = 10)
dev.off()
## output: Figure S5 in Supplementary Materials

##############
## Predicted probabilities from Model 1
## Figures 4 and 5 in the article (predicted probabilities for broker types)
##############

## get posteriors
posterior <- m1_posterior_combined
colnames(posterior) <- gsub("\\_list\\[\\[mat_index\\]\\]","",colnames(posterior))

log_odds_to_prob <- Vectorize(function(x){
  odds <- exp(x)
  prob <- odds/ (1+odds)
  prob
})

## probability of tie
get_prob_of_brokered_tie <- Vectorize(function(broker_type, to_type, i_j_poldist){
  
  tie_log_odds <-
    posterior[,"edges"]
  
  if (sum(grepl(broker_type,colnames(posterior))) > 0){
    tie_log_odds <- tie_log_odds +
      # activity of broker
      posterior[,grepl(pattern = paste0("nodeofactor.type_reduced.",broker_type),
                       colnames(posterior))] +
      # broker is of broker type
      posterior[,grepl(pattern = paste0("twopathedgcovby.poldist.type_reduced.",broker_type),
                       colnames(posterior))]*
      i_j_poldist
  }
  
  if (sum(grepl(to_type,colnames(posterior))) > 0){
    tie_log_odds <- tie_log_odds +
      # popularity of receiver
      posterior[,grepl(pattern = paste0("nodeifactor.type_reduced.",to_type),
                       colnames(posterior))]
  }
  
  tie_log_odds <- tie_log_odds +
    # forum main fx set to baseline (omitted)
    # forum interaction fx set to baseline (omitted)
    # adds first indegree tie (sample from dist later?)
    posterior[,"gwideg.fixed.0.1"]*0.1*1 +
    # adds first out degree tie
    posterior[,"gwodeg.fixed.1"]*1*1 +
    # closes mean triangles (sample from triangle dist later, summary(info_nw_directed ~ gwesp(0.1)))
    posterior[,"gwesp.fixed.0.2"]*0.2*1 +
    # attend not common forums (omit term)
    # median common number of issues
    posterior[,"edgecov.sametopicsmat_directed"]*median(sametopicsmat_directed) +
    # mean dissimilarity in expertise
    posterior[,"edgecov.dissimilmat_exp_manhattan_directed"]*
    mean(dissimilmat_exp_manhattan_directed) +
    # mean dissimilarity in policy stances between broker and to_node
    posterior[,"edgecov.poldist"]*
    mean(unlist(poldist_list)) +
    # baseline forum attendance (omit interaction term)
    # two-path main effect (i-j diff)
    posterior[,"twopathedgcov.poldist_mat"]*
    i_j_poldist +
    # two_path main effect 2 (simple two-path closing)
    posterior[,"twopathedgcov.ones_mat"]
  
  # if to-actor and broker are same type:
  if(identical(to_type,broker_type) & (broker_type %in% type_levels_chosen)){
    tie_log_odds <-
      tie_log_odds +
      posterior[,paste0("nodematch.type_reduced.",broker_type)]
  }
  
  log_odds_to_prob(tie_log_odds)
})

get_prob_of_k_j_tie <- Vectorize(function(broker_type, to_type){
  
  tie_log_odds <-
    posterior[,"edges"]
  
  if (sum(grepl(broker_type,colnames(posterior))) > 0){
    tie_log_odds <- tie_log_odds +
      # activity of broker
      posterior[,grepl(pattern = paste0("nodeofactor.type_reduced.",broker_type),
                       colnames(posterior))]
  }
  
  if (sum(grepl(to_type,colnames(posterior))) > 0){
    tie_log_odds <- tie_log_odds +
      # popularity of receiver
      posterior[,grepl(pattern = paste0("nodeifactor.type_reduced.",to_type),
                       colnames(posterior))]
  }
  
  tie_log_odds <- tie_log_odds +
    # forum main fx set to baseline (omitted)
    # forum interaction fx set to baseline (omitted)
    # adds first indegree tie (sample from dist later?)
    posterior[,"gwideg.fixed.0.1"]*0.1*1 +
    # adds first out degree tie
    posterior[,"gwodeg.fixed.1"]*1*1 +
    # closes mean triangles (sample from triangle dist later, summary(info_nw_directed ~ gwesp(0.1)))
    posterior[,"gwesp.fixed.0.2"]*0.2*1 +
    # attend not common forums (omit term)
    # median common number of issues
    posterior[,"edgecov.sametopicsmat_directed"]*median(sametopicsmat_directed) +
    # mean dissimilarity in expertise
    posterior[,"edgecov.dissimilmat_exp_manhattan_directed"]*
    mean(dissimilmat_exp_manhattan_directed) +
    # mean dissimilarity in policy stances between broker and to_node
    posterior[,"edgecov.poldist"]*
    mean(unlist(poldist_list))
  # baseline forum attendance (omit interaction term)
  
  # if to-actor and broker are same type:
  if(identical(to_type,broker_type) & (broker_type %in% type_levels_chosen)){
    tie_log_odds <-
      tie_log_odds +
      posterior[,paste0("nodematch.type_reduced.",broker_type)]
  }
  
  log_odds_to_prob(tie_log_odds)
})

## check some means
mean(get_prob_of_brokered_tie(broker_type = "State admin",to_type = "Federal",
                              mean(unlist(poldist_list))))
mean(get_prob_of_brokered_tie(broker_type = "other",to_type = "other",
                              mean(unlist(poldist_list))))
mean(get_prob_of_k_j_tie(broker_type = "State admin",to_type = "Federal"))
mean(get_prob_of_k_j_tie(broker_type = "other",to_type = "other"))
mean(get_prob_of_k_j_tie(broker_type = "State admin",to_type = "State admin"))
mean(get_prob_of_k_j_tie(broker_type = "Local Administration",
                         to_type = "Private sector"))

## prepare data frame for plot 
post_pred_prob_df <- expand.grid(seq.int(0,1,0.1),
                                 c(type_levels_chosen),
                                 c(type_levels_chosen),
                                 stringsAsFactors = FALSE)
colnames(post_pred_prob_df) <- c("i_j_poldist","to_type","broker_type")

##
post_pred_prob_df$mean_prob <- apply(post_pred_prob_df,1,function(x) 
  mean(get_prob_of_brokered_tie(x[[3]],x[[2]],as.numeric(x[[1]]))))

post_pred_prob_df$upper67_PI_prob <- apply(post_pred_prob_df,1,function(x) 
  PI(get_prob_of_brokered_tie(x[[3]],x[[2]],as.numeric(x[[1]])),prob = 0.67)[2])

post_pred_prob_df$lower67_PI_prob <- apply(post_pred_prob_df,1,function(x) 
  PI(get_prob_of_brokered_tie(x[[3]],x[[2]],as.numeric(x[[1]])),prob = 0.67)[1])

post_pred_prob_df$mean_prob_non_brokered <- apply(post_pred_prob_df,1,function(x) 
  mean(get_prob_of_k_j_tie(broker_type = x[[3]],to_type = x[[2]])))

##
plot_df3 <- aggregate(post_pred_prob_df[,-c(1,2,3)],
                      by = list(broker_type = post_pred_prob_df$broker_type,
                                i_j_poldist = post_pred_prob_df$i_j_poldist),
                      FUN = mean)
# plot_df3$linelabel <- ifelse(plot_df3$i_j_poldist == 0.1,plot_df3$broker_type,NA)
# no service providers (no hyps anymore)
plot_df3 <- plot_df3[plot_df3$broker_type != "Service Providers",]

facet_labeller <- paste("k:",unique(plot_df3$broker_type))
names(facet_labeller) <- unique(plot_df3$broker_type)

overall_mean <- aggregate(post_pred_prob_df[,-c(1,2,3)],
                          by = list(i_j_poldist = post_pred_prob_df$i_j_poldist),
                          FUN = mean)
ggplot(plot_df3, 
       aes(x = i_j_poldist, y = mean_prob,
           ymin = lower67_PI_prob,
           ymax = upper67_PI_prob)) + 
  geom_ribbon(alpha = 0.15, size = 0.8, 
              # aes(fill = factor(broker_type)), 
              linetype = "blank", fill = "#C8C8C8") +
  geom_line(alpha = 0.8, size = 1) + 
  geom_line(data = overall_mean, alpha = 0.8, size = 1, color = "gray", 
            mapping = aes(y = mean_prob, x = i_j_poldist), linetype = "dotted") +
  theme_minimal() + 
  ylab("Posterior predicted probability of tie k -> j \n closing two-path i -> k -> j") +
  xlab("Standardized difference in policy beliefs between actors i and j (endpoints of two-path i -> k -> j)") +
  # ggtitle("Impact of broker type on cross-divide bridging",
  #         "Change in probability of tie k -> j closing two-path i -> k -> j with increasing difference in policy beliefs between i and j, depending on type of broker k
  #          Mean posterior predicted probability and 67 percent posterior density interval shown"
  # ) +
  scale_x_continuous(breaks = c(0,1), labels = c("min","max")) +
  facet_wrap(~ factor(broker_type, labels = facet_labeller), nrow = 1) + 
  guides(fill = FALSE, color = FALSE) + theme(strip.text = element_text(size=15),
                                              axis.title.x = element_text(size=15),
                                              axis.title.y = element_text(size=15),
                                              axis.text = element_text(size=12),
                                              panel.spacing = unit(2, "lines")) +
  theme(legend.position="right")
ggsave("1_Analysis/output/Figure4_pred_prob_plots_bergm5_new.pdf", width = 35, height = 12, units = 'cm')
## output Figure 4 in the article

## Second plot about forum participation
get_prob_of_brokered_tie_forum <- Vectorize(function(forum_type, i_j_poldist){
  
  tie_log_odds <-
    posterior[,"edges"]
  
  # take private sector as ref
  tie_log_odds <- tie_log_odds +
    # activity of broker
    posterior[,grepl(pattern = "nodeofactor.type_reduced.Private sector",
                     colnames(posterior))] +
    posterior[,grepl(pattern = "nodeifactor.type_reduced.Private sector",
                     colnames(posterior))] +
    posterior[,grepl(pattern = "nodematch.type_reduced.Private sector",
                     colnames(posterior))] +
    # broker is of broker type
    posterior[,grepl(pattern = "twopathedgcovby.poldist.type_reduced.Private sector",
                     colnames(posterior))]*
    i_j_poldist
  
  if (sum(grepl(forum_type,colnames(posterior))) > 0){
    tie_log_odds <- tie_log_odds +
      # activity of forum members
      posterior[,grepl(pattern = paste0("nodeofactor.nrforums_ordinal.",forum_type),
                       colnames(posterior))] +
      # broker is of forum type
      posterior[,grepl(pattern = paste0("twopathedgcovby.poldist.nrforums_ordinal.",forum_type),
                       colnames(posterior))]*
      i_j_poldist
  }
  
  if (sum(grepl(forum_type,colnames(posterior))) > 0){
    tie_log_odds <- tie_log_odds +
      # popularity of receiver
      posterior[,grepl(pattern = paste0("nodeifactor.nrforums_ordinal.",forum_type),
                       colnames(posterior))]
  }
  
  tie_log_odds <- tie_log_odds +
    # forum main fx set to baseline (omitted)
    # forum interaction fx set to baseline (omitted)
    # adds first indegree tie (sample from dist later?)
    posterior[,"gwideg.fixed.0.1"]*0.1*1 +
    # adds first out degree tie
    posterior[,"gwodeg.fixed.1"]*1*1 +
    # closes mean triangles (sample from triangle dist later, summary(info_nw_directed ~ gwesp(0.1)))
    posterior[,"gwesp.fixed.0.2"]*0.2*1 +
    # attend not common forums (omit term)
    # median common number of issues
    posterior[,"edgecov.sametopicsmat_directed"]*median(sametopicsmat_directed) +
    # mean dissimilarity in expertise
    posterior[,"edgecov.dissimilmat_exp_manhattan_directed"]*
    mean(dissimilmat_exp_manhattan_directed) +
    # mean dissimilarity in policy stances between broker and to_node
    posterior[,"edgecov.poldist"]*
    mean(unlist(poldist_list)) +
    # baseline forum attendance (omit interaction term)
    # two-path main effect (i-j diff)
    posterior[,"twopathedgcov.poldist_mat"]*
    i_j_poldist +
    # two_path main effect 2 (simple two-path closing)
    posterior[,"twopathedgcov.ones_mat"]
  
  log_odds_to_prob(tie_log_odds)
})

##
post_pred_prob_df_forum <- expand.grid(seq.int(0,1,0.1),
                                       unique(atts_surveyonly$nr_forums_ordinal),
                                       stringsAsFactors = FALSE)
colnames(post_pred_prob_df_forum) <- c("i_j_poldist","forum_type")

##
post_pred_prob_df_forum$mean_prob <- apply(post_pred_prob_df_forum,1,function(x) 
  mean(get_prob_of_brokered_tie_forum(x[[2]],as.numeric(x[[1]]))))

post_pred_prob_df_forum$upper67_PI_prob <- apply(post_pred_prob_df_forum,1,function(x) 
  PI(get_prob_of_brokered_tie_forum(x[[2]],as.numeric(x[[1]])),prob = 0.67)[2])

post_pred_prob_df_forum$lower67_PI_prob <- apply(post_pred_prob_df_forum,1,function(x) 
  PI(get_prob_of_brokered_tie_forum(x[[2]],as.numeric(x[[1]])),prob = 0.67)[1])

##
plot_df4 <- aggregate(post_pred_prob_df_forum[,-c(1,2)],
                      by = list(forum_type = post_pred_prob_df_forum$forum_type,
                                i_j_poldist = post_pred_prob_df_forum$i_j_poldist),
                      FUN = mean)
##
overall_mean2 <- aggregate(post_pred_prob_df_forum[,-c(1,2)],
                           by = list(i_j_poldist = post_pred_prob_df_forum$i_j_poldist),
                           FUN = mean)
##
facet_labeller2 <- paste("k:",unique(plot_df4$forum_type))
names(facet_labeller2) <- unique(plot_df4$forum_type)

## 
ggplot(plot_df4, 
       aes(x = i_j_poldist, y = mean_prob,
           ymin = lower67_PI_prob,
           ymax = upper67_PI_prob)) + 
  geom_ribbon(alpha = 0.15, size = 0.8, 
              linetype = "blank", fill = "#C8C8C8") +
  geom_line(alpha = 0.8, size = 1) + 
  geom_line(data = overall_mean, alpha = 0.8, size = 1, color = "gray", 
            mapping = aes(y = mean_prob, x = i_j_poldist), linetype = "dotted") +
  theme_minimal() + 
  ylab("Posterior predicted probability of tie k -> j \n closing two-path i -> k -> j") +
  xlab("Standardized difference in policy beliefs between actors i and j (endpoints of two-path i -> k -> j)") +
  scale_x_continuous(breaks = c(0,1), labels = c("min","max")) +
  facet_wrap(~ factor(forum_type, levels = unique(plot_df4$forum_type),
                      labels = facet_labeller2), nrow = 1) + 
  guides(fill = FALSE, color = FALSE) + theme(strip.text = element_text(size=15),
                                              axis.title.x = element_text(size=15),
                                              axis.title.y = element_text(size=15),
                                              axis.text = element_text(size=12),
                                              panel.spacing = unit(2, "lines")) +
  theme(legend.position="right")
ggsave("1_Analysis/output/Figure5_pred_prob_plots_bergm5_forums.pdf", width = 35, height = 15, units = 'cm')
## output: Figure 5 in the article


################################################################################
## DAG approach
## Figure A1 in the article appendix
################################################################################

##############
## Dagitty code - paste into dagitty.net/dags.html for interactive exploration
## See Supplementary Materials for more information
##############

# dag {
#   diff_i_j [exposure,pos="1.045,-0.828"]
#   diff_i_k [pos="0.417,-1.608"]
#   diff_k_j [adjusted,pos="1.271,-1.538"]
#   expertise_sim_k_j [pos="0.029,-0.238"]
#   forum_sim_k_j [adjusted,pos="-1.217,1.428"]
#   issue_sim_k_j [pos="-0.149,-1.235"]
#   k_j_tie [outcome,pos="1.400,1.621"]
#   type_j [adjusted,pos="-1.369,-0.983"]
#   type_k [exposure,pos="-2.160,-0.406"]
#   type_sim_k_j [adjusted,pos="-1.374,-0.160"]
#   diff_i_j -> k_j_tie
#   diff_i_k -> diff_i_j
#   diff_k_j -> diff_i_j
#   diff_k_j -> k_j_tie
#   diff_k_j <-> issue_sim_k_j
#   expertise_sim_k_j -> diff_k_j
#   expertise_sim_k_j -> k_j_tie
#   expertise_sim_k_j <-> forum_sim_k_j
#   forum_sim_k_j -> k_j_tie
#   issue_sim_k_j -> expertise_sim_k_j
#   issue_sim_k_j -> k_j_tie
#   type_j -> k_j_tie
#   type_j -> type_sim_k_j
#   type_k -> k_j_tie [pos="-2.264,-0.041"]
#   type_k -> type_sim_k_j
#   type_sim_k_j -> diff_k_j
#   type_sim_k_j -> expertise_sim_k_j
#   type_sim_k_j -> forum_sim_k_j
#   type_sim_k_j -> issue_sim_k_j
#   type_sim_k_j -> k_j_tie
# }

