# Polarisation sub-model for *Inconsistent belief aggregation in diverse and polarised groups*

In [None]:
# Install packages used in this notebook
%pip install taupy numpy pandas statsmodels seaborn matplotlib ptitprince networkx

## Packages

In [None]:
# theory of dialectical structures
from taupy import *

# system tools
import random
from concurrent.futures import ProcessPoolExecutor

# data analysis & storage
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

# visuals
import seaborn as sea
import matplotlib.pyplot as plt
import ptitprince as pt
import networkx as nx

## Functions to perform polarisation experiments

In [None]:
def multiprocess_polarisation_experiment(
    n=5,
    *,
    max_workers=None,
    settings=None
):
    """
    Run several polarisation experiments in parallel worker processes.

    Parameters
    ----------
    n : int
        Number of independent `polarisation_experiment()` runs to submit.
        Must be at least 1.
    max_workers : int or None
        Passed to `ProcessPoolExecutor`; the maximum number of worker
        processes used at the same time.
    settings : mapping or None
        Keyword arguments forwarded unchanged to every
        `polarisation_experiment()` call. `None` (the default) forwards
        no arguments, so the experiment's own defaults apply.

    Returns
    -------
    pandas.DataFrame
        The data frames returned by all runs, concatenated with a fresh
        integer index.

    Raises
    ------
    ValueError
        If `n` is smaller than 1 (there would be nothing to concatenate).
    """
    # Fail before any worker process is started; without this check the
    # same exception type would only surface later from `pd.concat`.
    if n < 1:
        raise ValueError("`n` must be at least 1 to produce any results.")

    # Use a private copy so that no dictionary is shared between calls.
    experiment_settings = dict(settings) if settings is not None else {}

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        sim_results = [
            executor.submit(polarisation_experiment, **experiment_settings)
            for _ in range(n)
        ]

    # Results are collected after the executor context has been left.
    return pd.concat(
        [future.result() for future in sim_results],
        ignore_index=True
    )

In [None]:
def polarisation_experiment(
    *,
    list_of_densities = (0.4, 0.5, 0.6, 0.7, 0.8),
    list_of_distances = tuple(i/20 for i in (0,1,2,3,4,6,12,14,16,17,18,19,20)),
    density_factor = 5,
    population_size = 51
):
    """
    Perform a single polarisation experiment.

    Parameters
    ----------
    list_of_densities : iterable of float
        The density points for which argument maps are generated.
    list_of_distances : iterable of float
        Desired pairwise distances used to draw the polarised agent samples
        (see code comments below). Their values depend on the number of
        sentences under discussion (20 hardcoded in this model for
        computational efficiency).
    density_factor : int
        The number of argument maps generated per density point.
    population_size : int
        Number of agents drawn per sample. A sample always starts with two
        agents, so values below 2 still yield a population of two.

    Returns
    -------
    pandas.DataFrame
        One row per argument map and matched distance, with the columns
        "consistency", "holders of majority opinion", "dispersion",
        "inferential density", "mean distance to majority", "population",
        "mean distance", "unique positions" and "mean agreement".
    """
    results = []

    # Every density point is visited `density_factor` times.
    for de in list(list_of_densities) * density_factor:

        # Generate an argument map
        m = generate_hierarchical_argument_map(max_density=de)
        # Obtain all belief systems that respect all validity constraints.
        s = satisfiability(m, all_models=True)
        # Calculate the distances between all validity-respecting beliefs.
        m1 = difference_matrix(
            measure=normalised_hamming_distance,
            positions=s).astype("float64")

        # Determine distances that actually exist among the population base,
        # i.e., all validity-respecting beliefs, and that match our desired
        # distances between agents
        unique_distances = set(np.unique(m1)) & set(list_of_distances)
        for d in unique_distances:
            # Pick a random pair r for each unique distance d
            r = random.choice(np.argwhere(m1 == d))
            # The initial population consists of these two agents only.
            # Note that we are storing agent indices, not the agent belief
            # systems themselves, in `current_pop`.
            current_pop = [r[0], r[1]]

            while len(current_pop) < population_size:
                # Draw agents until we reach the desired sample size
                random_element = random.choice(current_pop)
                # New agents have the desired distance to a random member
                # of the currently existing sample.
                new_element = random.choice(
                    [p[0] for p in np.argwhere(m1[random_element] == d)]
                )
                current_pop.append(new_element)

            # Complete sample generation by retrieving the actual belief
            # systems from the stored indices.
            population = [s[p] for p in current_pop]

            # Vote on the group opinion through majoritarian aggregation.
            rt_pos = Position(m,
                        aggregated_position_of_winners(population)
                        )

            # Check whether the majority opinion is consistent.
            rt_val = rt_pos.is_coherent()

            # The mean distance between the drawn agents and their majority
            # opinion. Previously this column silently repeated the pairwise
            # mean stored under "mean distance".
            # NOTE(review): assumes the aggregated position covers the same
            # sentences as every agent -- confirm for even population sizes,
            # where ties may occur.
            mean_distance_to_majority = float(np.mean([
                normalised_hamming_distance(agent, rt_pos)
                for agent in population
            ]))

            # The mean over all entries of the pairwise distance matrix of
            # the drawn agents.
            mean_distance = difference_matrix(
                    measure=normalised_hamming_distance,
                    positions=population
                ).astype("float64").mean()

            # The mean agreement among sampled agents
            mean_agr = difference_matrix(
                positions=population,
                measure=bna
            ).astype("float64").mean()

            # How many unique belief systems did we draw?
            number_of_unique_positions = len({
                frozenset(position.items()) for position in population
            })

            # Which share of agents exactly matches the majority opinion?
            # (a fraction between 0 and 1)
            maj_holders = sum(1 for p in population if p == rt_pos) / len(population)

            # Store all obtained values (order must match `columns` below).
            results.append(
                    [rt_val,
                    maj_holders,
                    pairwise_dispersion(population, measure=normalised_hamming_distance),
                    de,
                    mean_distance_to_majority,
                    population,
                    mean_distance,
                    number_of_unique_positions,
                    mean_agr]
                )

    # Pandas data management
    return pd.DataFrame(
        results,
        columns=["consistency",
                 "holders of majority opinion",
                 "dispersion",
                 "inferential density",
                 "mean distance to majority",
                 "population",
                 "mean distance",
                 "unique positions",
                 "mean agreement"]
                )


# Single model run

We perform a single model run with just a couple of argument maps. The polarisation sub-model works a bit differently from the diversity sub-model, so we need a different number of iterations to achieve a comparable amount of data.

In [None]:
# A small demonstration run: three argument maps for each of two density points.
data_single_run = polarisation_experiment(
    list_of_densities=[0.5, 0.8],
    density_factor=3,
)

In [None]:
# One facet per inferential density, showing dispersion by consistency status.
g = sea.FacetGrid(data_single_run, col="inferential density", aspect=4/5)
# Fix the category order explicitly: with `order=None` a facet that contains
# only one consistency value may place it at a different x position than the
# other facets.
g.map(
    sea.swarmplot, "consistency", "dispersion",
    order=[False, True],
    size=6, dodge=True, color="k",
)
plt.savefig("SingleModelRun-Polarisation.png",bbox_inches="tight",dpi=300)

# Quantitative analysis of many runs

In [None]:
# Twenty parallel experiments, each generating 15 argument maps per density point.
df1 = multiprocess_polarisation_experiment(
    n=20,
    settings=dict(
        density_factor=15,
        list_of_densities=[0.4, 0.5, 0.6, 0.7, 0.8],
        list_of_distances=[i/20 for i in [0,1,2,4,6,8,9,10,12,14,16,17,18,19,20]],
    ),
)

The polarisation sub-model does not automatically generate equal amounts of data for each density–polarisation region. We control data selection by ensuring about equally many data points are drawn from each polarisation decile:

In [None]:
# Bin each run by its dispersion rounded to one decimal place.
df1["decile"] = df1["dispersion"].round(decimals=1)

In [None]:
# Number of data points available in each dispersion bin.
df1.groupby("decile").size()

In [None]:
# Draw at most 250 random rows from every (density, decile) group.
# Sampling each group explicitly (instead of `groupby.apply`) keeps the
# grouping columns in the result on every pandas version and avoids index
# levels that share their names with columns.
df2 = pd.concat(
    [
        group.sample(n=min(250, len(group)))
        for _, group in df1.groupby(["inferential density", "decile"])
    ]
)
len(df2)

In [None]:
# Raincloud plot: dispersion per inferential density, split by consistency.
sea.set_style("ticks", {"axes.grid": True})  # Produce horizontal grid guidelines
f, ax = plt.subplots(figsize=(10, 4))

ax = pt.RainCloud(
    x="inferential density",
    y="dispersion",
    hue="consistency",
    data=df2,
    palette=sea.color_palette(n_colors=2),
    width_viol=.5,
    alpha=.5,
    dodge=True,
    cut=0,
    scale="count",
    ax=ax,
)

# Overwrite default legend
ax.legend().remove()
handles, labels = ax.get_legend_handles_labels()
# Only the first third of the handles is shown.
# NOTE(review): presumably the entries repeat once per plot layer -- confirm.
n_legend_entries = len(labels) // 3
plt.legend(
    handles[0:n_legend_entries],
    # Custom labels replace the default “False” and “True” consistency labels.
    ["Inconsistent", "Consistent"],
    title="Consistency of majority opinion",
    loc="upper center",
    bbox_to_anchor=(0.5, 1.25),
    ncol=3,
    borderaxespad=0.,
)

plt.savefig("Experiment-Polarisation.png",bbox_inches="tight",dpi=300)

# Statistical analysis

To better interpret the “unit of change” parameter in the Logit model, we calculate each run's distance to medium polarisation (at 0.5 dispersion) and multiply that number by 100, so that a unit of change corresponds to a change of one percentage point in dispersion.

In [None]:
# Distance to medium polarisation (0.5), scaled by 100.
df2["normdisp"] = (df2["dispersion"] - 0.5).abs() * 100
# Numeric 0/1 version of the boolean outcome for the Logit model.
df2["consistencyNum"] = df2["consistency"].astype(int)

In [None]:
# Logistic regression of majority consistency on the scaled dispersion distance.
logit_model = smf.logit(formula="consistencyNum ~ normdisp", data=df2).fit()
logit_model.summary()

We measure Cohen's $f^2$: $$f^2=\frac{R^2}{1-R^2}$$

In [None]:
# Cohen's f² computed from the model's pseudo-R².
pseudo_r_squared = logit_model.prsquared
pseudo_r_squared / (1 - pseudo_r_squared)

Obtain the $\chi^2$ value and $p$ to check the model's significance.

In [None]:
# Likelihood-ratio statistic and its p-value as reported by the fitted model.
dict(Chisq=logit_model.llr, p=logit_model.llr_pvalue)

# Summary data tables

In [None]:
# Independent copy holding only the two columns needed for the summary table.
df3 = df1.loc[:, ["dispersion", "consistency"]].copy()

In [None]:
# Assign every run to one of four dispersion quarters; 0 belongs to the first bin.
quarter_edges = [0, 0.25, 0.5, 0.75, 1]
df3["interval"] = pd.cut(
    df1["dispersion"],
    bins=quarter_edges,
    include_lowest=True,
)

In [None]:
# Mean dispersion and share of consistent majorities per dispersion interval.
# `observed=False` is stated explicitly so that intervals without data stay in
# the table regardless of the pandas default for categorical groupers.
df3.groupby(["interval"], observed=False).mean()