# Garden Path 01 1
# 
# last mod : 2024-02-22 mh

setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
pacman::p_load(effectsize,tidyverse,haven,lme4,car,sciplot,janitor,
               BayesFactor,sjPlot,callr,htmltools,webshot,ggpubr,psycho,
               psych,lsr,PsychHelperFunctions)
set.seed(4)

# Read data

wd <- getwd()
paths <- dir(path = "data/", pattern = ".csv", full = T)
dat.raw <- lapply(paths, read.csv)

# Generate data frame
dat <- data.frame(dat.raw[1])

for (i in 2:length(dat.raw))
{
  dat <- bind_rows(dat,data.frame(dat.raw[i]))
}

# dat$Condition and dat$Conditions are the same! -> merge

dat[is.na(dat$Condition),]$Condition <- dat[is.na(dat$Condition),]$Conditions

dat$Conditions <- NULL


dat <- dat[!is.na(dat$ID),]

dat_all <- dat

# Data preparation
# Rename variables
dat_all$rt <- dat_all$key_resp_testing.rt
dat_all$pc <- dat_all$key_resp_testing.corr
dat_all$subj_id <- paste0(dat_all$date)
dat_all$gender <- dat_all$Gender.
dat_all$age <- dat_all$Age.
dat_all$relatedness <- dat_all$slider.response
dat_all$corrected_to_normal_vision <- dat_all$Do.you.have.normal.or.corrected.to.normal.vision..e.g..glasses.or.lenses...


dat_all <- dat_all %>%
  select(subj_id, gender, age, corrected_to_normal_vision, ID, Condition, ItemType,
         trials_learning.thisN, trials_testing.thisN, rt, pc, relatedness) %>%
  arrange(subj_id, ID) %>%
  group_by(subj_id) %>%
  mutate(phase = if_else(is.na(trials_testing.thisN), "learning", "testing"))


## Exclusions before analysis

dat_all %>%
  select(subj_id, corrected_to_normal_vision) %>%
  distinct(subj_id, corrected_to_normal_vision) %>%
  tabyl(corrected_to_normal_vision)
  
# Exclusion of participants
# All participants finished the study 


# Individual trials
# Exclusion of trials shorter than 0.2 sec and longer than the upper 
# boxplot criterion (1.5 IQR)



# Calculate boxplot criterion

boxplot(dat_all[dat_all$phase == "testing",]$rt)
iqr <- IQR(dat_all[dat_all$phase == "testing",]$rt)
q.3 <- quantile(dat_all[dat_all$phase == "testing",]$rt, prob = 0.75)
q.3 <- unname(q.3)

crit <- q.3 + 1.5 * iqr

# Exclusions
# total <- length(dat_all[dat_all$phase == "testing",]$rt)
# 
# dat_all <- dat_all[dat_all$rt >= 0.2 & dat_all$rt <= crit,]
# 
# total_excl <- length(dat_all[dat_all$phase == "testing",]$rt)
# 
# excl <- total - total_excl
# excl/total*100
# 
# boxplot(dat_all[dat_all$phase == "testing",]$rt)
# 
# ## 24 trials excluded
# # Exclusion of participants with less than 75% of trials
# 
# print(dat %>%
#   group_by(subj_id) %>%
#   count(), n = 50)
# 


# Exclusion of participant number 


## Demographic data
gdr <-
  (dat_all %>%
  group_by(subj_id, gender) %>%
    filter(corrected_to_normal_vision == "Yes") %>%
  count ())

gender <- gdr$gender
 table(gender)

 # N = 85 (with normal of corrected-to-normal vision)
 # gender
 # Female              Male Prefer not to say 
 # 57                27                 1 

age <- (dat_all %>%
        group_by(subj_id, age) %>%
          filter(corrected_to_normal_vision == "Yes") %>%
        count ())
age <- age$age

mean(age)     # 45.34118
sd(age)       # 13.9485

# Relatedness : Influence of context only

# Remove everything but the relevent trials


dat_all %>%
  filter(phase == "learning", corrected_to_normal_vision == "Yes") %>%
  select(subj_id, ID, Condition, relatedness) %>%
  group_by(Condition) %>%
  summarise(mean_relatedness = mean(relatedness),
            sd_relatedness = sd(relatedness))

dat_all %>%
  filter(phase == "learning", corrected_to_normal_vision == "Yes") %>%
  select(subj_id, ID, Condition, relatedness) %>%
  group_by(subj_id, Condition) %>%
  ggerrorplot(x = "Condition", y = "relatedness",
              desc_stat = "mean_sd",
              error.plot = "errorbar",
              add = "mean")

dat_all %>%
  filter(phase == "learning", corrected_to_normal_vision == "Yes") %>%
  select(subj_id, ID, Condition, relatedness) %>%
  group_by(subj_id, Condition) %>%
  mutate(relatedness = jitter(relatedness, amount = .5)) %>%
  ggviolin(x = "Condition", y = "relatedness",
           add = "")

plot_A <- 
dat_all %>%
  filter(phase == "learning", corrected_to_normal_vision == "Yes") %>%
  select(subj_id, ID, Condition, relatedness) %>%
  group_by(subj_id, Condition) %>%
  ggboxplot(x = "Condition", y = "relatedness") + 
  ylab("Relatedness")

m1 <- lmer(relatedness ~ Condition + (1|subj_id) + (1|ID), 
           data = dat_all[dat_all$phase == "learning" &
                          dat_all$corrected_to_normal_vision == "Yes",])
Anova(m1)
tab_model(m1)
plot_model(m1)

library(BayesFactor)
ttestBF(x = dat_all[dat_all$Condition == "fitting" & 
                    dat_all$phase == "learning" & 
                     dat_all$corrected_to_normal_vision == "Yes",]$relatedness,
        y = dat_all[dat_all$Condition == "unfitting"& 
                      dat_all$phase == "learning" & 
                      dat_all$corrected_to_normal_vision == "Yes",]$relatedness,
        paired = TRUE)



# Proportion correct: Influence of context only

dat_all %>%
  filter(phase == "testing", corrected_to_normal_vision == "Yes") %>%
  select(subj_id, ID, Condition, ItemType) %>%
  arrange(subj_id, ID) %>%
  group_by(subj_id) %>%
  mutate(Condition = ifelse(is.na(Condition),  
                            sample(c(rep("fitting",935), rep("unfitting",935))), 
                            Condition)) %>%
  tabyl(Condition, ItemType)

dat <- dat_all %>%
  filter(phase == "testing", corrected_to_normal_vision == "Yes") %>%
  select(subj_id, ID, Condition, ItemType, pc) %>%
  arrange(subj_id, ID) %>%
  group_by(subj_id) %>%
  mutate(Condition = ifelse(Condition == "",  
                            sample(c(rep("fitting",935), rep("unfitting",935))), 
                            Condition)) 

dat %>%
  select(subj_id, ID, ItemType, Condition, pc) %>%
  group_by(Condition) %>%
  summarise(mean_pc = mean(pc),
            sd_pc = sd(pc))

dat %>%
  select(subj_id, ItemType, Condition, pc) %>%
  group_by(Condition) %>%
  ggerrorplot(x = "Condition", y = "pc",
              desc_stat = "mean_se",
              error.plot = "errorbar",
              add = "mean")

m1 <- glmer(pc ~ Condition + (1|subj_id) + (1|ID), 
            data = dat, 
            family = binomial(link = "logit"))
Anova(m1)
tab_model(m1)

# Signal detection theory 

library(dplyr)

calculate_SDT_metrics <- function(data) {
  data %>%
    group_by(subj_id, Condition) %>%
    summarise(
      Hits = sum(ItemType == 'target' & pc == 1),
      Misses = sum(ItemType == 'target' & pc == 0),
      FalseAlarms = sum(ItemType == 'distractor' & pc == 0),
      CorrectRejections = sum(ItemType == 'distractor' & pc == 1),
      .groups = 'drop'
    ) %>%
    mutate(
      # Correct for perfect performance and avoiding mistakes
      Hits = ifelse(Hits == (Hits + Misses), Hits - 0.5, Hits),
      FalseAlarms = ifelse(FalseAlarms == 0, FalseAlarms + 0.5, FalseAlarms),
      
      # Calculate d' and c
      dprime = qnorm((Hits + 0.5) / (Hits + Misses + 1)) - qnorm((FalseAlarms + 0.5) / (FalseAlarms + CorrectRejections + 1)),
      bias = -0.5 * (qnorm((Hits + 0.5) / (Hits + Misses + 1)) + qnorm((FalseAlarms + 0.5) / (FalseAlarms + CorrectRejections + 1)))
    )
}

result <- calculate_SDT_metrics(dat)
print(result)

result %>%
  group_by(Condition) %>%
  summarise(mean_dprime = mean(dprime),
            sd_dprime = sd(dprime),
            mean_bias = mean(bias),
            sd_bias = sd(bias))

t.test(dprime ~ Condition, paired = TRUE, data = result)
cohens_d_paired(t = 5.7519, n = 85)

t.test(bias ~ Condition, paired = TRUE, data = result)
cohens_d_paired(t = -5.7764, n = 85)

plot_B <- 
result %>%
  group_by(subj_id, Condition) %>%
  ggboxplot(x = "Condition", y = "dprime") +
  ylab("Sensitivity [d']")


human_data_plot <- ggarrange(plot_A, plot_B, labels = c("C","D"))
annotate_figure(human_data_plot, top = text_grob("Human data", 
                                                   color = "black", face = "bold", size = 12))
ggsave("plot_human_data.png", width = 1800, height = 1200, units = "px")

result %>%
  group_by(Condition) %>%
  summarize(mean_dprime = mean(dprime),
            sd_dprime = sd(dprime),
            mean_bias = mean(bias),
            sd_bias = sd(bias))


library(BayesFactor)
ttestBF(x = result[result$Condition == "fitting",]$dprime,
        y = result[result$Condition == "unfitting",]$dprime,
        paired = TRUE)

ttestBF(x = result[result$Condition == "fitting",]$bias,
        y = result[result$Condition == "unfitting",]$bias,
        paired = TRUE)

# Correlation of relatedness and pc

dat_rel <- 
dat_all %>%
  group_by(ID, Condition) %>%
  filter(phase == "learning") %>%
  summarise(mean_rel = mean(relatedness, na.rm = TRUE))

dat_pc <- 
  dat %>%
  group_by(ID, Condition) %>%
  summarise(mean_pc = mean(pc, na.rm = TRUE))

dat_cor <- left_join(dat_rel, dat_pc)

mod_cor <- lmer(mean_pc ~ Condition * mean_rel + (1|ID), dat_cor)
Anova(mod_cor)

plot_mechanism_B <- 
  ggscatter(dat_cor, x = "mean_rel", y = "mean_pc", shape = "Condition",
          color = "Condition", add = "reg.line", palette = "aaas",
          fullrange = TRUE, conf.int = FALSE, rug = FALSE) +
  stat_cor(aes(color = Condition), method = "pearson",  
           r.accuracy = .01, p.accuracy = .001,
           label.x.npc = "middle", label.y.npc = "bottom")  +
  ylim(c(0.75,1)) + xlim(c(1,10)) + ylab("Proportion correct") + xlab("Relatedness")

plot_mechanism_B <- ggarrange(plot_mechanism_B, labels = c("B"))
annotate_figure(plot_mechanism_B, top = text_grob("Human data", 
                                                 color = "black", face = "bold", size = 12))
ggsave("plot_mechanism_B.png", width = 1000, height = 1200, units = "px")

