# Study: Garden path sentences w/ vs. w/o context (machine data)
# Ela Ulakci & Markus Huff
# 
# last mod: 2024-02-22 mh
# 
# General idea: We presented ChatGPT with a garden path sentence preceded by a fitting or an unfitting sentence, and measured event boundary perception as well as relatedness, comprehensibility, and memorability of the garden path sentence. The research question is whether the context (fitting vs. unfitting preceding sentence) influences the processing of the garden path sentence.
# 
# 44 garden path sentences.
# 
# Variables:
# - Context: Fitting (introducing the context of the garden path sentence) vs. unfitting (not introducing the context of the garden path sentence)
# - Example:
#   - Sentence 1 (fitting): "Bill has chronic alcoholism."
#   - Sentence 1 (unfitting): "Bill likes to play golf."
#   - Sentence 2: "Because Bill drinks wine is never kept in the house."
# 
# Measures:
# - EB between: Natural grained event boundary between the 2 sentences. (Read Sentence 1 and Sentence 2 and answer the following question. Is there a a meaningful event boundary (i.e. points where the meaning changes) between Sentence 1 and Sentence 2?)
# - EB within: Natural grained event boundary within the garden path sentence. (Read Sentence 1 and Sentence 2 and answer the following question. Is there a a meaningful event boundary (i.e. points where the meaning of the sentences changes) in Sentence 2?)
# - Relatedness: "Read Sentence 1 and Sentence 2 and answer the following question. How related are the two sentences from 1 (not at all) to 10 (highly)?"
# - Comprehensibility: Read Sentence 1 and Sentence 2 and answer the following question. How do you rate the comprehensibility of  Sentence 2 from 1 (not at all) to 10 (excellent)?
# - Memorability: Read Sentence 1 and Sentence 2 and answer the following question. How do you rate the memorability of  Sentence 2 from 1 (not at all) to 10 (excellent)?
# - EB between coarse: Coarse grained event boundary between the 2 sentences. (Read Sentence 1 and Sentence 2 and answer the following question. Is there a coarse-grained event boundary between Sentence 1 and Sentence 2 (i.e. a point indicating a large change in the meaning of the sentence)? Answer just with "yes" or "no".)
# - EB between fine: Fine grained event boundary between the 2 sentences. (Read Sentence 1 and Sentence 2 and answer the following question. Is there a fine-grained event boundary between Sentence 1 and Sentence 2 (i.e. a point indicating a small change in the meaning of the sentence)? Answer just with "yes" or "no".)
# - EB within coarse: Coarse grained event boundary within the garden path sentence. (Read Sentence 1 and Sentence 2 and answer the following question. Is there a coarse-grained event boundary in Sentence 2 (i.e. a point indicating a large change in the meaning of the sentence)? Answer just with \"yes\" or \"no\".)
# - EB within fine: Fine grained event boundary within the garden path sentence. (Read Sentence 1 and Sentence 2 and answer the following question. Is there a fine-grained event boundary in Sentence 2 (i.e. a point indicating a small change in the meaning of the sentence)? Answer just with "yes" or "no".)
# - Meaning: "Read Sentence 1 and Sentence 2 and decide the following question. Which interpretation of Sentence 2 is correct, Answer A or Answer B? Answer just with "A" or "B"."
#   - A was always the standard interpretation provided by the fitting context. For example:
#     - Answer A: "Because Bill drinks so much alcohol wine is never kept in the house."
#     - Answer B: "Because Bill drinks wine it is never kept in the house."
#
# In the present analysis, we focus on Context, Relatedness, and Memorability.

# Set working directory
# Inside RStudio, switch to the folder that contains this script so that the
# relative paths used below ("dat.csv" and the PNG outputs) resolve next to it.
# Outside RStudio (e.g. Rscript) the rstudioapi call would fail, so the current
# working directory is kept as-is in that case.
if (rstudioapi::isAvailable()) {
  setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
}

# Load packages (pacman installs any that are missing before attaching them)
pacman::p_load(
  tidyverse, ez, lme4, car, sjPlot, ggpubr, rstatix, stringi, epitools
)

# Load data
dat <- read_csv("dat.csv")

# Extract sentence pairs
#
# One row per distinct prompt combination, holding the mean memorability
# rating and the mean coarse between-sentence boundary response.
# substring(..., 153) drops the first 152 characters of the memorability
# prompt; presumably this is the fixed instruction text, leaving only the
# sentence pair (TODO confirm against dat.csv). str_squish() then trims and
# collapses whitespace.
tmp <- dat %>%
  group_by(Prompt_EB_between_coarse, Prompt_memorability) %>%
  summarise(
    mean_memorability = mean(value_memorability),
    mean_prob_boundary = mean(value_EB_between_coarse)
  ) %>%
  ungroup() %>%
  select(-Prompt_EB_between_coarse) %>%
  mutate(
    Prompt_memorability = str_squish(substring(Prompt_memorability, 153))
  )
  

# First checks on the data

# Number of rows per context condition. count(Condition) groups, tallies and
# returns an ungrouped result in one step.
dat %>%
  count(Condition)

# Plots

# NOTE(review): in both panels the ratings are jittered *before* the box plot
# is computed, so the box statistics are based on the perturbed values and
# change from run to run (no seed is set) -- confirm this is intended.

# Panel A: relatedness ratings by context condition.
plot_A <-
  dat %>%
  select(ID, Condition, value_relatedness) %>%
  group_by(Condition) %>%
  mutate(value_relatedness = jitter(value_relatedness, amount = .5)) %>%
  ggboxplot(x = "Condition", y = "value_relatedness") +
  ylab("Relatedness")

# Panel B: memorability ratings by context condition.
plot_B <-
  dat %>%
  select(ID, Condition, value_memorability) %>%
  group_by(Condition) %>%
  mutate(value_memorability = jitter(value_memorability, amount = .5)) %>%
  ggboxplot(x = "Condition", y = "value_memorability") +
  ylab("Memorability")

# Combine both panels (labelled A and B) and add a common title.
machine_data_plot <- ggarrange(plot_A, plot_B, labels = "AUTO")
machine_data_figure <- annotate_figure(
  machine_data_plot,
  top = text_grob("Machine data", color = "black", face = "bold", size = 12)
)
machine_data_figure

# Pass the annotated figure explicitly; ggsave() otherwise falls back to
# last_plot(), which only happens to be this figure.
ggsave(
  "plot_machine_data.png",
  plot = machine_data_figure,
  width = 1800, height = 1200, units = "px"
)

# Models

# Relatedness predicted by context condition, random intercept per ID.
m1 <- lmer(value_relatedness ~ Condition + (1 | ID), data = dat)
Anova(m1)

# Memorability predicted by context condition, random intercept per ID;
# tab_model() additionally renders the fitted model as a table.
m2 <- lmer(value_memorability ~ Condition + (1 | ID), data = dat)
Anova(m2)
tab_model(m2)


# Relationship between relatedness and memorability

# Memorability predicted by condition, relatedness and their interaction,
# with a random intercept per ID.
mod_cor <- lmer(
  value_memorability ~ Condition * value_relatedness + (1 | ID),
  data = dat
)
Anova(mod_cor)

# Scatter plot with one regression line per condition. ggscatter() draws no
# points itself (point = FALSE); they are added below as a jittered,
# semi-transparent layer.
plot_mechanism_A <-
  ggscatter(
    dat,
    x = "value_relatedness", y = "value_memorability",
    shape = "Condition", color = "Condition",
    add = "reg.line", palette = "aaas",
    fullrange = TRUE, conf.int = FALSE, rug = FALSE, point = FALSE
  ) +
  # `palette` is not an aesthetic, so it does not belong in aes(); the colour
  # palette is already set through ggscatter(palette = "aaas") above.
  geom_jitter(aes(color = Condition), alpha = 0.1) +
  stat_cor(
    aes(color = Condition),
    method = "pearson",
    r.accuracy = .01, p.accuracy = .001,
    label.x.npc = "left", label.y.npc = "top"
  ) +
  ylim(c(1, 10)) +
  xlim(c(1, 10)) +
  ylab("Memorability") +
  xlab("Relatedness")

# Label the panel "A" and add the figure title.
plot_mechanism_A <- ggarrange(plot_mechanism_A, labels = c("A"))
plot_mechanism_figure <- annotate_figure(
  plot_mechanism_A,
  top = text_grob("Machine data", color = "black", face = "bold", size = 12)
)
plot_mechanism_figure

# Pass the annotated figure explicitly instead of relying on last_plot().
ggsave(
  "plot_mechanism_A.png",
  plot = plot_mechanism_figure,
  width = 1000, height = 1200, units = "px"
)

