# Data analysis script for 'The relationships between exercise and mood: A naturalistic, longitudinal study of recreational runners'
# NB. The variable day.type is whether it was a Run day, Next day or Baseline
# The variable Fitness is best age-adjusted 5k time
# The variable Run.time.difference is the time from the end of the run to the affective report

### Preliminaries####
# libraries 
library(nlme)
library(psych)
library(ggplot2)
library(MASS)
library(gridExtra)
library(dplyr)


# Read data
# Load the publication data set and derive the two working subsets used below:
#   one.run.days - rows where Run equals 1
#   run.days     - rows whose day.type is "Run day"
# which() discards rows where the condition is NA, exactly as subset() does.
d <- read.csv("Bonham.publication.data.csv")
one.run.days <- d[which(d$Run == 1), , drop = FALSE]
run.days <- d[which(d$day.type == "Run day"), , drop = FALSE]

### Descriptive statistics####
# Number of non-missing valence and arousal reports
sum(!is.na(d$Valence))
sum(!is.na(d$Arousal))

# Counts by value of Run: all rows, rows with a valence report, and per participant
xtabs(~d$Run)
xtabs(~d$Run[!is.na(d$Valence)])
xtabs(~d$Run+d$Participant.number)
describe(d$Total.runs)

# Number of participants with each Gender code.
# which() drops rows where Gender is missing; indexing with the bare logical
# would insert an NA participant number and inflate the count by one.
length(unique(d$Participant.number[which(d$Gender == 1)]))
length(unique(d$Participant.number[which(d$Gender == 2)]))

# Mean and SD, across participants, of each participant's mean Fitness.
# The per-participant summary is computed once and reused.
fitness.by.participant <- summarise(
  group_by(d, Participant.number),
  mean = mean(Fitness)
)$mean
mean(fitness.by.participant, na.rm = TRUE)
sd(fitness.by.participant, na.rm = TRUE)

describe(d$Valence)
describe(d$Arousal)
describe(d$Dose)
xtabs(~d$day.type)

# Association between valence and arousal.
# cor.test() has no `use` argument (it was silently swallowed by `...`);
# it already restricts itself to complete pairs.
cor.test(d$Valence, d$Arousal)
# Arousal regressed on the absolute value of valence
summary(lm(Arousal ~ abs(Valence), data = d))
boxplot(d$Arousal~d$Valence)

# Figure 1: distribution of arousal at each valence value (violins) with a
# loess smooth, using only rows that have a valence report.
fig1 <- ggplot(d[!is.na(d$Valence), ], aes(x = Valence, y = Arousal)) +
  geom_violin(aes(group = as.factor(Valence)), fill = "yellow") +
  # the smooth inherits x/y from the plot-level aes()
  geom_smooth(method = "loess") +
  xlab("Valence") +
  ylab("Arousal") +
  # the `+` before theme_bw() was missing, so the theme was never applied
  theme_bw()
fig1

# Write the figure to disk. print() is explicit so the plot is drawn even when
# the script is run through source(), where top-level values are not auto-printed.
png("Figure1.png", res = 300, units = "in", width = 6, height = 4)
print(fig1)
dev.off()

# One row per participant, built from the rows that have a Dose value:
# mean Total.runs (n), mean Valence, mean Arousal, mean Fitness and mean Dose.
part.summary <- summarise(
  group_by(d[!is.na(d$Dose), ], Participant.number),
  n = mean(Total.runs),
  Valence = mean(Valence, na.rm = TRUE),
  Arousal = mean(Arousal, na.rm = TRUE),
  Fitness = mean(Fitness),
  Dose = mean(Dose, na.rm = TRUE)
)

# Pairwise correlations between the participant-level fitness, dose and n
cor.test(part.summary$Fitness, part.summary$Dose)
cor.test(part.summary$Dose, part.summary$n)
cor.test(part.summary$Fitness, part.summary$n)
describe(part.summary$Fitness)

#### Affect and time of day#####
describe(d$Time.of.reply)

# Bin Time.of.reply into three labels:
#   < 12 -> "Morning", 12 to < 18 -> "Afternoon", >= 18 -> "Evening"
# (presumably hours on a 24h clock - confirm against the codebook).
# Missing reply times stay NA.
d$Time.of.day <- ifelse(
  d$Time.of.reply < 12,
  "Morning",
  ifelse(d$Time.of.reply < 18, "Afternoon", "Evening")
)
is.numeric(d$Time.of.day)

# Factor with "Morning" as the reference level for the models below
d$Time.of.day <- relevel(as.factor(d$Time.of.day), ref = "Morning")

# Mean and SD of valence and arousal within each time-of-day bin
View(
  summarize(
    group_by(d, Time.of.day),
    meanv = mean(Valence, na.rm = TRUE),
    sdv = sd(Valence, na.rm = TRUE),
    meana = mean(Arousal, na.rm = TRUE),
    sda = sd(Arousal, na.rm = TRUE)
  )
)

# Random-intercept (per participant) ML models of valence, then arousal,
# on time of day. `m` is reused for both fits.
m <- lme(
  Valence ~ Time.of.day,
  random = ~ 1 | Participant.number,
  data = d,
  na.action = na.omit,
  method = "ML"
)
summary(m)
m <- lme(
  Arousal ~ Time.of.day,
  random = ~ 1 | Participant.number,
  data = d,
  na.action = na.omit,
  method = "ML"
)
summary(m)

### Affective valence in relation to running####
# Model 1: Just day type
# Valence on day.type with a random intercept per participant, fitted by ML.
m1 <- lme(
  Valence ~ day.type,
  random = ~ 1 | Participant.number,
  data = d,
  na.action = na.omit,
  method = "ML"
)
summary(m1)
intervals(m1)

# Intercept-only comparison model, restricted to rows with a Time.since.run value
mnull <- lme(
  Valence ~ 1,
  random = ~ 1 | Participant.number,
  data = subset(d, !is.na(Time.since.run)),
  na.action = na.omit,
  method = "ML"
)
# PRV: proportional reduction in residual variance of m1 relative to mnull
(mnull$sigma^2 - m1$sigma^2) / mnull$sigma^2

# Figure 2a
# Mean valence per day type with +/- 1 SE error bars (SE taken from describe()).
Vsummary <- summarise(
  group_by(d, day.type),
  meanV = mean(Valence, na.rm = TRUE),
  seV = describe(Valence)$se
)

fig2a <- ggplot(Vsummary, aes(x = day.type, y = meanV)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black", fill = "red4") +
  geom_errorbar(aes(ymin = meanV - seV, ymax = meanV + seV), width = 0.2) +
  # panel label, placed at the first x position
  annotate("text", label = "A", x = 1, y = 3.4, size = 5) +
  scale_x_discrete(limits = c("Baseline", "Next day", "Run day")) +
  coord_cartesian(ylim = c(0, 3.5)) +
  xlab("Day type") +
  ylab("Valence") +
  theme_bw()
fig2a


# Causation versus selection (model 2)
# On days in one.run.days, compare valence between reports with a negative
# Run.time.difference and all other reports.
# NOTE(review): negative values look like reports made after the run (later
# code plots -1*Run.time.difference as "Time since run") - confirm.
boxplot(one.run.days$Valence ~ (one.run.days$Run.time.difference < 0))
describeBy(one.run.days$Valence, group = (one.run.days$Run.time.difference < 0))

mx <- lme(
  Valence ~ I(Run.time.difference < 0),
  random = ~ 1 | Participant.number,
  data = one.run.days,
  na.action = na.omit,
  method = "ML"
)
summary(mx)
intervals(mx)

# PRV for mx
# Intercept-only model on the rows that have a Run.time.difference value
mxnull <- lme(
  Valence ~ 1,
  random = ~ 1 | Participant.number,
  data = subset(one.run.days, !is.na(Run.time.difference)),
  na.action = na.omit,
  method = "ML"
)
(mxnull$sigma^2 - mx$sigma^2) / mxnull$sigma^2

# Model 3 - Select best model by stepAIC
# Gender is a categorical code, so treat it as a factor here, as the arousal
# Model 3 does. as.factor() leaves an existing factor unchanged, so this is
# safe to run more than once.
d$Gender <- as.factor(d$Gender)

# Full model: day type, standardised Dose and Fitness, Gender, and the
# interaction of day type with each of them; random intercept per participant.
max.model <- lme(
  Valence ~ day.type + scale(Dose) + scale(Fitness) + Gender +
    day.type:scale(Fitness) + day.type:scale(Dose) + day.type:Gender,
  random = ~ 1 | Participant.number,
  data = d,
  na.action = na.omit,
  method = "ML"
)
step <- stepAIC(max.model, direction = "both")
summary(step)
intervals(step)

# PRV for the interaction between time and fitness:
# residual variance of the selected model versus the same model without the term
stepnull2 <- update(step, . ~ . - day.type:scale(Fitness))
(stepnull2$sigma^2 - step$sigma^2) / stepnull2$sigma^2

# PRV for the main effect of fitness
mprv1 <- lme(
  Valence ~ day.type + scale(Fitness),
  random = ~ 1 | Participant.number,
  data = d,
  na.action = na.omit,
  method = "ML"
)
# The comparison model is fitted on the rows mprv1 actually uses (those with a
# Fitness value), so the two residual variances refer to the same observations.
mprv2 <- lme(
  Valence ~ day.type,
  random = ~ 1 | Participant.number,
  data = subset(d, !is.na(Fitness)),
  na.action = na.omit,
  method = "ML"
)
# This PRV was announced by the comment above but never actually computed
(mprv2$sigma^2 - mprv1$sigma^2) / mprv2$sigma^2


# Figure 2b to investigate the interactions
# Median split on Fitness: values above the median are labelled "Higher", the
# rest "Lower"; rows with missing Fitness stay NA. The split previously used
# mean(), contradicting the variable name and the median split used for Dose.
d$FitnessMedianSplit <- ifelse(
  d$Fitness > median(d$Fitness, na.rm = TRUE),
  "Higher",
  "Lower"
)

# Mean valence (+/- 1 SE from describe()) per day type within each fitness group
Vsummary3 <- summarise(
  group_by(d, day.type, FitnessMedianSplit),
  meanV = mean(Valence, na.rm = TRUE),
  seV = describe(Valence)$se
)
Vsummary3 <- subset(Vsummary3, !is.na(FitnessMedianSplit))
Vsummary3$day.type <- as.character(Vsummary3$day.type)
colnames(Vsummary3)[2] <- "Running.fitness"

fig2b <- ggplot(Vsummary3, aes(x = day.type, y = meanV, fill = Running.fitness)) +
  theme_bw() +
  labs(fill = "Running fitness") +
  geom_bar(position = "dodge", stat = "identity", colour = "black") +
  # dodge the error bars by the same width as the bars so they line up
  geom_errorbar(
    aes(ymin = meanV - seV, ymax = meanV + seV),
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  ylab("Valence") +
  xlab("Day type") +
  scale_x_discrete(limits = c("Baseline", "Next day", "Run day")) +
  scale_fill_manual(values = c("red2", "grey")) +
  annotate("text", label = "B", x = 1, y = 3.4, size = 5) +
  coord_cartesian(ylim = c(0, 3.5))
fig2b

# Panels A and B side by side; grid.arrange() draws directly onto the open device
png("figure2.png", res = 300, units = "in", width = 8, height = 4)
grid.arrange(fig2a, fig2b, widths = c(4, 6))
dev.off()

# Looking at time elapsed on run days (model 4)
# Run-day reports with a negative Run.time.difference; the sign is flipped so
# the predictor is positive (plotted below as "Time since run (mins)").
# The subset is built once and shared by both models and the figure.
post.run.valence <- subset(run.days, Run.time.difference < 0)

rt1 <- lme(
  Valence ~ I(-1 * Run.time.difference),
  random = ~ 1 | Participant.number,
  data = post.run.valence,
  na.action = na.omit,
  method = "ML"
)
rtnull <- lme(
  Valence ~ 1,
  random = ~ 1 | Participant.number,
  data = post.run.valence,
  na.action = na.omit,
  method = "ML"
)
summary(rt1)
intervals(rt1)
# PRV of rt1 relative to the intercept-only model
(rtnull$sigma^2 - rt1$sigma^2) / rtnull$sigma^2

# Slope scaled by 60 (per hour if Run.time.difference is in minutes), raw and
# in SD units of run-day valence. The fixed-effect slope is read with fixef()
# rather than from the first participant's row of coefficients(); the two agree
# because only the intercept is random.
rt1.slope <- fixef(rt1)[[2]]
rt1.slope * 60
(rt1.slope * 60) / sd(run.days$Valence, na.rm = TRUE)

# Diagnostics. rt1$residuals is a two-column matrix (population-level and
# participant-level residuals), so hist() on it pooled both sets; resid()
# returns only the within-participant residuals.
plot(rt1)
hist(resid(rt1))

fig3 <- ggplot(post.run.valence, aes(x = -1 * Run.time.difference, y = Valence)) +
  theme_bw() +
  geom_point() +
  geom_smooth(method = "lm") +
  xlab("Time since run (mins)") +
  scale_y_continuous(breaks = -5:5)
fig3

# print() is explicit so the file is not left empty when the script is source()d
png("figure3.png", res = 300, units = "in", width = 4, height = 4)
print(fig3)
dev.off()



### Arousal in relation to running####
# Model 1: Just day type
# Arousal on day.type with a random intercept per participant, fitted by ML.
# Note that this overwrites the valence model stored in m1.
m1 <- lme(
  Arousal ~ day.type,
  random = ~ 1 | Participant.number,
  data = d,
  na.action = na.omit,
  method = "ML"
)
summary(m1)
intervals(m1)

# PRV
# Intercept-only comparison model on rows with a Time.since.run value
m1null <- lme(
  Arousal ~ 1,
  random = ~ 1 | Participant.number,
  data = subset(d, !is.na(Time.since.run)),
  na.action = na.omit,
  method = "ML"
)
(m1null$sigma^2 - m1$sigma^2) / m1null$sigma^2

# Figure 4
# Panel A: mean arousal per day type with +/- 1 SE error bars (SE from describe()).
Asummary <- summarise(
  group_by(d, day.type),
  meanA = mean(Arousal, na.rm = TRUE),
  seA = describe(Arousal)$se
)

fig4a <- ggplot(Asummary, aes(x = day.type, y = meanA)) +
  geom_bar(stat = "identity", position = "dodge", fill = "olivedrab4") +
  geom_errorbar(aes(ymin = meanA - seA, ymax = meanA + seA), width = 0.2) +
  # panel label, placed at the first x position
  annotate("text", x = 1, y = 3.9, label = "A", size = 5) +
  scale_x_discrete(limits = c("Baseline", "Next day", "Run day")) +
  coord_cartesian(ylim = c(0, 4)) +
  xlab("Day type") +
  ylab("Arousal") +
  theme_bw()
fig4a


# Causation versus selection (model 2)
# On days in one.run.days, compare arousal between reports with a negative
# Run.time.difference and all other reports.
# NOTE(review): negative values look like reports made after the run (later
# code plots -1*Run.time.difference as "Time since run") - confirm.
boxplot(one.run.days$Arousal ~ (one.run.days$Run.time.difference < 0))
describeBy(one.run.days$Arousal, group = (one.run.days$Run.time.difference < 0))

# Overwrites the valence versions of mx and mxnull
mx <- lme(
  Arousal ~ I(Run.time.difference < 0),
  random = ~ 1 | Participant.number,
  data = one.run.days,
  na.action = na.omit,
  method = "ML"
)
summary(mx)
intervals(mx)

# PRV
# Intercept-only model on the rows that have a Run.time.difference value
mxnull <- lme(
  Arousal ~ 1,
  random = ~ 1 | Participant.number,
  data = subset(one.run.days, !is.na(Run.time.difference)),
  na.action = na.omit,
  method = "ML"
)
(mxnull$sigma^2 - mx$sigma^2) / mxnull$sigma^2

# Model 3 - Select best model by stepAIC
# Gender enters the model as a factor
d$Gender <- as.factor(d$Gender)

# Full model: day type, standardised Dose and Fitness, Gender, and the
# interaction of day type with each of them; random intercept per participant.
max.modelA <- lme(
  Arousal ~ day.type + scale(Dose) + scale(Fitness) + Gender +
    day.type:scale(Fitness) + day.type:scale(Dose) + day.type:Gender,
  random = ~ 1 | Participant.number,
  data = d,
  na.action = na.omit,
  method = "ML"
)
stepA <- stepAIC(max.modelA, direction = "both")
summary(stepA)
intervals(stepA)

# PRV
# Selected model versus the same model without the Dose-by-day-type interaction
stepAnull <- update(stepA, . ~ . - scale(Dose):day.type)
summary(stepAnull)
(stepAnull$sigma^2 - stepA$sigma^2) / stepAnull$sigma^2

# Figure 4B
# Median split on Dose: above the median is "High", otherwise "Low";
# rows with missing Dose stay NA in both helper columns.
d$DoseMedianSplit <- d$Dose > median(d$Dose, na.rm = TRUE)
d$DoseM <- ifelse(d$DoseMedianSplit, "High", "Low")

# Mean arousal (+/- 1 SE from describe()) per day type within each dose group
Asummary2 <- summarise(
  group_by(d, day.type, DoseM),
  meanA = mean(Arousal, na.rm = TRUE),
  seA = describe(Arousal)$se
)
Asummary2 <- subset(Asummary2, !is.na(DoseM))
Asummary2$day.type <- as.character(Asummary2$day.type)
colnames(Asummary2)[2] <- "Dose"

fig4b <- ggplot(Asummary2, aes(x = day.type, y = meanA, fill = Dose)) +
  geom_bar(stat = "identity", position = "dodge") +
  # dodge the error bars by the same width as the bars so they line up
  geom_errorbar(
    aes(ymin = meanA - seA, ymax = meanA + seA),
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  annotate("text", x = 1, y = 3.9, label = "B", size = 5) +
  scale_x_discrete(limits = c("Baseline", "Next day", "Run day")) +
  scale_fill_manual(values = c("darkgreen", "green")) +
  coord_cartesian(ylim = c(0, 4)) +
  xlab("Day type") +
  ylab("Arousal") +
  theme_bw()

fig4b


# Panels A and B side by side; grid.arrange() draws directly onto the open device
png("figure4.png", res = 300, units = "in", width = 8, height = 4)
grid.arrange(fig4a, fig4b, widths = c(4, 6))
dev.off()

# Arousal and time since run for run days only
# Run-day reports with a negative Run.time.difference; the sign is flipped so
# the predictor is positive (plotted below as "Time since run (mins)").
# The subset is built once and shared by both models and the figure.
post.run.arousal <- subset(run.days, Run.time.difference < 0)

rta1 <- lme(
  Arousal ~ I(-1 * Run.time.difference),
  random = ~ 1 | Participant.number,
  data = post.run.arousal,
  na.action = na.omit,
  method = "ML"
)
rtanull <- lme(
  Arousal ~ 1,
  random = ~ 1 | Participant.number,
  data = post.run.arousal,
  na.action = na.omit,
  method = "ML"
)
summary(rta1)
intervals(rta1)
# PRV of rta1 relative to the intercept-only model
(rtanull$sigma^2 - rta1$sigma^2) / rtanull$sigma^2

# Slope scaled by 60 (per hour if Run.time.difference is in minutes), raw and
# in SD units of run-day arousal. The fixed-effect slope is read with fixef()
# rather than from the first participant's row of coefficients(); the two agree
# because only the intercept is random.
rta1.slope <- fixef(rta1)[[2]]
rta1.slope * 60
(rta1.slope * 60) / sd(run.days$Arousal, na.rm = TRUE)

fig5 <- ggplot(post.run.arousal, aes(x = -1 * Run.time.difference, y = Arousal)) +
  theme_bw() +
  geom_point() +
  geom_smooth(method = "lm") +
  xlab("Time since run (mins)") +
  scale_y_continuous(breaks = 0:6)
fig5

# print() is explicit so the file is not left empty when the script is source()d
png("figure5.png", res = 300, units = "in", width = 4, height = 4)
print(fig5)
dev.off()
