library(tidyverse)
library(ggplot2)
library(ggpubr)
library(viridis)
library(readr)
library(car)

# Load the cleaned dive data and give every row (dive) a sequential id.
data <- read_csv("data_cleaned.csv")
# seq_len() yields an empty vector for a file without rows, whereas 1:nrow()
# would yield c(1, 0) and make the assignment fail.
data$dive <- seq_len(nrow(data))

# Keep only the dive-profile columns plus the dive id. The id is taken from
# `data` rather than regenerated, so both tables share the same key by
# construction. Column order (profile columns first, `dive` last) is unchanged.
profiles <- data %>%
  select(diveprofile_5_min:diveprofile_95_min, dive)

# Reshape the profiles from wide (one column per time point) to long
# (one row per dive and time point).
profiles_long <- profiles %>%
  pivot_longer(
    cols = starts_with("diveprofile"),
    names_to = "time",
    values_to = "depth"
  )

# Strip the "diveprofile_" prefix and "_min" suffix from the former column
# names so that `time` becomes a number, and coerce `depth` to numeric too.
profiles_long <- profiles_long %>%
  mutate(
    time = as.numeric(gsub("diveprofile_|_min", "", time)),
    depth = as.numeric(depth)
  )

# Quick visual check of the first 50 rows (still including missing depths).
print(profiles_long, n = 50)

# Drop time points without a depth value before any depth-based calculation.
profiles_cleaned <- filter(profiles_long, !is.na(depth))



# Assign every sample to a depth band. Bands are closed on the left
# (right = FALSE): [0, 10), [10, 20), [30, 40), ... and everything from 40
# upwards falls into "40+".
depth_breaks <- c(0, 10, 20, 30, 40, Inf)
depth_labels <- c("0-10", "10-20", "20-30", "30-40", "40+")

profiles_cleaned <- profiles_cleaned %>%
  mutate(
    depth_interval = cut(
      depth,
      breaks = depth_breaks,
      labels = depth_labels,
      right = FALSE
    )
  )

# Attach the deepest recorded value of each dive to all of its samples.
profiles_cleaned <- profiles_cleaned %>%
  group_by(dive) %>%
  mutate(max_depth = max(depth, na.rm = TRUE)) %>%
  ungroup()


# Summarises, per dive and depth band, the sum of differences between
# consecutive time points that fall into that band. The first sample of a
# group contributes 0 because its lag defaults to itself.
# NOTE(review): the sum telescopes to (last time - first time) within the
# band, i.e. a span rather than accumulated time: a band with a single sample
# yields 0, and a band visited twice also counts the time in between.
# Confirm that this is the intended measure.
# The result stays grouped by `dive` (summarize's default for two groups).
summarise_time_per_interval <- function(samples) {
  samples %>%
    group_by(dive, depth_interval) %>%
    summarize(
      time_spent = sum(time - lag(time, default = first(time))),
      .groups = "drop_last"
    )
}

# Step 1: for each dive, the earliest time point at which its deepest
# recorded depth occurs.
max_depth_times <- profiles_cleaned %>%
  group_by(dive) %>%
  summarize(max_depth_time = min(time[depth == max(depth)]))

# Step 2: keep only the samples taken strictly after that time point.
profiles_after_max_depth <- profiles_cleaned %>%
  inner_join(max_depth_times, by = "dive") %>%
  filter(time > max_depth_time)

# Step 3: time per depth band after maximum depth was reached ...
time_in_depth_ranges <- summarise_time_per_interval(profiles_after_max_depth)

# ... and over the whole recorded profile.
time_in_depth_ranges_total <- summarise_time_per_interval(profiles_cleaned)


# Display the results
print(time_in_depth_ranges)
colnames(time_in_depth_ranges) <- c(
  "dive",
  "after_max_depth_depth_interval",
  "after_max_depth_time_at_intervall"
)

# Preview of the values that become `after_max_depth_time_at_intervall_0_10`.
time_in_depth_ranges %>%
  ungroup() %>%
  filter(after_max_depth_depth_interval == "0-10") %>%
  select(
    dive,
    after_max_depth_time_at_intervall_0_10 = after_max_depth_time_at_intervall
  )

# Adds the time recorded for one depth band as a new column of `df`.
#
# df             data frame with a `dive` key column
# interval_times table whose three columns are, in this order: dive id,
#                depth-band label, time value (column names are ignored)
# label          depth-band label to extract, e.g. "0-10"
# new_name       name of the column added to `df`
#
# Returns `df` with one extra column; dives without a row for `label` get NA.
# The join key is stated explicitly so that an accidental name clash between
# `df` and the new column can never silently become part of the key.
add_interval_time <- function(df, interval_times, label, new_name) {
  per_dive <- ungroup(interval_times)
  stopifnot(ncol(per_dive) == 3, !new_name %in% names(df))
  names(per_dive) <- c("dive", "interval", new_name)
  keep <- !is.na(per_dive$interval) & per_dive$interval == label
  left_join(df, per_dive[keep, c("dive", new_name)], by = "dive")
}

# Time per depth band after maximum depth was reached.
after_columns <- c(
  "0-10" = "after_max_depth_time_at_intervall_0_10",
  "10-20" = "after_max_depth_time_at_intervall_10_20",
  "20-30" = "after_max_depth_time_at_intervall_20_30",
  "30-40" = "after_max_depth_time_at_intervall_30_40"
)
for (label in names(after_columns)) {
  data <- add_interval_time(
    data, time_in_depth_ranges, label, after_columns[[label]]
  )
}

# Time per depth band over the whole profile. The "40+" band gets its own
# column name; reusing the 30-40 name would clash with the existing column
# and the band would never be added.
total_columns <- c(
  "0-10" = "total_time_at_intervall_0_10",
  "10-20" = "total_time_at_intervall_10_20",
  "20-30" = "total_time_at_intervall_20_30",
  "30-40" = "total_time_at_intervall_30_40",
  "40+" = "total_time_at_intervall_40_plus"
)
for (label in names(total_columns)) {
  data <- add_interval_time(
    data, time_in_depth_ranges_total, label, total_columns[[label]]
  )
}
glimpse(data)


write.csv(data, "data_with_diveprofiles.csv")


# Apply the prediction formula published by Fichtner et al. (2021).
data$published_formula <- (
  0.0196785 * data$age -
    0.0068313 * data$surface_interval_hr_min +
    0.0228502 * data$max_depth +
    0.0002302 * data$BarLiter
)^2

# Correlation table
data$person_id_f <- as.factor(data$person_id)
# Spearman correlation matrix (printed only); missing values are handled
# pairwise, so each coefficient uses all dives complete for that pair.
cor(
  data %>%
    dplyr::select(
      bubble_grade, age, bmi, BarLiter, total_dive_time_min, max_depth,
      dive_order, dive:total_time_at_intervall_0_10
    ),
  use = "pairwise.complete.obs",
  method = "spearman"
)

library(rstatix)
# Spearman tests of every selected variable against bubble_grade, with the
# p-value additionally rounded to three decimals.
sprmn <- data %>%
  dplyr::select(
    bubble_grade, age, bmi, BarLiter, total_dive_time_min, max_depth,
    dive_order,
    after_max_depth_time_at_intervall_0_10:total_time_at_intervall_0_10,
    published_formula
  ) %>%
  cor_test(method = "spearman") %>%
  filter(var1 == "bubble_grade") %>%
  mutate(p_round = round(p, 3))
sprmn

# write_csv() does not create directories; make sure the output folder exists
# so the script does not abort here on a fresh checkout.
dir.create("figures", showWarnings = FALSE, recursive = TRUE)
write_csv(sprmn, file = "figures/table_correlation.csv")

# Summary statistics, restricted to dives that are complete in all variables
# listed here.
sumstats <- data %>%
  select(
    bubble_grade, max_depth, after_max_depth_time_at_intervall_0_10,
    gender, age, total_dive_time_min, person_id_f
  ) %>%
  na.omit()

# Per-dive descriptives.
sd(sumstats$max_depth)
summary(sumstats$max_depth)
sd(sumstats$total_dive_time_min)
summary(sumstats$total_dive_time_min)

# bubble_grade is a per-dive variable, so it has to be summarised here, while
# the column is still present: the table is reduced to person-level columns
# below, after which `sumstats$bubble_grade` would be NULL and sd() would fail.
table(sumstats$bubble_grade)
sd(sumstats$bubble_grade)

# Collapse to one row per distinct (gender, age, person) combination so that
# people with several dives are not counted repeatedly.
sumstats <- sumstats %>% select(gender, age, person_id_f)
sumstats <- unique(sumstats)
table(sumstats$gender)
summary(sumstats$age)
sd(sumstats$age)


# Linear mixed models: log(bubble_grade + 1) explained by maximum depth, the
# time between 0 and 10 m after maximum depth, and their interaction, with a
# random intercept per person.
#
# lmer() is provided by lme4, which none of the packages attached at the top
# of the script puts on the search path, so it is attached here.
# NOTE(review): if p-values via lmerTest were intended, attach lmerTest
# instead - confirm against the original session.
library(lme4)
library(jtools)

# Standardized: outcome and predictors are z-scored inside the formula.
mod_std <- lmer(
  scale(log(bubble_grade + 1)) ~
    scale(max_depth) * scale(after_max_depth_time_at_intervall_0_10) +
    (1 | person_id_f),
  data = data,
  REML = TRUE
)
summary(mod_std)
summ(mod_std)

# Unstandardized: same model on the original scales.
mod <- lmer(
  log(bubble_grade + 1) ~
    max_depth * after_max_depth_time_at_intervall_0_10 +
    (1 | person_id_f),
  data = data
)
summary(mod)
summ(mod)


# Ordinary linear model (no random effect) with the same fixed effects on
# log(bubble_grade + 1); fitted for residual diagnostics and prediction.
mod_vis <- lm(
  (log(bubble_grade + 1)) ~
    (max_depth) * (after_max_depth_time_at_intervall_0_10),
  data = data
)
summary(mod_vis)

# Residual diagnostics: quantile-comparison plot (car::qqp), the standard lm
# diagnostic plots, and a histogram of the residuals. The argument
# expressions are left untouched because the plot labels are derived from them.
qqp(mod_vis$residuals)
plot(mod_vis)
hist(mod_vis$residuals)


# Three reference values (quartiles) of the time between 0 and 10 m after
# maximum depth: low, medium, high.
after_max_depth_time_values <- quantile(
  data$after_max_depth_time_at_intervall_0_10,
  probs = c(0.25, 0.5, 0.75),
  na.rm = TRUE
)

# 100 evenly spaced maximum depths spanning the observed range.
max_depth_seq <- seq(
  min(data$max_depth, na.rm = TRUE),
  max(data$max_depth, na.rm = TRUE),
  length.out = 100
)

# Every combination of maximum depth and the three reference times.
grid <- expand.grid(
  max_depth = max_depth_seq,
  after_max_depth_time_at_intervall_0_10 = after_max_depth_time_values
)

# Predict on the model scale. mod_vis was fitted on the unscaled predictors,
# so the grid is passed as is; it holds exactly the two predictor columns.
grid$log_bubble_grade_pred <- predict(mod_vis, newdata = grid)

# Back-transform from log(bubble_grade + 1) to bubble grade.
grid$bubble_grade_pred <- exp(grid$log_bubble_grade_pred) - 1

# One line per reference time. The predictions are already exact model
# output, so they are drawn directly with geom_line(); fitting a loess
# smoother on top of them would only approximate the curve. `size = 1` keeps
# the line weight geom_smooth() uses by default.
figure_2 <- ggplot(
  grid,
  aes(
    x = max_depth,
    y = bubble_grade_pred,
    color = factor(after_max_depth_time_at_intervall_0_10)
  )
) +
  geom_line(size = 1) +
  scale_color_viridis_d(end = 0.8) +
  labs(
    x = "maximum depth (m)",
    y = "predicted average bubble grade",
    color = "dive time after max depth \nbetween 0 and 10 m (min)"
  ) +
  ylim(c(0, 1.5)) +
  xlim(0, 44) +
  theme_minimal() +
  ggtitle("")

figure_2

ggsave(
  "figures/figure_2.jpg",
  plot = figure_2,
  units = "cm",
  height = 10,
  width = 15,
  dpi = 400
)




#### Recalculate formula ####
# Complete cases of all variables needed for refitting the formula.
data2 <- data %>%
  select(
    bubble_grade, age, BarLiter, surface_interval_hr_min, max_depth,
    published_formula, after_max_depth_time_at_intervall_0_10
  )
data2 <- na.omit(data2)

# Refit with the same predictors as the published formula, on
# sqrt(bubble_grade).
mod <- lm(
  (sqrt(bubble_grade)) ~ age + surface_interval_hr_min + max_depth + BarLiter,
  data = data2
)
summary(mod)
# Shift the fitted values by a constant and square them to get back to the
# bubble-grade scale.
# NOTE(review): 0.4474 looks like the negated intercept of this fit copied by
# hand (the published formula has no intercept) - confirm, and consider
# deriving it from coef(mod) so it cannot go stale when the data change.
data2$old_formula_recalculated <- (mod$fitted.values + 0.4474)^2

cor(
  data.frame(
    data2$old_formula_recalculated,
    data2$bubble_grade,
    data2$published_formula
  ),
  method = "spearman"
)


# Extended model: adds the time between 0 and 10 m after maximum depth and
# its interaction with maximum depth.
mod <- lm(
  (sqrt(bubble_grade)) ~ age + surface_interval_hr_min +
    after_max_depth_time_at_intervall_0_10 * max_depth + BarLiter,
  data = data2
)
summary(mod)
# NOTE(review): 0.6733231 is presumably the negated intercept of this second
# fit - same caveat as for the constant above.
data2$new_formula <- (mod$fitted.values + 0.6733231)^2
cor(
  data.frame(
    data2$old_formula_recalculated,
    data2$bubble_grade,
    data2$published_formula,
    data2$new_formula
  ),
  method = "spearman"
)


# Published formula, Fichtner et al., 2021 (recomputed here so that this
# section does not depend on earlier steps).
data$published_formula <- (
  0.0196785 * data$age -
    0.0068313 * data$surface_interval_hr_min +
    0.0228502 * data$max_depth +
    0.0002302 * data$BarLiter
)^2
data$bubble_grade

# Rank correlation between observed grade and formula prediction.
cor(
  data$bubble_grade,
  data$published_formula,
  use = "pairwise.complete.obs",
  method = "spearman"
)

# Boxplot of the predicted value per observed bubble grade, with jittered raw
# points and a linear trend line. The trend uses the position of the grade
# among the factor levels as a numeric x.
published_formula_fig <- ggplot(
  data,
  aes(x = factor(bubble_grade), y = published_formula)
) +
  geom_boxplot(fill = "#31678e", alpha = 0.5) +
  geom_jitter(width = 0.1, alpha = 0.2, size = 1) +
  geom_smooth(
    aes(x = as.numeric(factor(bubble_grade))),
    method = "lm",
    se = FALSE,
    color = "red",
    size = 1.2
  ) +
  labs(
    title = "",
    x = "Observed Bubble Grade",
    y = "Predicted Bubble Grade \n (Published Formula Fichtner et al., 2021)"
  ) +
  theme_minimal()
published_formula_fig
ggsave(
  "figures/figure_3_published_formula_boxplot.jpg",
  plot = published_formula_fig,
  units = "cm",
  height = 12,
  width = 16,
  dpi = 400
)

