# Figure 7: human sulphur values and outliers 


# load packages 
library(dplyr)
library(ggplot2)
library(hdrcde)
library(patchwork)

#load dataset
df <- read.csv(file.choose(), check.names = FALSE)


## function (Rank plot & Boxplot & Density)
Denplot <- function(dat, sample, iso,
                    id_col = "Individual",
                    prob_hdr = 95,
                    pig_label = "S_domesticus (mean ± 2SD)",
                    fauna_label = "All fauna pooled (mean ± 2SD)") {
  
  # --- checks ---
  if (!is.data.frame(dat)) stop("`dat` must be a data.frame.")
  if (!("Sample" %in% names(dat))) stop("`dat` must contain a column named `Sample`.")
  if (!(id_col %in% names(dat))) stop(paste0("ID column `", id_col, "` not found."))
  if (!is.character(iso) || length(iso) != 1) stop("`iso` must be a single string.")
  if (!(iso %in% names(dat))) stop(paste0("Column `", iso, "` not found."))
  
  # --- keep complete cases for isotope ---
  dat2 <- dat %>% filter(!is.na(.data[[iso]]))
  
  # Panel 1
  humans <- dat2 %>%
    filter(Sample == sample) %>%
    mutate(.iso = .data[[iso]])
  
  if (nrow(humans) < 2)
    stop("Not enough non-NA values for the chosen sample + isotope.")
  
  humans_rank <- humans %>%
    arrange(.iso) %>%
    mutate(rank = row_number())
  
  p1 <- ggplot(humans_rank, aes(x = rank, y = .iso)) +
    geom_point(
      shape = 21,
      size = 2.2,
      alpha = 0.85,
      fill = "grey80",
      color = "black",
      stroke = 0.4
    ) +
    theme_classic() +
    labs(
      title = paste0(sample, " individuals ordered by increasing ", iso),
      x = "Individuals (rank)",
      y = iso
    )
  
  # Panel 2
  out_df <- dat2 %>%
    group_by(Sample) %>%
    mutate(
      Q1  = quantile(.data[[iso]], 0.25, na.rm = TRUE),
      Q3  = quantile(.data[[iso]], 0.75, na.rm = TRUE),
      IQR = Q3 - Q1,
      is_outlier =
        .data[[iso]] < (Q1 - 1.5 * IQR) |
        .data[[iso]] > (Q3 + 1.5 * IQR)
    ) %>%
    ungroup()

  out_df$Sample <- factor(out_df$Sample, levels = unique(out_df$Sample))
  
  p2 <- ggplot(out_df, aes(x = Sample, y = .data[[iso]])) +
    geom_boxplot(
      fill = "grey85",
      color = "black",
      outlier.shape = NA
    ) +
    geom_jitter(
      data = subset(out_df, !is_outlier),
      color = "grey60",
      width = 0.15,
      size = 2
    ) +
    geom_jitter(
      data = subset(out_df, is_outlier),
      color = "red3",
      width = 0.15,
      size = 2.4
    ) +
    theme_classic() +
    labs(
      title = "Boxplot (all samples)",
      x = NULL,
      y = iso
    )
  
  # Add faunal ranges (mean ± 2SD, pig & pooled fauna
  fauna_vals <- dat2 %>%
    filter(Sample != sample) %>%
    pull(.data[[iso]]) %>%
    na.omit()
  
  pig_vals <- dat2 %>%
    filter(Sample == "S_domesticus") %>%
    pull(.data[[iso]]) %>%
    na.omit()
  
  get_m2sd <- function(x) {
    if (length(x) < 2) return(NULL)
    m <- mean(x); s <- sd(x)
    list(m = m, lo = m - 2*s, hi = m + 2*s, n = length(x))
  }
  
  pig_m2sd   <- get_m2sd(pig_vals)
  fauna_m2sd <- get_m2sd(fauna_vals)
  
  
  levs <- levels(out_df$Sample)
  human_x <- which(levs == sample)
  
  fauna_xs <- which(levs != sample)  # all fauna categories present in the plot
  fauna_x_mid <- if (length(fauna_xs) > 0) mean(fauna_xs) else NA_real_
  

  if (!is.null(pig_m2sd) && "S_domesticus" %in% levs) {
    pig_x <- which(levs == "S_domesticus")
    p2 <- p2 +
      annotate("segment", x = pig_x, xend = pig_x, y = pig_m2sd$lo, yend = pig_m2sd$hi, linewidth = 1.0) +
      annotate("segment", x = pig_x - 0.12, xend = pig_x + 0.12, y = pig_m2sd$lo, yend = pig_m2sd$lo, linewidth = 1.0) +
      annotate("segment", x = pig_x - 0.12, xend = pig_x + 0.12, y = pig_m2sd$hi, yend = pig_m2sd$hi, linewidth = 1.0) +
      annotate("point",   x = pig_x, y = pig_m2sd$m, shape = 21, size = 2.6, fill = "white", color = "black", stroke = 0.6) +
      annotate("text",    x = pig_x, y = pig_m2sd$hi, label = paste0(pig_label, "\n(n=", pig_m2sd$n, ")"),
               vjust = -0.6, size = 3)
  }
  

  if (!is.null(fauna_m2sd) && is.finite(fauna_x_mid)) {
    p2 <- p2 +
      annotate("segment", x = fauna_x_mid, xend = fauna_x_mid, y = fauna_m2sd$lo, yend = fauna_m2sd$hi,
               linewidth = 0.9, linetype = "dashed") +
      annotate("segment", x = fauna_x_mid - 0.12, xend = fauna_x_mid + 0.12, y = fauna_m2sd$lo, yend = fauna_m2sd$lo,
               linewidth = 0.9, linetype = "dashed") +
      annotate("segment", x = fauna_x_mid - 0.12, xend = fauna_x_mid + 0.12, y = fauna_m2sd$hi, yend = fauna_m2sd$hi,
               linewidth = 0.9, linetype = "dashed") +
      annotate("point",   x = fauna_x_mid, y = fauna_m2sd$m, shape = 21, size = 2.4, fill = "white", color = "black", stroke = 0.6) +
      annotate("text",    x = fauna_x_mid, y = fauna_m2sd$hi, label = paste0(fauna_label, "\n(n=", fauna_m2sd$n, ")"),
               vjust = -0.6, size = 3)
  }
  
  # panel 3
  target <- humans_rank$.iso
  
  bw <- bw.SJ(target)
  hd <- hdrcde::hdr.den(target, prob = prob_hdr, plot = FALSE, h = bw)
  hdr_int <- hd$hdr
  
  inside <- rep(FALSE, length(target))
  for (i in seq_len(nrow(hdr_int))) {
    inside <- inside | (target >= hdr_int[i, 1] & target <= hdr_int[i, 2])
  }
  
  p3 <- ggplot(data.frame(x = target), aes(x = x)) +
    geom_density(bw = bw, linewidth = 0.9) +
    geom_rug(
      data = data.frame(x = target[inside]),
      aes(x = x),
      sides = "b",
      alpha = 0.35,
      linewidth = 1,
      color = "grey40"
    ) +
    geom_rug(
      data = data.frame(x = target[!inside]),
      aes(x = x),
      sides = "b",
      linewidth = 1,
      color = "black"
    ) +
    theme_classic() +
    labs(
      title = paste0(iso, " ", sample, " distribution"),
      x = iso,
      y = "Density"
    )
  
  # Add faunal ranges
  ymax <- ggplot_build(p3)$layout$panel_params[[1]]$y.range[2]
  y_pig   <- 0.03 * ymax
  y_fauna <- 0.08 * ymax
  
  if (!is.null(pig_m2sd)) {
    p3 <- p3 +
      annotate("segment", x = pig_m2sd$lo, xend = pig_m2sd$hi, y = y_pig, yend = y_pig, linewidth = 1.1) +
      annotate("segment", x = pig_m2sd$lo, xend = pig_m2sd$lo, y = y_pig * 0.85, yend = y_pig * 1.15, linewidth = 1.1) +
      annotate("segment", x = pig_m2sd$hi, xend = pig_m2sd$hi, y = y_pig * 0.85, yend = y_pig * 1.15, linewidth = 1.1) +
      annotate("point",   x = pig_m2sd$m, y = y_pig, shape = 21, size = 2.2, fill = "white", color = "black", stroke = 0.6) +
      annotate("text",    x = pig_m2sd$m, y = y_pig * 1.6, label = paste0(pig_label, " (n=", pig_m2sd$n, ")"), size = 3)
  }
  
  if (!is.null(fauna_m2sd)) {
    p3 <- p3 +
      annotate("segment", x = fauna_m2sd$lo, xend = fauna_m2sd$hi, y = y_fauna, yend = y_fauna,
               linewidth = 1.0, linetype = "dashed") +
      annotate("segment", x = fauna_m2sd$lo, xend = fauna_m2sd$lo, y = y_fauna * 0.85, yend = y_fauna * 1.15,
               linewidth = 1.0, linetype = "dashed") +
      annotate("segment", x = fauna_m2sd$hi, xend = fauna_m2sd$hi, y = y_fauna * 0.85, yend = y_fauna * 1.15,
               linewidth = 1.0, linetype = "dashed") +
      annotate("point",   x = fauna_m2sd$m, y = y_fauna, shape = 21, size = 2.2, fill = "white", color = "black", stroke = 0.6) +
      annotate("text",    x = fauna_m2sd$m, y = y_fauna * 1.6, label = paste0(fauna_label, " (n=", fauna_m2sd$n, ")"), size = 3)
  }
  
  # -Combine panels, single figure 
  p1 + p2 + p3 + plot_layout(ncol = 3)
}


