if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
if (!requireNamespace("cluster", quietly = TRUE)) {
  install.packages("cluster")
}
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}

library(caret)
library(cluster)
library(data.table)

#### data ####
# Read the catchment table from "catchments.csv". Text is handled as UTF-8
# and character columns are converted to factors on import.
catchments <- fread(
  input = "catchments.csv",
  encoding = "UTF-8",
  stringsAsFactors = TRUE
)

#### variables included in cluster analysis ####
# The first entry is the grouping key; the remaining entries (vars[-1]) are
# the columns that get averaged per gauge further below.
vars <- c(
  "Gauge",
  "A", "E", "S",
  "l_p", "l_a", "q",
  "Glc", "Agr_8", "Nat", "Alp", "Grl", "Lakes"
)

#### catchment means ####
# One row per gauge: the arithmetic mean of every analysis variable
# (everything in `vars` except the grouping key itself).
catchments_mean <- catchments[
  ,
  lapply(.SD, mean),
  by = "Gauge",
  .SDcols = vars[-1]
]

#### centering and scaling ####
# Fit a center-and-scale transformation on all columns except the gauge
# identifier, apply it, and put the identifier back as the first column.
cenSca <- preProcess(
  catchments_mean[, !"Gauge"],
  method = c("center", "scale")
)
catchments_mean_cenSca <- cbind(
  catchments_mean[, "Gauge"],
  predict(cenSca, catchments_mean[, !"Gauge"])
)

#### PCA ####
# Principal components of the standardised per-gauge means (identifier
# column excluded from the fit).
pca <- prcomp(catchments_mean_cenSca[, !"Gauge"])
# Bar chart of the variance carried by each component.
plot(pca)
# Component scores for the same rows that were used for the fit.
catchments_mean_cenSca_pca <- predict(
  pca,
  newdata = catchments_mean_cenSca
)

#### cluster analysis ####
# Partitioning around medoids into three clusters, using Euclidean
# dissimilarities on the first two principal-component scores only.
pam_cluster <- pam(
  x = catchments_mean_cenSca_pca[, 1:2],
  k = 3,
  metric = "euclidean"
)
plot(pam_cluster)

# Attach a cluster label to every gauge. The pam cluster numbers are
# renamed 1 -> "Cluster 2", 2 -> "Cluster 1", 3 -> "Cluster 3" (presumably to
# match the numbering stored in catchments.csv; verify if the data change).
catchments_mean_cluster <- cbind(
  catchments_mean[, "Gauge"],
  Cluster = factor(
    as.character(pam_cluster$clustering),
    levels = c("1", "2", "3"),
    labels = c("Cluster 2", "Cluster 1", "Cluster 3")
  )
)

#### same clusters as in catchments.csv? ####
# Compare the first `Cluster` value recorded for each gauge in the input
# table with the label derived above. Both sides are grouped by Gauge from
# the same table, so the rows line up by position.
# identical() is used instead of all(a == b) so that the answer is always a
# single TRUE/FALSE: missing labels or a length mismatch give FALSE rather
# than NA or a silently recycled comparison.
identical(
  as.character(catchments[, .(Cluster = Cluster[1L]), by = Gauge]$Cluster),
  as.character(catchments_mean_cluster$Cluster)
)
