if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
if (!requireNamespace("cluster", quietly = TRUE)) {
  install.packages("cluster")
}
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}

library(caret)
library(cluster)
library(data.table)

#### data ####
# Load the catchment table from "catchments.csv" (path is relative to the
# current working directory). String columns are turned into factors on read.
catchments <- fread(
  input = "catchments.csv",
  encoding = "UTF-8",
  stringsAsFactors = TRUE
)

#### variables included in cluster analysis ####
# "Gauge" has to stay in first position: it is the grouping key, and the
# aggregation step below uses vars[-1] as the set of columns to average.
vars <- c(
  "Gauge",
  "A", "PCI", "RR",
  # ESDB_* columns
  "ESDB_Meta", "ESDB_Lime", "ESDB_Fluv",
  # CLC2018_* columns
  "CLC2018_333", "CLC2018_324", "CLC2018_321",
  "S", "E_max", "E_min"
)

#### catchment means ####
# One row per Gauge holding the mean of every analysis variable (all entries
# of `vars` except the leading "Gauge" key). mean() is called without na.rm,
# so an NA in any row of a gauge makes that gauge's mean NA.
catchments_mean <- catchments[
  ,
  lapply(.SD, mean),
  by = Gauge,
  .SDcols = vars[-1L]
]

#### centering and scaling ####
# Fit a center-and-scale transformation on every column except the first
# (the Gauge key produced by the grouping above).
cenSca <- preProcess(
  x = catchments_mean[, -1],
  method = c("center", "scale")
)

# Apply the fitted transformation and put the Gauge column back in front.
catchments_mean_cenSca <- cbind(
  catchments_mean[, 1],
  predict(object = cenSca, newdata = catchments_mean[, -1])
)

#### PCA ####
# Principal components of the standardised variables; the first column
# (Gauge) is dropped before fitting.
pca <- prcomp(x = catchments_mean_cenSca[, -1])
plot(pca)

# Component scores for every gauge. The complete table, Gauge column
# included, is handed over as newdata.
# NOTE(review): this relies on predict() for prcomp objects selecting the
# fitted columns by name and ignoring the extra Gauge column - confirm.
catchments_mean_cenSca_pca <- predict(
  object = pca,
  newdata = catchments_mean_cenSca
)

#### cluster analysis ####
# Partitioning around medoids into three clusters, using only the first two
# columns of the PCA score matrix and Euclidean dissimilarities.
pam_cluster <- pam(
  x = catchments_mean_cenSca_pca[, 1:2],
  k = 3,
  metric = "euclidean"
)
plot(pam_cluster)

# Attach a labelled cluster factor to the Gauge column.
# factor() orders the ids as "1", "2", "3" and hands out the labels in that
# order, so pam id 1 becomes "Cluster 2", id 2 becomes "Cluster 1" and id 3
# stays "Cluster 3".
# NOTE(review): the 1 <-> 2 swap presumably reproduces the labelling stored
# in catchments.csv - confirm.
catchments_mean_cluster <- cbind(
  catchments_mean[, 1],
  Cluster = factor(
    x = as.character(pam_cluster$clustering),
    labels = c("Cluster 2", "Cluster 1", "Cluster 3")
  )
)

#### same clusters as in catchments.csv? ####
# clusters from BaHSYM are used for BaHSYM_de_Vente_et_al_2011 as well
#
# Element-wise comparison of the freshly computed labels with the Cluster
# value found in the first row of each gauge in the input table. Both sides
# are matched by position, i.e. this relies on the two tables listing the
# gauges in the same order.
all(
  as.character(catchments_mean_cluster$Cluster) ==
    as.character(catchments[, .SD[1], by = Gauge]$Cluster)
)
