# PCA for divergence analysis: reduction of acoustic variables

# Packages
library(car)
library(rgl)
library(EFAtools)
library(dplyr)
library(psych) 


# PCA

# Load data
mydata <- read.csv(file = "/divergence_pca.csv", header = TRUE)

# Preliminary selection of transformed variables to minimize outlier influence.
dataset <- mydata[, c("meanfreq", "sd", "freq.median", "freq.Q25", "freq.Q75_sqrt", 
                      "freq.IQR_log", "time.median_log", "time.Q25_sqrt", "time.Q75_log", 
                      "time.IQR_log", "skew_log", "kurt_log", "sp.ent_sq", "time.ent_sq", 
                      "entropy_sq", "sfm", "meandom", "mindom_sqrt", "maxdom_log", 
                      "dfrange_log", "modindx_log", "startdom", "enddom", "dfslope_sqrt", 
                      "meanpeakf", "peakf", "h1_freq_sq", "h1_width_sqrt", "h2_freq", 
                      "h2_width_sqrt", "h3_freq_sq", "h3_width", "harmonics_sq", 
                      "HNR_log", "meanfun", "minfun", "maxfun_log", "num.elms_log", 
                      "elm.duration_log", "song.duration_log", "song.rate_log", 
                      "gap.duration_sqrt")]

# Redundancy reduction: final subset selected after removing uninformative, uncorrelated, and low-KMO variables.
dataset2 <- mydata[, c("elm.duration_log", "time.median_log", "modindx_log", "entropy_sq", 
                       "gap.duration_sqrt", "song.rate_log", "maxdom_log", "meanfreq", 
                       "peakf", "freq.median", "meanpeakf", "dfslope_sqrt", 
                       "maxfun_log", "minfun")]

dataset2 <- as.data.frame(lapply(dataset2, as.numeric))
# Multicollinearity assessment: correlation matrix of the selected acoustic parameters.
data_matrix <- cor(dataset2, use = "complete.obs")
data_matrix

# KMO test for sampling adequacy.
EFAtools::KMO(x = data_matrix)

# Bartlett's test of sphericity for data suitability.
EFAtools::BARTLETT(x = data_matrix, N = 1616)

# Initial PCA without rotation (all components retained).
pca1 <- psych::principal(dataset2, nfactors = length(dataset2),rotate = "none")

# Scree plot for considering component selection.
plot(pca1$values, type = "b")

# Final PCA model (3 components, Varimax rotation).
ultimatepca <- psych::principal(r = dataset2,
                                nfactors = 3,
                                rotate = "Varimax",
                                eps = 1e-7,
                                use = "pairwise.complete.obs")












