list.of.packages <- c("heplots", "MASS", "ggplot2","klaR", "psych",
"devtools")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
lapply(list.of.packages, require, character.only = TRUE)
install_github("fawda123/ggord")
library(psych)
library(heplots)
library(MASS)
library(ggplot2)
library(devtools)
library(ggord)
library(klaR)
setwd("path_to_your_working_directory")
path2 <- "path_to_your_data_directory"
metadata <- read.table(
file = paste0(path2, "Supplement1_Metadata.csv"),
header = T, sep = ";"
)[, c(
"Collection", "Species1", "Species2",
"Sex", "Crowding", "Locality"
), ]
table(metadata$Species2)
##
## A B C K M N O Q3 Q5 S
## 35 48 74 62 30 58 30 23 43 78
varnames <- c(
"tr_lda1", "tr_lda2", "tr_lda3",
"TN_B", "TN_K", "TN_N", "TN_Q", "M_TN",
"TS_B", "TS_K", "TS_N", "TS_Q", "M_TS"
)
# TN : training, TS: test, M: Geomeans
varnames2 <- c("moment", "mle", "mve", "t")
varnames3 <- c("harmonic", "method", "mean")
n <- 300 # number of random sampling iterations
shape_4 <- read.csv(paste0(path2,"FFT/4_Harmonics/fourier_fit4.csv"), header = T)
shape_4 <- shape_4[, 2:ncol(shape_4)]
shape_4 <- merge(shape_4, metadata[, 1:6], by.x = "collection", by.y = "Collection")
colnames(shape_4)[c(2:7)] <- c("A2", "A3", "A4", "B2", "B3", "B4")
shape_8 <- read.csv(paste0(path2,"FFT/8_Harmonics/fourier_fit8.csv"), header = T)
shape_8 <- shape_8[, 2:ncol(shape_8)]
shape_8 <- merge(shape_8, metadata[, 1:6], by.x = "collection", by.y = "Collection")
colnames(shape_8)[c(2:15)] <- c("A2", "A3", "A4", "A5", "A6", "A7", "A8", "B2", "B3", "B4", "B5", "B6", "B7", "B8")
shape_12 <- read.csv(paste0(path2,"FFT/12_Harmonics/fourier_fit12.csv"), header = T)
shape_12 <- shape_12[, 2:ncol(shape_12)]
shape_12 <- merge(shape_12, metadata[, 1:6], by.x = "collection", by.y = "Collection")
colnames(shape_12)[c(2:23)] <- c("A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10", "A11", "A12", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B9", "B10", "B11", "B12")
shape_16 <- read.csv(paste0(path2,"FFT/16_Harmonics/fourier_fit16.csv"), header = T)
shape_16 <- shape_16[, 2:ncol(shape_16)]
shape_16 <- merge(shape_16, metadata[, 1:6], by.x = "collection", by.y = "Collection")
colnames(shape_16)[c(2:31)] <- c("A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10", "A11", "A12", "A13", "A14", "A15", "A16", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B9", "B10", "B11", "B12", "B13", "B14", "B15", "B16")
shape_24 <- read.csv(paste0(path2,"FFT/24_Harmonics/fourier_fit24.csv"), header = T)
shape_24 <- shape_24[, 2:ncol(shape_24)]
shape_24 <- merge(shape_24, metadata[, 1:6], by.x = "collection", by.y = "Collection")
colnames(shape_24)[c(2:47)] <- c("A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10", "A11", "A12", "A13", "A14", "A15", "A16", "A17", "A18", "A19", "A20", "A21", "A22", "A23", "A24", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B9", "B10", "B11", "B12", "B13", "B14", "B15", "B16", "B17", "B18", "B19", "B20", "B21", "B22", "B23", "B24")
BKNQ4_crowded <- shape_4[which(shape_4$Crowding == "Y"), ] # exclude juveniles
B_shape_4 <- shape_4[which(shape_4$Species2 == "B"), ]
B_shape_4_crowded <- B_shape_4[which(B_shape_4$Crowding == "Y"), ]
K_shape_4 <- shape_4[which(shape_4$Species2 == "K"), ]
K_shape_4_crowded <- K_shape_4[which(K_shape_4$Crowding == "Y"), ]
N_shape_4 <- shape_4[which(shape_4$Species2 == "N"), ]
N_shape_4_crowded <- N_shape_4[which(N_shape_4$Crowding == "Y"), ]
Q5_shape_4 <- shape_4[which(shape_4$Species2 == "Q5"), ]
Q5_shape_4_crowded <- Q5_shape_4[which(Q5_shape_4$Crowding == "Y"), ]
BKNQ8_crowded <- shape_8[which(shape_8$Crowding == "Y"), ] # exclude juveniles
B_shape_8 <- shape_8[which(shape_8$Species2 == "B"), ]
B_shape_8_crowded <- B_shape_8[which(B_shape_8$Crowding == "Y"), ]
K_shape_8 <- shape_8[which(shape_8$Species2 == "K"), ]
K_shape_8_crowded <- K_shape_8[which(K_shape_8$Crowding == "Y"), ]
N_shape_8 <- shape_8[which(shape_8$Species2 == "N"), ]
N_shape_8_crowded <- N_shape_8[which(N_shape_8$Crowding == "Y"), ]
Q5_shape_8 <- shape_8[which(shape_8$Species2 == "Q5"), ]
Q5_shape_8_crowded <- Q5_shape_8[which(Q5_shape_8$Crowding == "Y"), ]
BKNQ12_crowded <- shape_12[which(shape_12$Crowding == "Y"), ] # exclude juveniles
B_shape_12 <- shape_12[which(shape_12$Species2 == "B"), ]
B_shape_12_crowded <- B_shape_12[which(B_shape_12$Crowding == "Y"), ]
K_shape_12 <- shape_12[which(shape_12$Species2 == "K"), ]
K_shape_12_crowded <- K_shape_12[which(K_shape_12$Crowding == "Y"), ]
N_shape_12 <- shape_12[which(shape_12$Species2 == "N"), ]
N_shape_12_crowded <- N_shape_12[which(N_shape_12$Crowding == "Y"), ]
Q5_shape_12 <- shape_12[which(shape_12$Species2 == "Q5"), ]
Q5_shape_12_crowded <- Q5_shape_12[which(Q5_shape_12$Crowding == "Y"), ]
BKNQ16_crowded <- shape_16[which(shape_16$Crowding == "Y"), ] # exclude juveniles
B_shape_16 <- shape_16[which(shape_16$Species2 == "B"), ]
B_shape_16_crowded <- B_shape_16[which(B_shape_16$Crowding == "Y"), ]
K_shape_16 <- shape_16[which(shape_16$Species2 == "K"), ]
K_shape_16_crowded <- K_shape_16[which(K_shape_16$Crowding == "Y"), ]
N_shape_16 <- shape_16[which(shape_16$Species2 == "N"), ]
N_shape_16_crowded <- N_shape_16[which(N_shape_16$Crowding == "Y"), ]
Q5_shape_16 <- shape_16[which(shape_16$Species2 == "Q5"), ]
Q5_shape_16_crowded <- Q5_shape_16[which(Q5_shape_16$Crowding == "Y"), ]
BKNQ24_crowded <- shape_24[which(shape_24$Crowding == "Y"), ] # exclude juveniles
B_shape_24 <- shape_24[which(shape_24$Species2 == "B"), ]
B_shape_24_crowded <- B_shape_24[which(B_shape_24$Crowding == "Y"), ]
K_shape_24 <- shape_24[which(shape_24$Species2 == "K"), ]
K_shape_24_crowded <- K_shape_24[which(K_shape_24$Crowding == "Y"), ]
N_shape_24 <- shape_24[which(shape_24$Species2 == "N"), ]
N_shape_24_crowded <- N_shape_24[which(N_shape_24$Crowding == "Y"), ]
Q5_shape_24 <- shape_24[which(shape_24$Species2 == "Q5"), ]
Q5_shape_24_crowded <- Q5_shape_24[which(Q5_shape_24$Crowding == "Y"), ]
res_har4 <- as.data.frame(matrix(NA, nrow = n, ncol = 13))
colnames(res_har4) <- varnames
set.seed(333)
for (i in 1:n) {
train_1 <- sample(nrow(B_shape_4), size = 21)
train_2 <- sample(nrow(K_shape_4), size = 21)
train_3 <- sample(nrow(N_shape_4), size = 21)
train_4 <- sample(nrow(Q5_shape_4), size = 21)
train_subset_1 <- as.data.frame(B_shape_4[train_1, ])
train_subset_2 <- as.data.frame(K_shape_4[train_2, ])
train_subset_3 <- as.data.frame(N_shape_4[train_3, ])
train_subset_4 <- as.data.frame(Q5_shape_4[train_4, ])
BKNQ4.train <- as.data.frame(rbind(train_subset_1, train_subset_2,
train_subset_3, train_subset_4))
test_1 <- as.data.frame(B_shape_4[-train_1, ])
test_2 <- as.data.frame(K_shape_4[-train_2, ])
test_3 <- as.data.frame(N_shape_4[-train_3, ])
test_4 <- as.data.frame(Q5_shape_4[-train_4, ])
test_subset_1 <- test_1[sample(nrow(test_1), size = 9), ]
test_subset_2 <- test_2[sample(nrow(test_2), size = 9), ]
test_subset_3 <- test_3[sample(nrow(test_3), size = 9), ]
test_subset_4 <- test_4[sample(nrow(test_4), size = 9), ]
BKNQ4.test <- as.data.frame(rbind(test_subset_1, test_subset_2,
test_subset_3, test_subset_4))
BKNQ4.train.lda <- lda(Species2 ~ A2 + A3 + A4 + B2 + B3 + B4,
data = BKNQ4.train, na.action = na.omit)
res_har4[i, 1:3] <- prop.table(BKNQ4.train.lda$svd^2)
BKNQ4.train.lda.values <- predict(BKNQ4.train.lda)
prediction.lda.accuracy4 <- table(BKNQ4.train$Species2,
BKNQ4.train.lda.values$class)
res_har4[i, 4:7] <- t(as.matrix(diag(prop.table(prediction.lda.accuracy4, 1))))
res_har4[i, 8] <- exp(rowMeans(log(res_har4[i, 4:7])))
BKNQ4.lda.predict.posteriors <- as.data.frame(predict(BKNQ4.train.lda,
newdata = BKNQ4.test)$posterior)
BKNQ4.lda.predict <- predict(BKNQ4.train.lda, newdata = BKNQ4.test)
prediction.accuracy4 <- table(BKNQ4.test$Species2, BKNQ4.lda.predict$class)
res_har4[i, 9:12] <- diag(prop.table(prediction.accuracy4, 1))
res_har4[i, 13] <- exp(rowMeans(log(res_har4[i, 9:12])))
}
res_har8 <- as.data.frame(matrix(NA, nrow = n, ncol = 13))
colnames(res_har8) <- varnames
set.seed(333)
for (i in 1:n) {
train_1 <- sample(nrow(B_shape_8), size = 21)
train_2 <- sample(nrow(K_shape_8), size = 21)
train_3 <- sample(nrow(N_shape_8), size = 21)
train_4 <- sample(nrow(Q5_shape_8), size = 21)
train_subset_1 <- as.data.frame(B_shape_8[train_1, ])
train_subset_2 <- as.data.frame(K_shape_8[train_2, ])
train_subset_3 <- as.data.frame(N_shape_8[train_3, ])
train_subset_4 <- as.data.frame(Q5_shape_8[train_4, ])
BKNQ8.train <- as.data.frame(rbind(train_subset_1, train_subset_2,
train_subset_3, train_subset_4))
test_1 <- as.data.frame(B_shape_8[-train_1, ])
test_2 <- as.data.frame(K_shape_8[-train_2, ])
test_3 <- as.data.frame(N_shape_8[-train_3, ])
test_4 <- as.data.frame(Q5_shape_8[-train_4, ])
test_subset_1 <- test_1[sample(nrow(test_1), size = 9), ]
test_subset_2 <- test_2[sample(nrow(test_2), size = 9), ]
test_subset_3 <- test_3[sample(nrow(test_3), size = 9), ]
test_subset_4 <- test_4[sample(nrow(test_4), size = 9), ]
BKNQ8.test <- as.data.frame(rbind(
test_subset_1, test_subset_2,
test_subset_3, test_subset_4))
BKNQ8.train.lda <- lda(
Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 +
B2 + B3 + B4 + B5 + B6 + B7 + B8,
data = BKNQ8.train, na.action = na.omit
)
res_har8[i, 1:3] <- prop.table(BKNQ8.train.lda$svd^2)
BKNQ8.train.lda.values <- predict(BKNQ8.train.lda)
prediction.lda.accuracy8 <- table(BKNQ8.train$Species2,
BKNQ8.train.lda.values$class)
res_har8[i, 4:7] <- t(as.matrix(diag(prop.table(prediction.lda.accuracy8, 1))))
res_har8[i, 8] <- exp(rowMeans(log(res_har8[i, 4:7])))
BKNQ8.lda.predict.posteriors <- as.data.frame(predict(BKNQ8.train.lda,
newdata = BKNQ8.test)$posterior)
BKNQ8.lda.predict <- predict(BKNQ8.train.lda, newdata = BKNQ8.test)
prediction.accuracy8 <- table(BKNQ8.test$Species2, BKNQ8.lda.predict$class)
res_har8[i, 9:12] <- diag(prop.table(prediction.accuracy8, 1))
res_har8[i, 13] <- exp(rowMeans(log(res_har8[i, 9:12])))
}
res_har12 <- as.data.frame(matrix(NA, nrow = n, ncol = 13))
colnames(res_har12) <- varnames
set.seed(333)
for (i in 1:n) {
train_1 <- sample(nrow(B_shape_12), size = 21)
train_2 <- sample(nrow(K_shape_12), size = 21)
train_3 <- sample(nrow(N_shape_12), size = 21)
train_4 <- sample(nrow(Q5_shape_12), size = 21)
train_subset_1 <- as.data.frame(B_shape_12[train_1, ])
train_subset_2 <- as.data.frame(K_shape_12[train_2, ])
train_subset_3 <- as.data.frame(N_shape_12[train_3, ])
train_subset_4 <- as.data.frame(Q5_shape_12[train_4, ])
BKNQ12.train <- as.data.frame(rbind(train_subset_1, train_subset_2,
train_subset_3, train_subset_4))
test_1 <- as.data.frame(B_shape_12[-train_1, ])
test_2 <- as.data.frame(K_shape_12[-train_2, ])
test_3 <- as.data.frame(N_shape_12[-train_3, ])
test_4 <- as.data.frame(Q5_shape_12[-train_4, ])
test_subset_1 <- test_1[sample(nrow(test_1), size = 9), ]
test_subset_2 <- test_2[sample(nrow(test_2), size = 9), ]
test_subset_3 <- test_3[sample(nrow(test_3), size = 9), ]
test_subset_4 <- test_4[sample(nrow(test_4), size = 9), ]
BKNQ12.test <- as.data.frame(rbind(test_subset_1, test_subset_2,
test_subset_3, test_subset_4))
BKNQ12.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 +
+B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12,
data = BKNQ12.train, na.action = na.omit)
res_har12[i, 1:3] <- prop.table(BKNQ12.train.lda$svd^2)
BKNQ12.train.lda.values <- predict(BKNQ12.train.lda)
prediction.lda.accuracy12 <- table(BKNQ12.train$Species2,
BKNQ12.train.lda.values$class)
res_har12[i, 4:7] <- t(as.matrix(diag(prop.table(prediction.lda.accuracy12, 1))))
res_har12[i, 8] <- exp(rowMeans(log(res_har12[i, 4:7])))
BKNQ12.lda.predict.posteriors <- as.data.frame(predict(BKNQ12.train.lda,
newdata = BKNQ12.test)$posterior)
BKNQ12.lda.predict <- predict(BKNQ12.train.lda, newdata = BKNQ12.test)
prediction.accuracy12 <- table(BKNQ12.test$Species2, BKNQ12.lda.predict$class)
res_har12[i, 9:12] <- diag(prop.table(prediction.accuracy12, 1))
res_har12[i, 13] <- exp(rowMeans(log(res_har12[i, 9:12])))
}
res_har16 <- as.data.frame(matrix(NA, nrow = n, ncol = 13))
colnames(res_har16) <- varnames
set.seed(333)
for (i in 1:n) {
train_1 <- sample(nrow(B_shape_16), size = 21)
train_2 <- sample(nrow(K_shape_16), size = 21)
train_3 <- sample(nrow(N_shape_16), size = 21)
train_4 <- sample(nrow(Q5_shape_16), size = 21)
train_subset_1 <- as.data.frame(B_shape_16[train_1, ])
train_subset_2 <- as.data.frame(K_shape_16[train_2, ])
train_subset_3 <- as.data.frame(N_shape_16[train_3, ])
train_subset_4 <- as.data.frame(Q5_shape_16[train_4, ])
BKNQ16.train <- as.data.frame(rbind(train_subset_1, train_subset_2,
train_subset_3, train_subset_4))
test_1 <- as.data.frame(B_shape_16[-train_1, ])
test_2 <- as.data.frame(K_shape_16[-train_2, ])
test_3 <- as.data.frame(N_shape_16[-train_3, ])
test_4 <- as.data.frame(Q5_shape_16[-train_4, ])
test_subset_1 <- test_1[sample(nrow(test_1), size = 9), ]
test_subset_2 <- test_2[sample(nrow(test_2), size = 9), ]
test_subset_3 <- test_3[sample(nrow(test_3), size = 9), ]
test_subset_4 <- test_4[sample(nrow(test_4), size = 9), ]
BKNQ16.test <- as.data.frame(rbind(test_subset_1, test_subset_2,
test_subset_3, test_subset_4))
BKNQ16.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 + A13 + A14 + A15 + A16 +
B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12 + B13 + B14 + B15 + B16,
data = BKNQ16.train, na.action = na.omit)
res_har16[i, 1:3] <- prop.table(BKNQ16.train.lda$svd^2)
BKNQ16.train.lda.values <- predict(BKNQ16.train.lda)
prediction.lda.accuracy16 <- table(BKNQ16.train$Species2,
BKNQ16.train.lda.values$class)
res_har16[i, 4:7] <- t(as.matrix(diag(prop.table(prediction.lda.accuracy16, 1))))
res_har16[i, 8] <- exp(rowMeans(log(res_har16[i, 4:7])))
BKNQ16.lda.predict.posteriors <- as.data.frame(predict(BKNQ16.train.lda,
newdata = BKNQ16.test)$posterior)
BKNQ16.lda.predict <- predict(BKNQ16.train.lda, newdata = BKNQ16.test)
prediction.accuracy16 <- table(BKNQ16.test$Species2, BKNQ16.lda.predict$class)
res_har16[i, 9:12] <- diag(prop.table(prediction.accuracy16, 1))
res_har16[i, 13] <- exp(rowMeans(log(res_har16[i, 9:12])))
}
res_har24 <- as.data.frame(matrix(NA, nrow = n, ncol = 13))
colnames(res_har24) <- varnames
set.seed(333)
for (i in 1:n) {
train_1 <- sample(nrow(B_shape_24), size = 21)
train_2 <- sample(nrow(K_shape_24), size = 21)
train_3 <- sample(nrow(N_shape_24), size = 21)
train_4 <- sample(nrow(Q5_shape_24), size = 21)
train_subset_1 <- as.data.frame(B_shape_24[train_1, ])
train_subset_2 <- as.data.frame(K_shape_24[train_2, ])
train_subset_3 <- as.data.frame(N_shape_24[train_3, ])
train_subset_4 <- as.data.frame(Q5_shape_24[train_4, ])
BKNQ24.train <- as.data.frame(rbind(train_subset_1, train_subset_2,
train_subset_3, train_subset_4))
test_1 <- as.data.frame(B_shape_24[-train_1, ])
test_2 <- as.data.frame(K_shape_24[-train_2, ])
test_3 <- as.data.frame(N_shape_24[-train_3, ])
test_4 <- as.data.frame(Q5_shape_24[-train_4, ])
test_subset_1 <- test_1[sample(nrow(test_1), size = 9), ]
test_subset_2 <- test_2[sample(nrow(test_2), size = 9), ]
test_subset_3 <- test_3[sample(nrow(test_3), size = 9), ]
test_subset_4 <- test_4[sample(nrow(test_4), size = 9), ]
BKNQ24.test <- as.data.frame(rbind(test_subset_1, test_subset_2,
test_subset_3, test_subset_4))
BKNQ24.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 + A13 + A14 + A15 +
A16 + A17 + A18 + A19 + A20 + A21 + A22 + A23 + A24 +
B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12 + B13 + B14 + B15 +
B16 + B17 + B18 + B19 + B20 + B21 + B22 + B23 + B24,
data = BKNQ24.train, na.action = na.omit)
res_har24[i, 1:3] <- prop.table(BKNQ24.train.lda$svd^2)
BKNQ24.train.lda.values <- predict(BKNQ24.train.lda)
prediction.lda.accuracy24 <- table(BKNQ24.train$Species2,
BKNQ24.train.lda.values$class)
res_har24[i, 4:7] <- t(as.matrix(diag(prop.table(prediction.lda.accuracy24, 1))))
res_har24[i, 8] <- exp(rowMeans(log(res_har24[i, 4:7])))
BKNQ24.lda.predict.posteriors <- as.data.frame(predict(BKNQ24.train.lda,
newdata = BKNQ24.test)$posterior)
BKNQ24.lda.predict <- predict(BKNQ24.train.lda, newdata = BKNQ24.test)
prediction.accuracy24 <- table(BKNQ24.test$Species2, BKNQ24.lda.predict$class)
res_har24[i, 9:12] <- diag(prop.table(prediction.accuracy24, 1))
res_har24[i, 13] <- exp(rowMeans(log(res_har24[i, 9:12])))
}
train_mean <- cbind(
res_har4$M_TN, res_har8$M_TN, res_har12$M_TN, res_har16$M_TN, res_har24$M_TN
)
boxplot(train_mean, names = c(4, 8, 12, 16, 24), ylim = c(0.5, 1.0))
title(main = "lda prediction for training datasets", xlab = "number of harmonics", ylab = "accuracy")
test_mean <- cbind(
res_har4$M_TS, res_har8$M_TS, res_har12$M_TS, res_har16$M_TS, res_har24$M_TS
)
boxplot(test_mean, names = c(4, 8, 12, 16, 24), ylim = c(0.5, 1.0))
title(main = "lda prediction for test datasets", xlab = "number of harmonics", ylab = "accuracy")
train_B <- cbind(
res_har4$TN_B, res_har8$TN_B, res_har12$TN_B, res_har16$TN_B, res_har24$TN_B
)
boxplot(train_B, names = c(4, 8, 12, 16, 24), ylim = c(0.2, 1.0))
title(main = "lda prediction for training datasets", sub = "Species B", xlab = "number of harmonics", ylab = "accuracy")
train_K <- cbind(
res_har4$TN_K, res_har8$TN_K, res_har12$TN_K, res_har16$TN_K, res_har24$TN_K
)
boxplot(train_K, names = c(4, 8, 12, 16, 24), ylim = c(0.2, 1.0))
title(main = "lda prediction for training datasets", sub = "Species K", xlab = "number of harmonics", ylab = "accuracy")
train_N <- cbind(
res_har4$TN_N, res_har8$TN_N, res_har12$TN_N, res_har16$TN_N, res_har24$TN_N
)
boxplot(train_N, names = c(4, 8, 12, 16, 24), ylim = c(0.2, 1.0))
title(main = "lda prediction for training datasets", sub = "Species N", xlab = "number of harmonics", ylab = "accuracy")
train_Q <- cbind(
res_har4$TN_Q, res_har8$TN_Q, res_har12$TN_Q, res_har16$TN_Q, res_har24$TN_Q
)
boxplot(train_Q, names = c(4, 8, 12, 16, 24), ylim = c(0.2, 1.0))
title(main = "lda prediction for training datasets", sub = "Species Q", xlab = "number of harmonics", ylab = "accuracy")
test_B <- cbind(
res_har4$TS_B, res_har8$TS_B, res_har12$TS_B, res_har16$TS_B, res_har24$TS_B
)
boxplot(test_B, names = c(4, 8, 12, 16, 24), ylim = c(0.2, 1.0))
title(main = "lda prediction for test datasets", sub = "Species B", xlab = "number of harmonics", ylab = "accuracy")
test_K <- cbind(
res_har4$TS_K, res_har8$TS_K, res_har12$TS_K, res_har16$TS_K, res_har24$TS_K
)
boxplot(test_K, names = c(4, 8, 12, 16, 24), ylim = c(0.2, 1.0))
title(main = "lda prediction for test datasets", sub = "Species K", xlab = "number of harmonics", ylab = "accuracy")
test_N <- cbind(
res_har4$TS_N, res_har8$TS_N, res_har12$TS_N, res_har16$TS_N, res_har24$TS_N
)
boxplot(test_N, names = c(4, 8, 12, 16, 24), ylim = c(0.2, 1.0))
title(main = "lda prediction for test datasets", sub = "Species N", xlab = "number of harmonics", ylab = "accuracy")
test_Q <- cbind(
res_har4$TS_Q, res_har8$TS_Q, res_har12$TS_Q, res_har16$TS_Q, res_har24$TS_Q
)
boxplot(test_Q, names = c(4, 8, 12, 16, 24), ylim = c(0.2, 1.0))
title(main = "lda prediction for test datasets", sub = "Species Q", xlab = "number of harmonics", ylab = "accuracy")
results_accuracy_har8 <- as.data.frame(matrix(NA, nrow = n, ncol = 4))
colnames(results_accuracy_har8) <- varnames2
set.seed(333)
for (i in 1:n) {
train_1 <- sample(nrow(B_shape_8), size = 21)
train_2 <- sample(nrow(K_shape_8), size = 21)
train_3 <- sample(nrow(N_shape_8), size = 21)
train_4 <- sample(nrow(Q5_shape_8), size = 21)
train_subset_1 <- as.data.frame(B_shape_8[train_1, ])
train_subset_2 <- as.data.frame(K_shape_8[train_2, ])
train_subset_3 <- as.data.frame(N_shape_8[train_3, ])
train_subset_4 <- as.data.frame(Q5_shape_8[train_4, ])
BKNQ8.train <- as.data.frame(rbind(train_subset_1, train_subset_2,
train_subset_3, train_subset_4))
test_1 <- as.data.frame(B_shape_8[-train_1, ])
test_2 <- as.data.frame(K_shape_8[-train_2, ])
test_3 <- as.data.frame(N_shape_8[-train_3, ])
test_4 <- as.data.frame(Q5_shape_8[-train_4, ])
test_subset_1 <- test_1[sample(nrow(test_1), size = 9), ]
test_subset_2 <- test_2[sample(nrow(test_2), size = 9), ]
test_subset_3 <- test_3[sample(nrow(test_3), size = 9), ]
test_subset_4 <- test_4[sample(nrow(test_4), size = 9), ]
BKNQ8.test <- as.data.frame(rbind(test_subset_1, test_subset_2,
test_subset_3, test_subset_4))
# moment
BKNQ8.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 +
B2 + B3 + B4 + B5 + B6 + B7 + B8,
data = BKNQ8.train, method = "moment", na.action = na.omit
)
BKNQ8.lda.predict <- predict(BKNQ8.train.lda, newdata = BKNQ8.test)
results_accuracy_har8[i, 1] <- mean(BKNQ8.lda.predict$class ==
BKNQ8.test$Species2)
# mle
BKNQ8.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 +
B2 + B3 + B4 + B5 + B6 + B7 + B8,
data = BKNQ8.train, method = "mle", na.action = na.omit
)
BKNQ8.lda.predict <- predict(BKNQ8.train.lda, newdata = BKNQ8.test)
results_accuracy_har8[i, 2] <- mean(BKNQ8.lda.predict$class ==
BKNQ8.test$Species2)
# mve
BKNQ8.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 +
B2 + B3 + B4 + B5 + B6 + B7 + B8,
data = BKNQ8.train, method = "mve", na.action = na.omit
)
BKNQ8.lda.predict <- predict(BKNQ8.train.lda, newdata = BKNQ8.test)
results_accuracy_har8[i, 3] <- mean(BKNQ8.lda.predict$class ==
BKNQ8.test$Species2)
# t
BKNQ8.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 +
B2 + B3 + B4 + B5 + B6 + B7 + B8,
data = BKNQ8.train, method = "t", na.action = na.omit)
BKNQ8.lda.predict <- predict(BKNQ8.train.lda, newdata = BKNQ8.test)
results_accuracy_har8[i, 4] <- mean(BKNQ8.lda.predict$class ==
BKNQ8.test$Species2)
}
results_accuracy_har12 <- as.data.frame(matrix(NA, nrow = n, ncol = 4))
colnames(results_accuracy_har12) <- varnames2
set.seed(333)
for (i in 1:n) {
train_1 <- sample(nrow(B_shape_12), size = 21)
train_2 <- sample(nrow(K_shape_12), size = 21)
train_3 <- sample(nrow(N_shape_12), size = 21)
train_4 <- sample(nrow(Q5_shape_12), size = 21)
train_subset_1 <- as.data.frame(B_shape_12[train_1, ])
train_subset_2 <- as.data.frame(K_shape_12[train_2, ])
train_subset_3 <- as.data.frame(N_shape_12[train_3, ])
train_subset_4 <- as.data.frame(Q5_shape_12[train_4, ])
BKNQ12.train <- as.data.frame(rbind(train_subset_1, train_subset_2,
train_subset_3, train_subset_4))
test_1 <- as.data.frame(B_shape_12[-train_1, ])
test_2 <- as.data.frame(K_shape_12[-train_2, ])
test_3 <- as.data.frame(N_shape_12[-train_3, ])
test_4 <- as.data.frame(Q5_shape_12[-train_4, ])
test_subset_1 <- test_1[sample(nrow(test_1), size = 9), ]
test_subset_2 <- test_2[sample(nrow(test_2), size = 9), ]
test_subset_3 <- test_3[sample(nrow(test_3), size = 9), ]
test_subset_4 <- test_4[sample(nrow(test_4), size = 9), ]
BKNQ12.test <- as.data.frame(rbind(test_subset_1, test_subset_2,
test_subset_3, test_subset_4))
# moment
BKNQ12.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 +
B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12,
data = BKNQ12.train, method = "moment", na.action = na.omit
)
BKNQ12.lda.predict <- predict(BKNQ12.train.lda, newdata = BKNQ12.test)
results_accuracy_har12[i, 1] <- mean(BKNQ12.lda.predict$class ==
BKNQ12.test$Species2)
# mle
BKNQ12.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 +
B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12,
data = BKNQ12.train, method = "mle", na.action = na.omit
)
BKNQ12.lda.predict <- predict(BKNQ12.train.lda, newdata = BKNQ12.test)
results_accuracy_har12[i, 2] <- mean(BKNQ12.lda.predict$class ==
BKNQ12.test$Species2)
# mve
BKNQ12.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 +
B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12,
data = BKNQ12.train, method = "mve", na.action = na.omit
)
BKNQ12.lda.predict <- predict(BKNQ12.train.lda, newdata = BKNQ12.test)
results_accuracy_har12[i, 3] <- mean(BKNQ12.lda.predict$class ==
BKNQ12.test$Species2)
# t
BKNQ12.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 +
B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12,
data = BKNQ12.train, method = "t", na.action = na.omit
)
BKNQ12.lda.predict <- predict(BKNQ12.train.lda, newdata = BKNQ12.test)
results_accuracy_har12[i, 4] <- mean(BKNQ12.lda.predict$class ==
BKNQ12.test$Species2)
}
method_comparison <- as.data.frame(matrix(NA, nrow = 8, ncol = 3))
colnames(method_comparison) <- varnames3
method_comparison[, 1] <- c(8, 8, 8, 8, 12, 12, 12, 12)
method_comparison[, 2] <- c("moment", "mle", "mve", "t", "moment", "mle", "mve", "t")
method_comparison[, 3] <- c(
exp(mean(log(results_accuracy_har8$moment))),
exp(mean(log(results_accuracy_har8$mle))),
exp(mean(log(results_accuracy_har8$mve))),
exp(mean(log(results_accuracy_har8$t))),
exp(mean(log(results_accuracy_har12$moment))),
exp(mean(log(results_accuracy_har12$mle))),
exp(mean(log(results_accuracy_har12$mve))),
exp(mean(log(results_accuracy_har12$t)))
)
method_comparison
## harmonic method mean
## 1 8 moment 0.9442242
## 2 8 mle 0.9442242
## 3 8 mve 0.9272179
## 4 8 t 0.9445926
## 5 12 moment 0.9241789
## 6 12 mle 0.9241789
## 7 12 mve 0.8779709
## 8 12 t 0.9260914
BKNQ8.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 +
B2 + B3 + B4 + B5 + B6 + B7 + B8,
method = "moment",
data = shape_8,
na.action = na.omit)
BKNQ8.lda.values <- predict(BKNQ8.lda)
ggord(BKNQ8.lda, shape_8$Species2, size = 2.5, vec_ext = 0.03, axes = c("1", "2"), hull = TRUE, ellipse = FALSE)
ggord(BKNQ8.lda, shape_8$Species2, size = 2.5, vec_ext = 0.03, axes = c("1", "2"), ellipse_pro = 0.95)
ggord(BKNQ8.lda, shape_8$Species2, size = 2.5, vec_ext = 0.03, axes = c("1", "3"), hull = TRUE, ellipse = FALSE)
ggord(BKNQ8.lda, shape_8$Species2, size = 2.5, vec_ext = 0.03, axes = c("1", "3"))
res_har4_crowded <- as.data.frame(matrix(NA, nrow = n, ncol = 13))
colnames(res_har4_crowded) <- varnames
set.seed(333)
for (i in 1:n) {
train_1 <- sample(nrow(B_shape_4_crowded), size = 21)
train_2 <- sample(nrow(K_shape_4_crowded), size = 21)
train_3 <- sample(nrow(N_shape_4_crowded), size = 21)
train_4 <- sample(nrow(Q5_shape_4_crowded), size = 21)
train_subset_1 <- as.data.frame(B_shape_4_crowded[train_1, ])
train_subset_2 <- as.data.frame(K_shape_4_crowded[train_2, ])
train_subset_3 <- as.data.frame(N_shape_4_crowded[train_3, ])
train_subset_4 <- as.data.frame(Q5_shape_4_crowded[train_4, ])
BKNQ4.train <- as.data.frame(rbind(train_subset_1, train_subset_2,
train_subset_3, train_subset_4))
test_1 <- as.data.frame(B_shape_4_crowded[-train_1, ])
test_2 <- as.data.frame(K_shape_4_crowded[-train_2, ])
test_3 <- as.data.frame(N_shape_4_crowded[-train_3, ])
test_4 <- as.data.frame(Q5_shape_4_crowded[-train_4, ])
test_subset_1 <- test_1[sample(nrow(test_1), size = 9), ]
test_subset_2 <- test_2[sample(nrow(test_2), size = 9), ]
test_subset_3 <- test_3[sample(nrow(test_3), size = 9), ]
test_subset_4 <- test_4[sample(nrow(test_4), size = 9), ]
BKNQ4.test <- as.data.frame(rbind(test_subset_1, test_subset_2,
test_subset_3, test_subset_4))
BKNQ4.train.lda <- lda(Species2 ~
A2 + A3 + A4 +
B2 + B3 + B4,
data = BKNQ4.train, na.action = na.omit
)
res_har4_crowded[i, 1:3] <- prop.table(BKNQ4.train.lda$svd^2)
BKNQ4.train.lda.values <- predict(BKNQ4.train.lda)
prediction.lda.accuracy4 <- table(BKNQ4.train$Species2, BKNQ4.train.lda.values$class)
res_har4_crowded[i, 4:7] <-
t(as.matrix(diag(prop.table(prediction.lda.accuracy4, 1))))
res_har4_crowded[i, 8] <- exp(rowMeans(log(res_har4_crowded[i, 4:7])))
BKNQ4.lda.predict.posteriors <- as.data.frame(predict(BKNQ4.train.lda,
newdata = BKNQ4.test)$posterior)
BKNQ4.lda.predict <- predict(BKNQ4.train.lda, newdata = BKNQ4.test)
prediction.accuracy4 <- table(BKNQ4.test$Species2, BKNQ4.lda.predict$class)
res_har4_crowded[i, 9:12] <- diag(prop.table(prediction.accuracy4, 1))
res_har4_crowded[i, 13] <- exp(rowMeans(log(res_har4_crowded[i, 9:12])))
}
res_har8_crowded <- as.data.frame(matrix(NA, nrow = n, ncol = 13))
colnames(res_har8_crowded) <- varnames
set.seed(333)
for (i in 1:n) {
train_1 <- sample(nrow(B_shape_8_crowded), size = 21)
train_2 <- sample(nrow(K_shape_8_crowded), size = 21)
train_3 <- sample(nrow(N_shape_8_crowded), size = 21)
train_4 <- sample(nrow(Q5_shape_8_crowded), size = 21)
train_subset_1 <- as.data.frame(B_shape_8_crowded[train_1, ])
train_subset_2 <- as.data.frame(K_shape_8_crowded[train_2, ])
train_subset_3 <- as.data.frame(N_shape_8_crowded[train_3, ])
train_subset_4 <- as.data.frame(Q5_shape_8_crowded[train_4, ])
BKNQ8.train <- as.data.frame(rbind(train_subset_1, train_subset_2,
train_subset_3, train_subset_4))
test_1 <- as.data.frame(B_shape_8_crowded[-train_1, ])
test_2 <- as.data.frame(K_shape_8_crowded[-train_2, ])
test_3 <- as.data.frame(N_shape_8_crowded[-train_3, ])
test_4 <- as.data.frame(Q5_shape_8_crowded[-train_4, ])
test_subset_1 <- test_1[sample(nrow(test_1), size = 9), ]
test_subset_2 <- test_2[sample(nrow(test_2), size = 9), ]
test_subset_3 <- test_3[sample(nrow(test_3), size = 9), ]
test_subset_4 <- test_4[sample(nrow(test_4), size = 9), ]
BKNQ8.test <- as.data.frame(rbind(test_subset_1, test_subset_2,
test_subset_3, test_subset_4))
BKNQ8.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 +
B2 + B3 + B4 + B5 + B6 + B7 + B8,
data = BKNQ8.train, na.action = na.omit
)
res_har8_crowded[i, 1:3] <- prop.table(BKNQ8.train.lda$svd^2)
BKNQ8.train.lda.values <- predict(BKNQ8.train.lda)
prediction.lda.accuracy8 <- table(BKNQ8.train$Species2,
BKNQ8.train.lda.values$class)
res_har8_crowded[i, 4:7] <- t(as.matrix(diag(prop.table(
prediction.lda.accuracy8, 1))))
res_har8_crowded[i, 8] <- exp(rowMeans(log(res_har8_crowded[i, 4:7])))
BKNQ8.lda.predict.posteriors <- as.data.frame(predict(BKNQ8.train.lda,
newdata = BKNQ8.test)$posterior)
BKNQ8.lda.predict <- predict(BKNQ8.train.lda, newdata = BKNQ8.test)
prediction.accuracy8 <- table(BKNQ8.test$Species2,
BKNQ8.lda.predict$class)
res_har8_crowded[i, 9:12] <- diag(prop.table(prediction.accuracy8, 1))
res_har8_crowded[i, 13] <- exp(rowMeans(log(res_har8_crowded[i, 9:12])))
}
res_har12_crowded <- as.data.frame(matrix(NA, nrow = n, ncol = 13))
colnames(res_har12_crowded) <- varnames
set.seed(333)
for (i in 1:n) {
train_1 <- sample(nrow(B_shape_12_crowded), size = 21)
train_2 <- sample(nrow(K_shape_12_crowded), size = 21)
train_3 <- sample(nrow(N_shape_12_crowded), size = 21)
train_4 <- sample(nrow(Q5_shape_12_crowded), size = 21)
train_subset_1 <- as.data.frame(B_shape_12_crowded[train_1, ])
train_subset_2 <- as.data.frame(K_shape_12_crowded[train_2, ])
train_subset_3 <- as.data.frame(N_shape_12_crowded[train_3, ])
train_subset_4 <- as.data.frame(Q5_shape_12_crowded[train_4, ])
BKNQ12.train <- as.data.frame(rbind(train_subset_1, train_subset_2,
train_subset_3, train_subset_4))
test_1 <- as.data.frame(B_shape_12_crowded[-train_1, ])
test_2 <- as.data.frame(K_shape_12_crowded[-train_2, ])
test_3 <- as.data.frame(N_shape_12_crowded[-train_3, ])
test_4 <- as.data.frame(Q5_shape_12_crowded[-train_4, ])
test_subset_1 <- test_1[sample(nrow(test_1), size = 9), ]
test_subset_2 <- test_2[sample(nrow(test_2), size = 9), ]
test_subset_3 <- test_3[sample(nrow(test_3), size = 9), ]
test_subset_4 <- test_4[sample(nrow(test_4), size = 9), ]
BKNQ12.test <- as.data.frame(rbind(test_subset_1, test_subset_2,
test_subset_3, test_subset_4))
BKNQ12.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 +
B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12,
data = BKNQ12.train, na.action = na.omit
)
res_har12_crowded[i, 1:3] <- prop.table(BKNQ12.train.lda$svd^2)
BKNQ12.train.lda.values <- predict(BKNQ12.train.lda)
prediction.lda.accuracy12 <- table(BKNQ12.train$Species2,
BKNQ12.train.lda.values$class)
res_har12_crowded[i, 4:7] <- t(as.matrix(diag(prop.table(
prediction.lda.accuracy12, 1))))
res_har12_crowded[i, 8] <- exp(rowMeans(log(res_har12_crowded[i, 4:7])))
BKNQ12.lda.predict.posteriors <- as.data.frame(predict(BKNQ12.train.lda,
newdata = BKNQ12.test)$posterior)
BKNQ12.lda.predict <- predict(BKNQ12.train.lda, newdata = BKNQ12.test)
prediction.accuracy12 <- table(BKNQ12.test$Species2,
BKNQ12.lda.predict$class)
res_har12_crowded[i, 9:12] <- diag(prop.table(prediction.accuracy12, 1))
res_har12_crowded[i, 13] <- exp(rowMeans(log(res_har12_crowded[i, 9:12])))
}
res_har16_crowded <- as.data.frame(matrix(NA, nrow = n, ncol = 13))
colnames(res_har16_crowded) <- varnames
set.seed(333)
for (i in 1:n) {
train_1 <- sample(nrow(B_shape_16_crowded), size = 21)
train_2 <- sample(nrow(K_shape_16_crowded), size = 21)
train_3 <- sample(nrow(N_shape_16_crowded), size = 21)
train_4 <- sample(nrow(Q5_shape_16_crowded), size = 21)
train_subset_1 <- as.data.frame(B_shape_16_crowded[train_1, ])
train_subset_2 <- as.data.frame(K_shape_16_crowded[train_2, ])
train_subset_3 <- as.data.frame(N_shape_16_crowded[train_3, ])
train_subset_4 <- as.data.frame(Q5_shape_16_crowded[train_4, ])
BKNQ16.train <- as.data.frame(rbind(train_subset_1, train_subset_2,
train_subset_3, train_subset_4))
test_1 <- as.data.frame(B_shape_16_crowded[-train_1, ])
test_2 <- as.data.frame(K_shape_16_crowded[-train_2, ])
test_3 <- as.data.frame(N_shape_16_crowded[-train_3, ])
test_4 <- as.data.frame(Q5_shape_16_crowded[-train_4, ])
test_subset_1 <- test_1[sample(nrow(test_1), size = 9), ]
test_subset_2 <- test_2[sample(nrow(test_2), size = 9), ]
test_subset_3 <- test_3[sample(nrow(test_3), size = 9), ]
test_subset_4 <- test_4[sample(nrow(test_4), size = 9), ]
BKNQ16.test <- as.data.frame(rbind(test_subset_1, test_subset_2,
test_subset_3, test_subset_4))
BKNQ16.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 + A13 + A14 + A15 + A16 +
B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12 + B13 + B14 + B15 + B16,
data = BKNQ16.train, na.action = na.omit
)
res_har16_crowded[i, 1:3] <- prop.table(BKNQ16.train.lda$svd^2)
BKNQ16.train.lda.values <- predict(BKNQ16.train.lda)
prediction.lda.accuracy16 <- table(BKNQ16.train$Species2,
BKNQ16.train.lda.values$class)
res_har16_crowded[i, 4:7] <- t(as.matrix(diag(prop.table(prediction.lda.accuracy16, 1))))
res_har16_crowded[i, 8] <- exp(rowMeans(log(res_har16_crowded[i, 4:7])))
BKNQ16.lda.predict.posteriors <- as.data.frame(
predict(BKNQ16.train.lda,
newdata = BKNQ16.test)$posterior)
BKNQ16.lda.predict <- predict(BKNQ16.train.lda, newdata = BKNQ16.test)
prediction.accuracy16 <- table(BKNQ16.test$Species2,
BKNQ16.lda.predict$class)
res_har16_crowded[i, 9:12] <- diag(prop.table(prediction.accuracy16, 1))
res_har16_crowded[i, 13] <- exp(rowMeans(log(res_har16_crowded[i, 9:12])))
}
res_har24_crowded <- as.data.frame(matrix(NA, nrow = n, ncol = 13))
colnames(res_har24_crowded) <- varnames
set.seed(333)
for (i in 1:n) {
train_1 <- sample(nrow(B_shape_24_crowded), size = 21)
train_2 <- sample(nrow(K_shape_24_crowded), size = 21)
train_3 <- sample(nrow(N_shape_24_crowded), size = 21)
train_4 <- sample(nrow(Q5_shape_24_crowded), size = 21)
train_subset_1 <- as.data.frame(B_shape_24_crowded[train_1, ])
train_subset_2 <- as.data.frame(K_shape_24_crowded[train_2, ])
train_subset_3 <- as.data.frame(N_shape_24_crowded[train_3, ])
train_subset_4 <- as.data.frame(Q5_shape_24_crowded[train_4, ])
BKNQ24.train <- as.data.frame(rbind(train_subset_1, train_subset_2,
train_subset_3, train_subset_4))
test_1 <- as.data.frame(B_shape_24_crowded[-train_1, ])
test_2 <- as.data.frame(K_shape_24_crowded[-train_2, ])
test_3 <- as.data.frame(N_shape_24_crowded[-train_3, ])
test_4 <- as.data.frame(Q5_shape_24_crowded[-train_4, ])
test_subset_1 <- test_1[sample(nrow(test_1), size = 9), ]
test_subset_2 <- test_2[sample(nrow(test_2), size = 9), ]
test_subset_3 <- test_3[sample(nrow(test_3), size = 9), ]
test_subset_4 <- test_4[sample(nrow(test_4), size = 9), ]
BKNQ24.test <- as.data.frame(rbind(test_subset_1, test_subset_2,
test_subset_3, test_subset_4))
BKNQ24.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 + A13 + A14 + A15 +
A16 + A17 + A18 + A19 + A20 + A21 + A22 + A23 + A24 +
B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12 + B13 + B14 + B15 +
B16 + B17 + B18 + B19 + B20 + B21 + B22 + B23 + B24,
data = BKNQ24.train, na.action = na.omit
)
res_har24_crowded[i, 1:3] <- prop.table(BKNQ24.train.lda$svd^2)
BKNQ24.train.lda.values <- predict(BKNQ24.train.lda)
prediction.lda.accuracy24 <- table(BKNQ24.train$Species2,
BKNQ24.train.lda.values$class)
res_har24_crowded[i, 4:7] <- t(as.matrix(diag(prop.table(
prediction.lda.accuracy24, 1))))
res_har24_crowded[i, 8] <- exp(rowMeans(log(res_har24_crowded[i, 4:7])))
BKNQ24.lda.predict.posteriors <- as.data.frame(
predict(BKNQ24.train.lda,
newdata = BKNQ24.test)$posterior)
BKNQ24.lda.predict <- predict(BKNQ24.train.lda,
newdata = BKNQ24.test)
prediction.accuracy24 <- table(BKNQ24.test$Species2,
BKNQ24.lda.predict$class)
res_har24_crowded[i, 9:12] <- diag(prop.table(prediction.accuracy24, 1))
res_har24_crowded[i, 13] <- exp(rowMeans(log(res_har24_crowded[i, 9:12])))
}
train_mean <- cbind(res_har4_crowded$M_TN,
res_har8_crowded$M_TN,
res_har12_crowded$M_TN,
res_har16_crowded$M_TN,
res_har24_crowded$M_TN)
boxplot(train_mean, names = c(4, 8, 12, 16, 24), ylim = c(0.5, 1.0))
title(main = "lda prediction for training datasets",
xlab = "number of harmonics", ylab = "accuracy")
test_mean <- cbind(res_har4_crowded$M_TS,
res_har8_crowded$M_TS,
res_har12_crowded$M_TS,
res_har16_crowded$M_TS,
res_har24_crowded$M_TS)
boxplot(test_mean, names = c(4, 8, 12, 16, 24), ylim = c(0.5, 1.0))
title(main = "lda prediction for test datasets",
xlab = "number of harmonics", ylab = "accuracy")
train_B <- cbind(res_har4_crowded$TN_B,
res_har8_crowded$TN_B,
res_har12_crowded$TN_B,
res_har16_crowded$TN_B,
res_har24_crowded$TN_B)
boxplot(train_B, names = c(4, 8, 12, 16, 24), ylim = c(0.5, 1.0))
title(main = "lda prediction for training datasets", sub = "Species B",
xlab = "number of harmonics", ylab = "accuracy")
train_K <- cbind(res_har4_crowded$TN_K,
res_har8_crowded$TN_K,
res_har12_crowded$TN_K,
res_har16_crowded$TN_K,
res_har24_crowded$TN_K)
boxplot(train_K, names = c(4, 8, 12, 16, 24), ylim = c(0.5, 1.0))
title(main = "lda prediction for training datasets", sub = "Species K",
xlab = "number of harmonics", ylab = "accuracy")
train_N <- cbind(res_har4_crowded$TN_N,
res_har8_crowded$TN_N,
res_har12_crowded$TN_N,
res_har16_crowded$TN_N,
res_har24_crowded$TN_N)
boxplot(train_N, names = c(4, 8, 12, 16, 24), ylim = c(0.5, 1.0))
title(main = "lda prediction for training datasets", sub = "Species N",
xlab = "number of harmonics", ylab = "accuracy")
train_Q <- cbind(res_har4_crowded$TN_Q,
res_har8_crowded$TN_Q,
res_har12_crowded$TN_Q,
res_har16_crowded$TN_Q,
res_har24_crowded$TN_Q)
boxplot(train_Q, names = c(4, 8, 12, 16, 24), ylim = c(0.5, 1.0))
title(main = "lda prediction for training datasets", sub = "Species Q",
xlab = "number of harmonics", ylab = "accuracy")
test_B <- cbind(res_har4_crowded$TS_B,
res_har8_crowded$TS_B,
res_har12_crowded$TS_B,
res_har16_crowded$TS_B,
res_har24_crowded$TS_B)
boxplot(test_B, names = c(4, 8, 12, 16, 24), ylim = c(0.2, 1.0))
title(main = "lda prediction for test datasets", sub = "Species B",
xlab = "number of harmonics", ylab = "accuracy")
test_K <- cbind(res_har4_crowded$TS_K,
res_har8_crowded$TS_K,
res_har12_crowded$TS_K,
res_har16_crowded$TS_K,
res_har24_crowded$TS_K)
boxplot(test_K, names = c(4, 8, 12, 16, 24), ylim = c(0.2, 1.0))
title(main = "lda prediction for test datasets", sub = "Species K",
xlab = "number of harmonics", ylab = "accuracy")
test_N <- cbind(res_har4_crowded$TS_N,
res_har8_crowded$TS_N,
res_har12_crowded$TS_N,
res_har16_crowded$TS_N,
res_har24_crowded$TS_N)
boxplot(test_N, names = c(4, 8, 12, 16, 24), ylim = c(0.2, 1.0))
title(main = "lda prediction for test datasets", sub = "Species N",
xlab = "number of harmonics", ylab = "accuracy")
test_Q <- cbind(res_har4_crowded$TS_Q,
res_har8_crowded$TS_Q,
res_har12_crowded$TS_Q,
res_har16_crowded$TS_Q,
res_har24_crowded$TS_Q)
boxplot(test_Q, names = c(4, 8, 12, 16, 24), ylim = c(0.2, 1.0))
title(main = "lda prediction for test datasets", sub = "Species Q",
xlab = "number of harmonics", ylab = "accuracy")
results_accuracy_har8 <- as.data.frame(matrix(NA, nrow = n, ncol = 4))
colnames(results_accuracy_har8) <- varnames2
set.seed(333)
for (i in 1:n) {
train_1 <- sample(nrow(B_shape_8_crowded), size = 21)
train_2 <- sample(nrow(K_shape_8_crowded), size = 21)
train_3 <- sample(nrow(N_shape_8_crowded), size = 21)
train_4 <- sample(nrow(Q5_shape_8_crowded), size = 21)
train_subset_1 <- as.data.frame(B_shape_8_crowded[train_1, ])
train_subset_2 <- as.data.frame(K_shape_8_crowded[train_2, ])
train_subset_3 <- as.data.frame(N_shape_8_crowded[train_3, ])
train_subset_4 <- as.data.frame(Q5_shape_8_crowded[train_4, ])
BKNQ8.train <- as.data.frame(rbind(train_subset_1, train_subset_2,
train_subset_3, train_subset_4))
test_1 <- as.data.frame(B_shape_8_crowded[-train_1, ])
test_2 <- as.data.frame(K_shape_8_crowded[-train_2, ])
test_3 <- as.data.frame(N_shape_8_crowded[-train_3, ])
test_4 <- as.data.frame(Q5_shape_8_crowded[-train_4, ])
test_subset_1 <- test_1[sample(nrow(test_1), size = 9), ]
test_subset_2 <- test_2[sample(nrow(test_2), size = 9), ]
test_subset_3 <- test_3[sample(nrow(test_3), size = 9), ]
test_subset_4 <- test_4[sample(nrow(test_4), size = 9), ]
BKNQ8.test <- as.data.frame(rbind(test_subset_1, test_subset_2,
test_subset_3, test_subset_4))
# moment
BKNQ8.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 +
B2 + B3 + B4 + B5 + B6 + B7 + B8,
data = BKNQ8.train, method = "moment",
na.action = na.omit
)
BKNQ8.lda.predict <- predict(BKNQ8.train.lda,
newdata = BKNQ8.test)
results_accuracy_har8[i, 1] <- mean(
BKNQ8.lda.predict$class == BKNQ8.test$Species2
)
# mle
BKNQ8.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 +
B2 + B3 + B4 + B5 + B6 + B7 + B8,
data = BKNQ8.train, method = "mle",
na.action = na.omit
)
BKNQ8.lda.predict <- predict(BKNQ8.train.lda,
newdata = BKNQ8.test)
results_accuracy_har8[i, 2] <- mean(
BKNQ8.lda.predict$class == BKNQ8.test$Species2
)
# mve
BKNQ8.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 +
B2 + B3 + B4 + B5 + B6 + B7 + B8,
data = BKNQ8.train, method = "mve",
na.action = na.omit
)
BKNQ8.lda.predict <- predict(BKNQ8.train.lda,
newdata = BKNQ8.test)
results_accuracy_har8[i, 3] <- mean(
BKNQ8.lda.predict$class == BKNQ8.test$Species2
)
# t
BKNQ8.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 +
B2 + B3 + B4 + B5 + B6 + B7 + B8,
data = BKNQ8.train, method = "t",
na.action = na.omit
)
BKNQ8.lda.predict <- predict(BKNQ8.train.lda,
newdata = BKNQ8.test)
results_accuracy_har8[i, 4] <- mean(
BKNQ8.lda.predict$class == BKNQ8.test$Species2
)
}
results_accuracy_har12 <- as.data.frame(matrix(NA, nrow = n, ncol = 4))
colnames(results_accuracy_har12) <- varnames2
set.seed(333)
for (i in 1:n) {
train_1 <- sample(nrow(B_shape_12_crowded), size = 21)
train_2 <- sample(nrow(K_shape_12_crowded), size = 21)
train_3 <- sample(nrow(N_shape_12_crowded), size = 21)
train_4 <- sample(nrow(Q5_shape_12_crowded), size = 21)
train_subset_1 <- as.data.frame(B_shape_12_crowded[train_1, ])
train_subset_2 <- as.data.frame(K_shape_12_crowded[train_2, ])
train_subset_3 <- as.data.frame(N_shape_12_crowded[train_3, ])
train_subset_4 <- as.data.frame(Q5_shape_12_crowded[train_4, ])
BKNQ12.train <- as.data.frame(rbind(train_subset_1, train_subset_2,
train_subset_3, train_subset_4))
test_1 <- as.data.frame(B_shape_12_crowded[-train_1, ])
test_2 <- as.data.frame(K_shape_12_crowded[-train_2, ])
test_3 <- as.data.frame(N_shape_12_crowded[-train_3, ])
test_4 <- as.data.frame(Q5_shape_12_crowded[-train_4, ])
test_subset_1 <- test_1[sample(nrow(test_1), size = 9), ]
test_subset_2 <- test_2[sample(nrow(test_2), size = 9), ]
test_subset_3 <- test_3[sample(nrow(test_3), size = 9), ]
test_subset_4 <- test_4[sample(nrow(test_4), size = 9), ]
BKNQ12.test <- as.data.frame(rbind(test_subset_1, test_subset_2,
test_subset_3, test_subset_4))
# moment
BKNQ12.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 +
B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12,
data = BKNQ12.train, method = "moment", na.action = na.omit
)
BKNQ12.lda.predict <- predict(BKNQ12.train.lda,
newdata = BKNQ12.test)
results_accuracy_har12[i, 1] <- mean(
BKNQ12.lda.predict$class == BKNQ12.test$Species2
)
# mle
BKNQ12.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 +
B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12,
data = BKNQ12.train, method = "mle", na.action = na.omit
)
BKNQ12.lda.predict <- predict(BKNQ12.train.lda,
newdata = BKNQ12.test)
results_accuracy_har12[i, 2] <- mean(
BKNQ12.lda.predict$class == BKNQ12.test$Species2
)
# mve
BKNQ12.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 +
B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12,
data = BKNQ12.train, method = "mve", na.action = na.omit
)
BKNQ12.lda.predict <- predict(BKNQ12.train.lda,
newdata = BKNQ12.test)
results_accuracy_har12[i, 3] <- mean(
BKNQ12.lda.predict$class == BKNQ12.test$Species2
)
# t
BKNQ12.train.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 +
B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12,
data = BKNQ12.train, method = "t", na.action = na.omit
)
BKNQ12.lda.predict <- predict(BKNQ12.train.lda,
newdata = BKNQ12.test)
results_accuracy_har12[i, 4] <- mean(
BKNQ12.lda.predict$class == BKNQ12.test$Species2
)
}
method_comparison <- as.data.frame(matrix(NA, nrow = 8, ncol = 3))
colnames(method_comparison) <- varnames3
method_comparison[, 1] <- c(8, 8, 8, 8,
12, 12, 12, 12)
method_comparison[, 2] <- c("moment", "mle", "mve", "t",
"moment", "mle", "mve", "t")
method_comparison[, 3] <- c(
exp(mean(log(results_accuracy_har8$moment))),
exp(mean(log(results_accuracy_har8$mle))),
exp(mean(log(results_accuracy_har8$mve))),
exp(mean(log(results_accuracy_har8$t))),
exp(mean(log(results_accuracy_har12$moment))),
exp(mean(log(results_accuracy_har12$mle))),
exp(mean(log(results_accuracy_har12$mve))),
exp(mean(log(results_accuracy_har12$t)))
)
method_comparison
## harmonic method mean
## 1 8 moment 0.9676779
## 2 8 mle 0.9676779
## 3 8 mve 0.9510688
## 4 8 t 0.9677550
## 5 12 moment 0.9542000
## 6 12 mle 0.9542000
## 7 12 mve 0.9226735
## 8 12 t 0.9566071
BKNQ8_crowded.lda <- lda(Species2 ~
A2 + A3 + A4 + A5 + A6 + A7 + A8 +
B2 + B3 + B4 + B5 + B6 + B7 + B8,
method = "moment", data = BKNQ8_crowded,
na.action = na.omit)
BKNQ8_crowded.lda.values <- predict(BKNQ8_crowded.lda)
ggord(BKNQ8_crowded.lda, BKNQ8_crowded$Species2, size = 2.5, vec_ext = 0.03, axes = c("1", "2"), hull = TRUE, ellipse = FALSE)
ggord(BKNQ8_crowded.lda, BKNQ8_crowded$Species2, size = 2.5, vec_ext = 0.03, axes = c("1", "2"), ellipse_pro = 0.95)
ggord(BKNQ8_crowded.lda, BKNQ8_crowded$Species2, size = 2.5, vec_ext = 0.03, axes = c("1", "3"), hull = TRUE, ellipse = FALSE)
ggord(BKNQ8_crowded.lda, BKNQ8_crowded$Species2, size = 2.5, vec_ext = 0.03, axes = c("1", "3"))