# Linear discriminant analysis of morphogroups

# climExtracts.Rdata is generated in script 9
# (presumably supplies coastal_pres, interior_pres, rockies_pres, e_bg and
# e_allOcc, none of which are created in this script -- confirm against script 9)
load('data/SDM/climExtracts.Rdata')

# Collapse the individual landcover classes into two habitat totals:
#   closedLC = LC1+2+3+4 (forests: needleleaf, broadleaf, mixed/other trees)
#   openLC   = LC5+6+8   (shrubs, herbaceous, flooded vegetation)
# LC10 (snow/ice) and LC11 (barren) are left out of the open total.
closedLC <- paste0('landcover', 1:4)
openLC <- paste0('landcover', c(5, 6, 8))

# presence tables: append both totals as new columns (open first, then closed)
coastal_pres <- cbind(
	coastal_pres,
	openLC = rowSums(coastal_pres[, openLC]),
	closedLC = rowSums(coastal_pres[, closedLC])
)
interior_pres <- cbind(
	interior_pres,
	openLC = rowSums(interior_pres[, openLC]),
	closedLC = rowSums(interior_pres[, closedLC])
)
rockies_pres <- cbind(
	rockies_pres,
	openLC = rowSums(rockies_pres[, openLC]),
	closedLC = rowSums(rockies_pres[, closedLC])
)

# same two totals for e_bg
e_bg <- cbind(
	e_bg,
	openLC = rowSums(e_bg[, openLC]),
	closedLC = rowSums(e_bg[, closedLC])
)

# ... and for e_allOcc
e_allOcc <- cbind(
	e_allOcc,
	openLC = rowSums(e_allOcc[, openLC]),
	closedLC = rowSums(e_allOcc[, closedLC])
)

# In the three presence tables, remove every column whose name contains
# 'landcover'; the openLC / closedLC totals do not match and are kept.
# (e_bg and e_allOcc keep their individual landcover columns.)
coastal_pres <- coastal_pres[, grep('landcover', colnames(coastal_pres), invert = TRUE)]
interior_pres <- interior_pres[, grep('landcover', colnames(interior_pres), invert = TRUE)]
rockies_pres <- rockies_pres[, grep('landcover', colnames(rockies_pres), invert = TRUE)]


# column names of coastal_pres that match neither 'landcover' nor 'LC'
# (i.e. everything except the landcover classes and the open/closed totals)
climVar <- unique(grep('landcover|LC', colnames(coastal_pres), value = TRUE, invert = TRUE))

# vifVar from SDM prepwork (leaving out landcover classes, and adding just closedLC)
thinnedVar <- c(
	"CHELSA_bioclim_02",
	"CHELSA_bioclim_03",
	"CHELSA_bioclim_07",
	"CHELSA_bioclim_14",
	"CHELSA_bioclim_15",
	"CHELSA_bioclim_18",
	"CHELSA_bioclim_19",
	"climaticMoistureIndex",
	"embergerQ",
	"minTempWarmest",
	"PETDriestQuarter",
	"PETseasonality",
	"PETWettestQuarter",
	'closedLC'
)


# per-group plotting constants, keyed by group name
# (not used further in this script; presumably colours, pch symbol codes and
# an alpha value shared with related plotting code -- confirm)
cols <- c(
	fullSp = 'darkslateblue',
	coastal = 'yellow3',
	interior = 'skyblue3',
	coastalInterior = '#31a354',
	rockies = 'purple'
)
symbs <- c(
	fullSp = 25,
	coastal = 21,
	interior = 22,
	coastalInterior = 23,
	rockies = 24
)
transp <- 0.5


# predictors entering the discriminant analysis
varInd <- thinnedVar

# report whether any predictor has missing values in each presence table
# (result is only printed, not enforced)
anyNA(coastal_pres[, varInd])
anyNA(interior_pres[, varInd])
anyNA(rockies_pres[, varInd])

# one group label per presence row, in the same order the tables are stacked
resp <- rep(
	c('coastal', 'interior', 'rockies'),
	times = c(nrow(coastal_pres), nrow(interior_pres), nrow(rockies_pres))
)

# response in the first column, followed by the stacked predictor columns
dat <- cbind.data.frame(
	resp,
	rbind.data.frame(coastal_pres[, varInd], interior_pres[, varInd], rockies_pres[, varInd])
)

# LDA of group membership on all predictors: once fitted on the full data,
# once with CV = TRUE (leave-one-out cross-validated class assignments)
lda.clim <- MASS::lda(resp ~ ., data = dat)
lda.clim.CV <- MASS::lda(resp ~ ., data = dat, CV = TRUE)
lda.clim

# Accuracy of the cross-validated classification
# confusion table: rows = group in dat$resp, columns = class assigned under CV
ct <- table(dat$resp, lda.clim.CV$class)

# proportion correct within each group of resp (diagonal of row proportions)
diag(prop.table(ct, margin = 1))

# overall proportion correct (diagonal of whole-table proportions)
sum(diag(prop.table(ct)))

# row-wise percentages of the confusion table, one decimal place
# note: apply() returns each row's result as a column, so in the printed
# matrix columns are the resp groups and rows are the CV-assigned classes
round(100 * apply(ct, 1, function(counts) counts / sum(counts)), 1)


# plot loadings
# Coefficients of the linear discriminants, with row names tidied for use as
# bar labels: underscores become spaces, the 'CHELSA ' prefix and the word
# 'climatic' are removed, and 'LC' is expanded to ' landcover'.
ldaLoadings <- lda.clim$scaling
rownames(ldaLoadings) <- gsub('_', ' ', rownames(ldaLoadings))
rownames(ldaLoadings) <- gsub('CHELSA\\s', '', rownames(ldaLoadings))
rownames(ldaLoadings) <- gsub('climatic', '', rownames(ldaLoadings))
rownames(ldaLoadings) <- gsub('LC', ' landcover', rownames(ldaLoadings))

# The bars show absolute coefficients, so the shared y-axis limit must be based
# on absolute values as well. Using the signed maximum would clip bars whenever
# the coefficient with the largest magnitude is negative.
ylimAbs <- c(0, max(abs(ldaLoadings)))

# two panels (LD1, LD2), each sorted from largest to smallest magnitude and
# drawn on the same y-axis scale
pdf(file = 'output/FigA12_LDA_loadings.pdf', width = 9, height = 7)
	par(mfrow=c(1,2), mar = c(5,3,1,1))
	barplot(sort(abs(ldaLoadings[,1]), decreasing = TRUE), cex.names=0.5, las=3, ylim = ylimAbs)
	mtext('LD 1 coefficients of linear discriminants', side = 2, line = 2)
	barplot(sort(abs(ldaLoadings[,2]), decreasing = TRUE), cex.names=0.5, las=3, ylim = ylimAbs)
	mtext('LD 2 coefficients of linear discriminants', side = 2, line = 2)
dev.off()

# for script 11
# morphogroup label for every presence row (same row order as dat), written
# alongside the discriminant scores of the fitted LDA
labels <- rep(
	c('Coastal', 'Interior', 'Rocky'),
	times = c(nrow(coastal_pres), nrow(interior_pres), nrow(rockies_pres))
)
write.csv(
	data.frame(morphogroup = labels, predict(lda.clim)$x, check.names = FALSE),
	file = 'data/DFAdat/climDFA.csv',
	row.names = FALSE
)



