# Set working directory
# setwd()
library(adegenet)

# Load the microsatellite genotypes, stored in FSTAT input format (.dat)
fstat.file <- "../data/Cst_msat_DAPC_no_contact.dat"
Cst.allpops <- read.fstat(file = fstat.file, quiet = FALSE)

# Print the object so the dataset information can be checked by eye
Cst.allpops
# Should print TRUE: the object is expected to be of class genind
is.genind(Cst.allpops)

# Since group memberships are known beforehand, we can skip the find.clusters
# step. We jump straight into dapc, retaining a high number of PCs first
# (e.g., 100), and then we test for the optimal number of PCs to retain.

# optim.a.score() relies on randomised group assignments, so the RNG seed is
# fixed to make the selected number of PCs (test$best) repeatable across runs.
set.seed(1)

##### 1st DAPC step; 100 PCs and 2 discriminant functions (3 clusters - 1) #####
# With K groups there are at most K - 1 discriminant functions, so 2 is the
# number actually available for the 3 clusters analysed here (the rest of the
# script also expects exactly two LD axes).
dapc.Cst <- dapc(Cst.allpops, var.contrib = TRUE, scale = FALSE,
                 n.pca = 100, n.da = 2)

# a-score optimisation on the over-fitted model; test$best holds the
# suggested number of PCs to retain
test <- optim.a.score(dapc.Cst)

##### 2nd DAPC step; optimal number PCs retained (test$best) #####
dapc.Cst <- dapc(Cst.allpops, var.contrib = TRUE, scale = FALSE,
                 n.pca = test$best, n.da = 2)

##### Overview of the fitted dapc object #####
dapc.Cst

# First look at the analysis output as a scatter plot
scatter(dapc.Cst)

# Cross-tabulate original ("ori") group memberships against the memberships
# inferred by DAPC ("inf"), then draw the resulting table
membership.tab <- table(pop(Cst.allpops), dapc.Cst$assign)
table.value(membership.tab,
            col.labels = paste("inf", 1:3),
            row.labels = paste("ori", 1:3))

# STRUCTURE-like bar plot of the analysis output, one colour per cluster
Cst.col <- c("#CDCD00", "#87CEEB", "#A020F0")
compoplot(dapc.Cst,
          col = Cst.col,
          lab = "",
          xlab = "individuals",
          txt.leg = paste("Cluster", 1:3),
          posi = "bottomright")


##### Share of the discriminant-analysis eigenvalues held by axes 1 & 2 #####
# Values are proportions (0-1); they are multiplied by 100 further down when
# the axis labels are built.
eig.share <- dapc.Cst$eig / sum(dapc.Cst$eig)
LD1 <- eig.share[1] # first discriminant axis
LD2 <- eig.share[2] # second discriminant axis


##### Analysis outputs #####
dapc.Cst$ind.coord # individual coordinates on the LD axes
pop(Cst.allpops) # a priori group of each individual
dapc.Cst$assign # group each individual was assigned to by DAPC

# Axis labels of the form "LD1(xx.x%)", carrying the share of each axis
ld.labels <- paste0(c("LD1(", "LD2("),
                    sprintf("%#.1f", c(LD1, LD2) * 100),
                    "%)")

# Bind the three pieces printed above into a single data frame
dapc.output <- data.frame(pop(Cst.allpops),
                          dapc.Cst$assign,
                          dapc.Cst$ind.coord)
names(dapc.output) <- c("Group", "Assigned", ld.labels)
head(dapc.output)

# Save the data frame (row names included) as a .csv file
write.csv(dapc.output, file = "../data/DFAdat/output.csv", row.names = TRUE)
# NOTE: our version of the output is also included in this directory
