### Set up input for PopArt from mtDNA nexus file ###
require("pegas")
require("xlsx")
require("Hmisc")

pop_data<-read.xlsx("../data/2_Cyanocitta_microsats_19Apr2022_FINAL.xlsx",stringsAsFactors=F,sheetName="Pop list with ind #")
head(pop_data)

pop_data$Option.1.Group..[pop_data$Option.1.Group..=="2"]<-1 #Remove Southern California Group
pop_data$Option.1.Group..[pop_data$Option.1.Group..=="3"]<-2 #Remove Southern California Group
pop_data$Option.1.Group..[pop_data$Option.1.Group..=="4"]<-3 #Remove Southern California Group
pop_data$Option.1.Group..[pop_data$Option.1.Group..=="5"]<-4 #Remove Southern California Group

c_st<-read.nexus.data("../data/1_All_Sequences - Edited_forPopART.nex")

pop_id<-sapply(strsplit(names(c_st),"_"),function(x) x[1])
group_vec<-vector()
for(i in 1:length(pop_id)){
	group_vec[i]<-pop_data$Option.1.Group..[which(as.character(pop_data$Pop..)==pop_id[i])]
}

c_st_haplotype<-haplotype(as.DNAbin(c_st))

hap_index<-attr(c_st_haplotype,"index")

sink("../output/Cst_popart_v3.nex")
cat("#NEXUS")
cat("\n\n")
cat("BEGIN TAXA;\n")
cat(paste("DIMENSIONS NTAX=",length(hap_index),";\n",sep=""))

cat("TAXLABELS\n")
cat(paste(paste("haplotype_",1:length(hap_index),sep=""),collapse="\n"))
cat("\n;\n")
cat("END;\n\n")

cat("BEGIN CHARACTERS;\n")
cat("DIMENSIONS NCHAR=1041;\n")
cat("FORMAT DATATYPE=DNA MISSING=? GAP=- ;\n")
cat("MATRIX\n\n")

for(i in 1:length(hap_index)){
	cat(paste("haplotype_",i," ",toupper(paste(c_st[[hap_index[[i]][1]]],collapse="")),"\n",sep=""))
}
cat("\n;\n\nEND;\n\n")

cat("BEGIN TRAITS;\n\n")
cat("Dimensions NTRAITS=4;\n")
cat("Format labels=yes missing=? separator=Comma;\n")
cat("TraitLabels Pacific Interior ContactZone Rockies;\n")
cat("Matrix\n\n")

for(i in 1:length(hap_index)){
	foo<-rep(0,4)
	names(foo)<-1:4
	bar<-table(group_vec[hap_index[[i]]])
	foo[names(bar)]<-bar
	
	cat(paste("haplotype_",i," ",paste(foo,collapse=","),"\n",sep=""))
	
}
cat(";\n\nEND;\n")
sink()

sort(sapply(hap_index,length))
attr(c_st_haplotype,"index")[[5]]

pop_data[hap_index[[5]],]

### FIGURE OUT WHICH HAPLOTYPES BELONG TO WHICH GROUP ###
popART<-read.nexus.data("../output/Cst_popart_v3.nex")
popART_nj<-dist.dna(as.DNAbin(popART))
popART_nj<-nj(popART_nj)

# pdf("../output/popartnj.pdf")
# plot(popART_nj,show.tip.label=F)
# nodelabels(cex=0.5)
# dev.off()

###467 is node that corresponds to rocky mountain clade in nj tree

hoi<-sapply(strsplit(extract.clade(popART_nj,467)$tip.label,"_"),function(x) x[2])

### Figure haplotype frequency for HZAR ###
hzar_pops<-c(41,43,49,50,51,52,53,54,55,60,61)

freq_vec<-vector()
noi<-vector()
for(i in 1:length(hzar_pops)){
	ioi<-which(pop_id==hzar_pops[i])
	
	rocky_count<-0
	interior_count<-0
	for(j in 1:length(ioi)){
		if(which(sapply(hap_index,function(x) ioi[j] %in% x)) %in% hoi){
			rocky_count<-rocky_count+1
		}else{
			interior_count<-interior_count+1
		}
	}
	freq_vec[i]<-interior_count/(interior_count+rocky_count)
	noi[i]<-interior_count +rocky_count
}

locs<-read.table("../utility/Cst_msat_clinal_locality.txt")

hzar_mtDNA<-data.frame(locs,n=noi,mtDNA_interior=freq_vec,mtDNA_rocky=1-freq_vec)

write.table(hzar_mtDNA,file="../output/hzar_mtDNA_input.txt",row.names=F,quote=F)