### Calculate FIS for fsc26
## Population codes; a population's numeric ID is its position in this vector
pop <- c("SV", "SL", "M", "D", "G") # Pops from 1 to 5
n.pop <- length(pop)

## Read the genotype table and recode Pop from its label to the numeric ID
## (labels that are not listed in `pop` become NA)
d <- read.csv(file = "~/works/works21/emily/data/repeat_number195inds15loci210309.csv")
d$Pop <- as.numeric(factor(d$Pop, levels = pop))

## Get locus names
## Columns whose name contains "MA" are taken as allele columns. This assumes
## they come in consecutive pairs (one pair per locus): every second column
## is kept and its last character is stripped to obtain the locus name.
ma.cols <- names(d)[grepl("MA", names(d))]
first.cols <- ma.cols[seq(1, length(ma.cols), by = 2)]
locus.name <- substr(first.cols, 1, nchar(first.cols) - 1)

n.locus <- length(locus.name)

## Missing genotypes are deliberately left as NA. Recoding them to "?" (as
## was done here before) turned the affected columns into character vectors,
## made the NA filter inside the loop a no-op, and let "?" be counted as a
## real allele in HO and HE.

## Calculate HE, HO and FIS
## out  : one row; per-population mean and SD (across loci) of FIS
## out2 : one row; per-population mean and SD (across loci) of HE (debug)
pop.id <- seq_len(n.pop)
out <- out2 <- as.data.frame(matrix(NA, nrow = 1, ncol = n.pop * 2))
names(out) <- c(paste0("FIS_", pop.id), paste0("FISsd_", pop.id))
names(out2) <- c(paste0("H_", pop.id), paste0("Hsd_", pop.id)) # For debug

for (i in pop.id) {
    ## which() silently drops individuals whose Pop is NA
    in.pop <- which(d$Pop == i)
    fis <- rep(NA_real_, n.locus)
    he.locus <- rep(NA_real_, n.locus) # For debug
    for (j in seq_len(n.locus)) {
        ## The two allele columns of a locus are named <locus>a and <locus>b
        locus <- paste0(locus.name[j], c("a", "b"))
        temp <- d[in.pop, locus, drop = FALSE]
        ## Keep only individuals with both allele copies genotyped
        temp <- temp[complete.cases(temp), , drop = FALSE]
        if (nrow(temp) == 0) {
            next # No data: HO, HE and FIS stay NA for this locus
        }
        ## Observed heterozygosity: proportion of heterozygous individuals
        ho <- sum(temp[[1]] != temp[[2]]) / nrow(temp)
        allele <- unlist(temp, use.names = FALSE)
        n.allele <- length(allele)
        allele.freq <- table(allele) / n.allele
        he <- (n.allele / (n.allele - 1)) * (1 - sum(allele.freq^2)) # Unbiased one
        he.locus[j] <- he # For debug
        if (he > 0) { # FIS is undefined for a monomorphic locus (HE = 0)
            fis[j] <- (he - ho) / he
        }
    }
    #print(fis)
    ## Average over the loci for which FIS is defined
    fis <- fis[!is.na(fis)]
    out[, paste0(c("FIS_", "FISsd_"), i)] <- c(mean(fis), sd(fis))
    out2[, paste0(c("H_", "Hsd_"), i)] <- c(mean(he.locus, na.rm = TRUE),
                                           sd(he.locus, na.rm = TRUE)) # For debug
}

## print(out2) # I confirmed that he in out2 was the same as H calculated with arlsumstat

## Output
## Write the one-row FIS summary, rounded to 6 decimals, as a tab-separated
## table with a header line and no row names.
out.rounded <- round(out, digits = 6)
write.table(out.rounded,
            file = "observed_fis_Magnolia5pops.txt",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)


