### Make arlsumstat infile (.arp file) for 5 pops.
d <- read.csv(file = "~/works/works21/emily/data/repeat_number195inds15loci210309.csv")
pop <- c("SV", "SL", "M", "D", "G") # Pops from 1 to 5

## Recode the population labels as 1..5, following the order of `pop`.
## factor() silently maps labels that are not listed in `pop` to NA (and the
## missing-data replacement below would then rewrite them as "?"), so stop
## with a clear message instead of writing a corrupted Pop column.
unknown.pop <- setdiff(unique(as.character(d$Pop)), pop)
if (length(unknown.pop) > 0) {
    stop("Unexpected population label(s) in Pop: ",
         paste(unknown.pop, collapse = ", "), call. = FALSE)
}
d$Pop <- factor(d$Pop, levels = pop)
d$Pop <- as.numeric(d$Pop)

## Get locus names
## Allele columns contain "MA" and come in pairs per locus; the locus name is
## the first column name of each pair with its last character (the allele
## suffix) removed.
allele.cols <- grep("MA", names(d), value = TRUE)
locus.name <- allele.cols[seq_along(allele.cols) %% 2 == 1] # 1st, 3rd, 5th, ...
locus.name <- substring(locus.name, 1, nchar(locus.name) - 1)

## Each locus is read later through its "<locus>a" and "<locus>b" columns,
## so make sure both exist before going any further.
if (length(locus.name) == 0) {
    stop("No allele columns containing 'MA' were found.", call. = FALSE)
}
missing.cols <- setdiff(c(paste0(locus.name, "a"), paste0(locus.name, "b")),
                        names(d))
if (length(missing.cols) > 0) {
    stop("Missing allele column(s): ",
         paste(missing.cols, collapse = ", "), call. = FALSE)
}

n.locus <- length(locus.name)

d[is.na(d)] <- "?" # Missing data (but we have already removed missings)

## Define function for writing an Arlequin/arlsumstat project (.arp) file
##
## Arguments:
##   data    : data frame with a `Pop` column (population code of each
##             individual) and, for every locus in `loci`, two allele columns
##             named "<locus>a" and "<locus>b".
##   outfile : path of the .arp file to create (overwritten if it exists).
##   loci    : character vector of locus base names. Defaults to the
##             `locus.name` object visible from the calling script.
## Value:
##   `outfile`, invisibly. The function is called for its side effect of
##   writing the file.
##
## One sample is written per distinct non-missing value of `data$Pop`, in the
## order given by table(); samples and individuals are labelled with that
## value, so populations coded 1..n give "Sample 1" ... "Sample n".
arp <- function (data, outfile, loci = locus.name) {
    allele.a <- paste0(loci, "a")
    allele.b <- paste0(loci, "b")

    ## Fail before touching the output file if the input is unusable.
    missing.cols <- setdiff(c("Pop", allele.a, allele.b), names(data))
    if (length(missing.cols) > 0) {
        stop("Column(s) missing from data: ",
             paste(missing.cols, collapse = ", "), call. = FALSE)
    }
    if (anyNA(data$Pop)) {
        warning("Rows with missing Pop are not written to the .arp file.",
                call. = FALSE)
    }

    ## Sample sizes and population ids come from the same table, so the count
    ## and the selected rows always refer to the same population, even when
    ## the population codes are not exactly 1..n.
    n.ind <- table(data$Pop)
    n.ind <- n.ind[n.ind > 0] # ignore unused factor levels
    pop.id <- names(n.ind)
    n.pop <- length(n.ind) # number of pops

    ## Write through an explicit connection instead of sink(): the connection
    ## is closed by on.exit() even when an error interrupts the writing, and
    ## console output is never redirected.
    con <- file(outfile, open = "w")
    on.exit(close(con), add = TRUE)
    out <- function(..., sep = "") cat(..., file = con, sep = sep)

    out("[Profile]\n")
    out('Title="Observed data"\n')
    out("NbSamples=", n.pop, "\n\n")
    out("GenotypicData=1\n")
    out("GameticPhase=0\n")
    out("RecessiveData=0\n")
    out("DataType=MICROSAT\n")
    out("LocusSeparator=WHITESPACE\n")
    out("MissingData='?'\n\n")

    out("[Data]\n[[Samples]]\n")
    for (k in seq_len(n.pop)) { # loop for populations
        id <- pop.id[k]
        ## which() drops NA comparisons, so no all-NA rows are selected.
        temp <- data[which(data$Pop == id), , drop = FALSE]
        out('SampleName="Sample ', id, '"\n')
        out("SampleSize= ", n.ind[[k]], "\n")
        out("SampleData= {\n")
        for (j in seq_len(nrow(temp))) { # loop for individuals
            # 1st allele: "<pop>_<individual>", frequency 1, then the alleles
            out(id, "_", j, " 1 ")
            out(unlist(temp[j, allele.a]), sep = " ")
            out("\n")
            # 2nd allele: indented continuation line
            out("    ")
            out(unlist(temp[j, allele.b]), sep = " ")
            out("\n")
        }
        out("}\n")
    }

    ## All samples are placed in a single group.
    out("\n[[Structure]]\n")
    out('StructureName="Observed data"\n')
    out("NbGroups=1\nGroup={\n")
    for (id in pop.id) {
        out('"Sample ', id, '"\n')
    }
    out("}")

    invisible(outfile)
}

## Write the observed data set for the five populations to an .arp file in
## the current working directory.
arp.file <- "./Magnolia5pops.arp"
arp(data = d, outfile = arp.file)


