## Hannes Rathmann 2020 (c) ## R code for building a table of utility estimates for different dental traits and trait combinations ## Ref. Rathmann and Reyes-Centeno (Testing the utility of dental morphological trait combinations for inferring human neutral genetic variation) PNAS. DOI: xxx ##################### load microsatellite loci mean size data ##################### file_matrix=choose.files(default="", caption="Select a CSV file with microsatellite loci mean sizes", multi=F) gen.dat <- read.csv(file=file_matrix) rownames(gen.dat) <- gen.dat[,1] gen.dat <- gen.dat[,-1] ##################### load dental trait frequency data ##################### file_matrix=choose.files(default="", caption="Select a CSV file with dental trait frequencies", multi=F) dent.dat <- read.csv(file=file_matrix) dent.dat <- dent.dat[,-1] dent.vars <- colnames(dent.dat) ##################### z-score transformation of trait data ##################### dent.dat[dent.dat==1.00] <- 0.995 dent.dat[dent.dat==0.00] <- 0.005 dent.dat <- qnorm(as.matrix(dent.dat)) ##################### START ##################### nr.run <- 1000 # number of resampling runs ##################### find all possible dental trait combinations and save as list ##################### # This will do the calculations for individual traits only, not trait combinations: comb.list <- as.list(dent.vars) # This will do the calculations for individual traits and all possible trait combinations. # WARNING: Only do this when your machine has ~ 100 GB RAM. # Depending on your machine, this calculation may run for several weeks. # Consider to run this in parallel on several cores or on a cluster to reduce computational time. # comb.list <- unlist(Map(combn,list(dent.vars),seq_along(dent.vars),simplify=F),recursive=F) ##################### set some indices and define empty vectors that will be filled in a loop ##################### nr.pops <- nrow(gen.dat) nr.pairs <- nr.pops*(nr.pops-1)/2 nr.loci <- length(gen.dat) ut <- upper.tri(matrix(NA,nr.pops,nr.pops)) trait.combination <- vector(mode="character",length(comb.list)) r.median <- vector(mode="integer",length(comb.list)) r.lower <- vector(mode="integer",length(comb.list)) r.upper <- vector(mode="integer",length(comb.list)) p.value <- vector(mode="integer",length(comb.list)) for (i in 1:length(comb.list)){ ##################### for each trait combination, calculate D2 distances ##################### trait.set <- comb.list[[i]] trait.combination[i] <- paste(trait.set,collapse="/") trait.nr <- length(trait.set) trait.set.data <- dent.dat[,trait.set] D2.mat <- as.matrix(dist(trait.set.data),method="euclidean")^2 D2.pair <- D2.mat[ut] ##################### and calculate trait-adjusted loci-resampled Dm2 distances ##################### Dm2.pair <- matrix(NA,nr.run,nr.pairs) for (j in 1:nr.run){ loci.sample <- sample.int(nr.loci,trait.nr) Dm2.mat <- as.matrix(dist(gen.dat[,loci.sample],method="euclidean"))^2/trait.nr Dm2.pair[j,] <- Dm2.mat[ut] } ##################### and compare D2 to trait-adjusted loci-resampled Dm2 ##################### r.range <- vector(mode="integer",nr.run) for (k in 1:nr.run){ r.range[k] <- cor(D2.pair,Dm2.pair[k,],method="pearson") } qs <- quantile(r.range,probs=c(0.025,0.5,0.975),names=F) r.median[i] <- qs[2] r.lower[i] <- qs[1] r.upper[i] <- qs[3] ##################### and calculate p-values via permutation of D2 ##################### perm.r.range <- vector(mode="integer",nr.run) for (l in 1:nr.run){ perm.sample <- sample.int(nr.pops) perm.D2.mat <- D2.mat[perm.sample,perm.sample] perm.D2.pair <- perm.D2.mat[ut] perm.r.range[l] <- cor(perm.D2.pair,Dm2.pair[l,],method="pearson") } p.value[i] <- sum(perm.r.range>=qs[2])/nr.run } ##################### create table with results ##################### trait.number <- lengths(comb.list) r.median <- round(r.median, digits=5) r.lower <- round(r.lower, digits=5) r.upper <- round(r.upper, digits=5) p.value <- round(p.adjust(p.value,method="BH"),digits=3) df <- data.frame(trait.combination,trait.number,r.median,r.lower,r.upper,p.value) write.table(df,file="combinat_table.txt",row.names=F) # save results as text file ##################### inspecting results ##################### head(df,27)