Original data from Dryad repository for the paper by Turner et al. 2014 (doi:10.5061/dryad.2br40).
Transformed to Turner2014_mus_fixed_recodeA.raw with PLINK (see log file Turner2014_mus_fixed_recodeA.log for details)
Staubach et al. 2012 data allowed us to determine which markers were fixed between the species and to orientate alleles.
## ── Attaching packages ─────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1 ✔ purrr 0.2.4
## ✔ tibble 1.4.2 ✔ dplyr 0.7.4
## ✔ tidyr 0.8.0 ✔ stringr 1.3.0
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## Parsed with column specification:
## cols(
## .default = col_integer(),
## FID = col_character(),
## IID = col_character(),
## PHENOTYPE = col_double()
## )
## See spec(...) for full column specifications.
sid_G <- G %>% select(-(1:6)) %>% names(.) %>% sub("_[1-2]$", "", .)
alleles <- read_csv("Determine_fixed_SNPs/species_alleles_fixed.csv") %>% filter(sid %in% sid_G)## Parsed with column specification:
## cols(
## sid = col_character(),
## M = col_integer(),
## D = col_integer()
## )
14220 fixed SNPs between the two pure species were retained.
Allele noted 1 (in the name of the SNP) must be the reference allele of the array. So we switch all counts to be of this allele.
G_DM <- G
for(i in 1:nrow(alleles)){
M <- alleles$M[i]
D <- alleles$D[i]
new <- rep(NA, nrow(G_DM))
new[G_DM[[i+6]] == M] <- "M"
new[G_DM[[i+6]] == D] <- "D"
new[G_DM[[i+6]] == 1] <- "H"
G_DM[[i+6]] <- new
}
write_csv(G_DM, "Turner2014_mus_fixed_DMHcoded.csv")Download the annotation file from Yang et al. 2009 to determine on which chromosomes are the SNPs.
annot_all <- read_csv("snpAnnotation.v2.merged.txt")
annot <- annot_all %>%
filter(`JAX SNP ID` %in% sid_G) %>%
filter(!duplicated(`JAX SNP ID`))
write_csv(annot, "snpAnnotation_Turner2014.csv")## Parsed with column specification:
## cols(
## .default = col_character(),
## `Strategy Number` = col_integer(),
## `SNP Position (Build 37)` = col_integer(),
## cM_Male = col_double(),
## cM_Female = col_double(),
## cM_Average = col_double(),
## `SNP Position (Build 36)` = col_integer(),
## `Transcript Number Located` = col_integer(),
## `Gene Number Located` = col_integer(),
## `CpG flag` = col_integer(),
## `Proximal NspI Site` = col_integer(),
## `Distal NspI Site` = col_integer(),
## `NspI Fragment Size` = col_integer(),
## `Proximal StyI Site` = col_integer(),
## `Distal StyI Site` = col_integer(),
## `StyI Fragment Size` = col_integer(),
## X = col_integer(),
## Y = col_integer(),
## DESTYPE = col_integer(),
## EXPOS = col_integer(),
## PLEN = col_integer()
## # ... with 11 more columns
## )
## See spec(...) for full column specifications.
annot <- annot[order(match(annot$`JAX SNP ID`, sid_G)),]
is_A <- annot$Chromosome != "chrX"
is_X <- annot$Chromosome == "chrX"## Parsed with column specification:
## cols(
## .default = col_character(),
## PAT = col_integer(),
## MAT = col_integer(),
## SEX = col_integer(),
## PHENOTYPE = col_double()
## )
## See spec(...) for full column specifications.
Count the number of homozygote and heterozygote markers on each type of chromosome.
m <- G_DM %>% select(-(1:6))
f <- G_DM$PHENOTYPE
n1a <- as.vector(apply(m[,is_A],MARGIN=1,FUN=function(z) {length(which(z =='D'))}))
n1x <- as.vector(apply(m[,is_X],MARGIN=1,FUN=function(z) {length(which(z =='D'))}))
n12a <- as.vector(apply(m[,is_A],MARGIN=1,FUN=function(z) {length(which(z =='H'))}))
n12x <- as.vector(apply(m[,is_X],MARGIN=1,FUN=function(z) {length(which(z =='H'))}))
n2a <- as.vector(apply(m[,is_A],MARGIN=1,FUN=function(z) {length(which(z =='M'))}))
n2x <- as.vector(apply(m[,is_X],MARGIN=1,FUN=function(z) {length(which(z =='M'))}))
z <- cbind(f,n1a,n2a,n12a,n1x,n2x,n12x)
write.table(z,row.names=FALSE,col.names=c("f",'n1a','n2a','n12a','n1x','n2x','n12x'),quote=F,sep='\t',file='Turner.data.txt')## Session info -------------------------------------------------------------
## setting value
## version R version 3.5.0 (2018-04-23)
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate fr_FR.UTF-8
## tz Europe/London
## date 2018-06-28
## Packages -----------------------------------------------------------------
## package * version date source
## assertthat 0.2.0 2017-04-11 CRAN (R 3.5.0)
## backports 1.1.2 2017-12-13 CRAN (R 3.5.0)
## base * 3.5.0 2018-04-24 local
## bindr 0.1.1 2018-03-13 CRAN (R 3.5.0)
## bindrcpp * 0.2.2 2018-03-29 CRAN (R 3.5.0)
## broom 0.4.4 2018-03-29 CRAN (R 3.5.0)
## cellranger 1.1.0 2016-07-27 CRAN (R 3.5.0)
## cli 1.0.0 2017-11-05 CRAN (R 3.5.0)
## colorspace 1.3-2 2016-12-14 CRAN (R 3.5.0)
## compiler 3.5.0 2018-04-24 local
## crayon 1.3.4 2017-09-16 CRAN (R 3.5.0)
## datasets * 3.5.0 2018-04-24 local
## devtools 1.13.5 2018-02-18 CRAN (R 3.5.0)
## digest 0.6.15 2018-01-28 CRAN (R 3.5.0)
## dplyr * 0.7.4 2017-09-28 CRAN (R 3.5.0)
## evaluate 0.10.1 2017-06-24 CRAN (R 3.5.0)
## forcats * 0.3.0 2018-02-19 CRAN (R 3.5.0)
## foreign 0.8-70 2017-11-28 CRAN (R 3.5.0)
## ggplot2 * 2.2.1 2016-12-30 CRAN (R 3.5.0)
## glue 1.2.0 2017-10-29 CRAN (R 3.5.0)
## graphics * 3.5.0 2018-04-24 local
## grDevices * 3.5.0 2018-04-24 local
## grid 3.5.0 2018-04-24 local
## gtable 0.2.0 2016-02-26 CRAN (R 3.5.0)
## haven 1.1.1 2018-01-18 CRAN (R 3.5.0)
## hms 0.4.2 2018-03-10 CRAN (R 3.5.0)
## htmltools 0.3.6 2017-04-28 CRAN (R 3.5.0)
## httr 1.3.1 2017-08-20 CRAN (R 3.5.0)
## jsonlite 1.5 2017-06-01 CRAN (R 3.5.0)
## knitr 1.20 2018-02-20 CRAN (R 3.5.0)
## lattice 0.20-35 2017-03-25 CRAN (R 3.5.0)
## lazyeval 0.2.1 2017-10-29 CRAN (R 3.5.0)
## lubridate 1.7.4 2018-04-11 CRAN (R 3.5.0)
## magrittr 1.5 2014-11-22 CRAN (R 3.5.0)
## memoise 1.1.0 2017-04-21 CRAN (R 3.5.0)
## methods * 3.5.0 2018-04-24 local
## mnormt 1.5-5 2016-10-15 CRAN (R 3.5.0)
## modelr 0.1.1 2017-07-24 CRAN (R 3.5.0)
## munsell 0.4.3 2016-02-13 CRAN (R 3.5.0)
## nlme 3.1-137 2018-04-07 CRAN (R 3.5.0)
## parallel 3.5.0 2018-04-24 local
## pillar 1.2.2 2018-04-26 CRAN (R 3.5.0)
## pkgconfig 2.0.1 2017-03-21 CRAN (R 3.5.0)
## plyr 1.8.4 2016-06-08 CRAN (R 3.5.0)
## psych 1.8.4 2018-05-06 CRAN (R 3.5.0)
## purrr * 0.2.4 2017-10-18 CRAN (R 3.5.0)
## R6 2.2.2 2017-06-17 CRAN (R 3.5.0)
## Rcpp 0.12.16 2018-03-13 CRAN (R 3.5.0)
## readr * 1.1.1 2017-05-16 CRAN (R 3.5.0)
## readxl 1.1.0 2018-04-20 CRAN (R 3.5.0)
## reshape2 1.4.3 2017-12-11 CRAN (R 3.5.0)
## rlang 0.2.0 2018-02-20 CRAN (R 3.5.0)
## rmarkdown 1.9 2018-03-01 CRAN (R 3.5.0)
## rprojroot 1.3-2 2018-01-03 CRAN (R 3.5.0)
## rstudioapi 0.7 2017-09-07 CRAN (R 3.5.0)
## rvest 0.3.2 2016-06-17 CRAN (R 3.5.0)
## scales 0.5.0 2017-08-24 CRAN (R 3.5.0)
## stats * 3.5.0 2018-04-24 local
## stringi 1.2.2 2018-05-02 CRAN (R 3.5.0)
## stringr * 1.3.0 2018-02-19 CRAN (R 3.5.0)
## tibble * 1.4.2 2018-01-22 CRAN (R 3.5.0)
## tidyr * 0.8.0 2018-01-29 CRAN (R 3.5.0)
## tidyverse * 1.2.1 2017-11-14 CRAN (R 3.5.0)
## tools 3.5.0 2018-04-24 local
## utils * 3.5.0 2018-04-24 local
## withr 2.1.2 2018-03-15 CRAN (R 3.5.0)
## xml2 1.2.0 2018-01-24 CRAN (R 3.5.0)
## yaml 2.1.19 2018-05-01 CRAN (R 3.5.0)