length(chr)
length(pos)
nrow(rw_run_data)
pr[,1]
pr[,2017]
pr[,2016]
head(pr)
attrib(pr)
attr(pr)
attr(pr)
for (locus in 1:10) {
rw_run_data$pos[locus] <- locus
locus_genotypes <- pr[,locus]
rw_run_data$red_court_lod[locus] <- get_lod_score(locus_genotypes, courtships, id )
}
rw_run_data$po
head(rw_run_data)
# pull out id
id <- pull.pheno(red_ind, pheno.col = "id")
# Pull out phenotype data:
court_cp <- pull.pheno(red_ind, pheno.col = "court_cp")
court_mp <- pull.pheno(red_ind, pheno.col = "court_mp")
courtships <- cbind(court_mp, court_cp)
# Calculate genotype probabilies across genome.
pr <- pull.genoprob(red_ind, omit.first.prob=TRUE, chr = 17)
# GLMM: run genome scan and store lod scores for each position across linkage map
for (locus in 1:length(pos)) {
rw_run_data$pos[locus] <- locus
locus_genotypes <- pr[,locus]
rw_run_data$red_court_lod[locus] <- get_lod_score(locus_genotypes, courtships, id )
}
for (locus in 1:length(pos)) {
rw_run_data$pos[locus] <- locus
print(locus)
locus_genotypes <- pr[,locus]
rw_run_data$red_court_lod[locus] <- get_lod_score(locus_genotypes, courtships, id )
}
chr
?pull.genoprob
chr <- (pull.genoprob(reduced_data,  omit.first.prob=TRUE, include.pos.info=TRUE, chr = 17))$chr
rw_run_data <- data.frame(chr,
pos)
pos
pos <- (pull.genoprob(reduced_data, omit.first.prob=TRUE, include.pos.info=TRUE, chr = "17"))$pos
pos
# Pull out chromsome and position of markers (limit to chromsome 17):
chr <- (pull.genoprob(reduced_data,  omit.first.prob=TRUE, include.pos.info=TRUE, chr = "17"))$chr
# (will give warning message)
pos <- (pull.genoprob(reduced_data, omit.first.prob=TRUE, include.pos.info=TRUE, chr = "17"))$pos
# (will give warning message)
# Set up data-frame to store output (chr, pos, lod etc) and ensure lod vector is empty
rw_run_data <- data.frame(chr,
pos)
rw_run_data$red_court_lod<-NA  			# lod score for courtship glmm
rw_run_data$red_court_np_lod<-NA  			# lod score for courtship glmm
rw_run_data$white_court_lod<-NA  			# lod score for courtship glmm
rw_run_data$white_court_np_lod<-NA
# SPLIR BY 'RED' and 'WHITE'  --------------------------------------------
red_ind <- reduced_data[,reduced_data$pheno$B_locus == 2]
white_ind <- reduced_data[,reduced_data$pheno$B_locus == 1]
##########################################################################################
# Red individuals first:
# PULL OUT GENOTYPE AND PHENOTYPE INFORMATION --------------------------------------------
# pull out id
id <- pull.pheno(red_ind, pheno.col = "id")
# Pull out phenotype data:
court_cp <- pull.pheno(red_ind, pheno.col = "court_cp")
court_mp <- pull.pheno(red_ind, pheno.col = "court_mp")
courtships <- cbind(court_mp, court_cp)
# Calculate genotype probabilies across genome.
pr <- pull.genoprob(red_ind, omit.first.prob=TRUE, chr = 17)
# GLMM: run genome scan and store lod scores for each position across linkage map
for (locus in 1:length(pos)) {
rw_run_data$pos[locus] <- locus
print(locus)
locus_genotypes <- pr[,locus]
rw_run_data$red_court_lod[locus] <- get_lod_score(locus_genotypes, courtships, id )
}
rw_run_data
# Pull out chromsome and position of markers (limit to chromsome 17):
chr <- (pull.genoprob(reduced_data,  omit.first.prob=TRUE, include.pos.info=TRUE, chr = "17"))$chr
# (will give warning message)
pos <- (pull.genoprob(reduced_data, omit.first.prob=TRUE, include.pos.info=TRUE, chr = "17"))$pos
# (will give warning message)
# Set up data-frame to store output (chr, pos, lod etc) and ensure lod vector is empty
rw_run_data <- data.frame(chr,
pos)
rw_run_data$red_court_lod<-NA  			# lod score for courtship glmm
rw_run_data$red_court_np_lod<-NA  			# lod score for courtship glmm
rw_run_data$white_court_lod<-NA  			# lod score for courtship glmm
rw_run_data$white_court_np_lod<-NA
# SPLIR BY 'RED' and 'WHITE'  --------------------------------------------
red_ind <- reduced_data[,reduced_data$pheno$B_locus == 2]
white_ind <- reduced_data[,reduced_data$pheno$B_locus == 1]
##########################################################################################
# Red individuals first:
# PULL OUT GENOTYPE AND PHENOTYPE INFORMATION --------------------------------------------
# pull out id
id <- pull.pheno(red_ind, pheno.col = "id")
# Pull out phenotype data:
court_cp <- pull.pheno(red_ind, pheno.col = "court_cp")
court_mp <- pull.pheno(red_ind, pheno.col = "court_mp")
courtships <- cbind(court_mp, court_cp)
# Calculate genotype probabilies across genome.
pr <- pull.genoprob(red_ind, omit.first.prob=TRUE, chr = 17)
# GLMM: run genome scan and store lod scores for each position across linkage map
for (locus in 1:length(pos)) {
rw_run_data$pos[locus] <- locus
print(locus)
locus_genotypes <- pr[,locus]
rw_run_data$red_court_lod[locus] <- get_lod_score(locus_genotypes, courtships, id )
}
##########################################################################################
# then white individuals:
# PULL OUT GENOTYPE AND PHENOTYPE INFORMATION --------------------------------------------
# pull out id
id <- pull.pheno(red_ind, pheno.col = "id")
# Pull out phenotype data:
court_cp <- pull.pheno(red_ind, pheno.col = "court_cp")
court_mp <- pull.pheno(red_ind, pheno.col = "court_mp")
courtships <- cbind(court_mp, court_cp)
# Calculate genotype probabilies across genome.
pr <- pull.genoprob(red_ind, omit.first.prob=TRUE, chr = 17)
# GLMM: run genome scan and store lod scores for each position across linkage map
for (locus in 1:length(pos)) {
rw_run_data$pos[locus] <- pos[locus]
print(locus)
locus_genotypes <- pr[,locus]
rw_run_data$red_court_lod[locus] <- get_lod_score(locus_genotypes, courtships, id )
}
# Merrill Oct 2017, revised June 2018
# will change
setwd("/Users/merrill/Dropbox/Projects/male_pref_qtl/pref_qtl_anal/PlosBio_revision_anal/")
# QTL analysis based on 1_genome_scan.R to look at individuals that are homozygous or heterozygous at QTL on chromome 18 seperately.
# REQUIRED LIBRARIES ---------------------------------------------------------------------
library(qtl)
library(lme4)
library(MASS)
# FUNCTION FOR BINOMIAL GLMM SCAN ACROSS LINKAGE MAP -------------------------------------
# n.b. GLMM includes id as an individual level random factor, accounting for overdispersion (see for example, Elston et al 2001 Parasitology).
get_lod_score<-function(locus_genotypes, courtships, id, brood, GF) {
model      <- glmer(courtships ~ locus_genotypes + (1|id),  family = binomial )
null_model <- glmer(courtships ~ 1 + (1|id),  family = binomial )
lod_score<-((logLik(model) - logLik(null_model))/log(10))[1]
return(lod_score)
}
# LOAD GENOTYPE/PHENOTYPE DATA AND REMOVE REDUNDANT MARKERS ------------------------------
# load data:
masked_data<-read.cross(format = "csv", file = "../raw_data/data_for_Rqtl.csv", genotypes = c("AA","AB", "BB"), alleles = c("A","B"), estimate.map = FALSE, crosstype = "bc")
# reduce the data to remove redundent ('duplicate') markers i.e. those at same genetic (cM) position. This speeds up analysis considerably, however it should be noted that the 'peak' lod score will fall on mulitiple markers at same genetic, but not physical, position.
dups<-findDupMarkers(masked_data, exact.only = FALSE, adjacent.only = TRUE)
dups_to_drop<-unique(unlist(unique(dups)))
no_dup_data <- masked_data
while (length(dups_to_drop) != 0) {
dups<-findDupMarkers(no_dup_data, exact.only = FALSE, adjacent.only = TRUE)
dups_to_drop<-unique(unlist(unique(dups)))
print(length(dups_to_drop))
no_dup_data <- drop.markers(no_dup_data, dups_to_drop)
}
reduced_data <- no_dup_data
# Use jittermap because some markers are at the same cM position ...
reduced_data<-jittermap(reduced_data)
# ... and calculate probability of genotypes at each locus.
reduced_data<- calc.genoprob(reduced_data, step = 1)
# GET POSITION OF QTL ON CHR 18 ----------------------------------------------------------
mar_chr18 <- find.marker(reduced_data, 18, 0)
chr18_max_genotypes <- pull.geno(reduced_data)[,mar_chr18]
sum(is.na(chr18_max_genotypes))
if (sum(is.na(chr18_max_genotypes)) != 0) {
message("Warning! missing chr 18 genotypes ... ")
}
# add data chr18 QTL to cross:
reduced_data$pheno <- cbind(reduced_data$pheno, B_locus=chr18_max_genotypes)
# PULL OUT MAP INFORMATION --------------------------------------------
# Pull out chromsome and position of markers (limit to chromsome 17):
chr <- (pull.genoprob(reduced_data,  omit.first.prob=TRUE, include.pos.info=TRUE, chr = "17"))$chr
# (will give warning message)
pos <- (pull.genoprob(reduced_data, omit.first.prob=TRUE, include.pos.info=TRUE, chr = "17"))$pos
# (will give warning message)
# Set up data-frame to store output (chr, pos, lod etc) and ensure lod vector is empty
rw_run_data <- data.frame(chr,
pos)
rw_run_data$red_court_lod<-NA  			# lod score for courtship glmm
rw_run_data$red_court_np_lod<-NA  			# lod score for courtship glmm
rw_run_data$white_court_lod<-NA  			# lod score for courtship glmm
rw_run_data$white_court_np_lod<-NA
# SPLIR BY 'RED' and 'WHITE'  --------------------------------------------
red_ind <- reduced_data[,reduced_data$pheno$B_locus == 2]
white_ind <- reduced_data[,reduced_data$pheno$B_locus == 1]
##########################################################################################
# Red individuals first:
# PULL OUT GENOTYPE AND PHENOTYPE INFORMATION --------------------------------------------
# pull out id
id <- pull.pheno(red_ind, pheno.col = "id")
# Pull out phenotype data:
court_cp <- pull.pheno(red_ind, pheno.col = "court_cp")
court_mp <- pull.pheno(red_ind, pheno.col = "court_mp")
courtships <- cbind(court_mp, court_cp)
# Calculate genotype probabilies across genome.
pr <- pull.genoprob(red_ind, omit.first.prob=TRUE, chr = 17)
# GLMM: run genome scan and store lod scores for each position across linkage map
for (locus in 1:length(pos)) {
rw_run_data$pos[locus] <- locus
print(locus)
locus_genotypes <- pr[,locus]
rw_run_data$red_court_lod[locus] <- get_lod_score(locus_genotypes, courtships, id )
}
##########################################################################################
# then white individuals:
# PULL OUT GENOTYPE AND PHENOTYPE INFORMATION --------------------------------------------
# pull out id
id <- pull.pheno(white_ind, pheno.col = "id")
# Pull out phenotype data:
court_cp <- pull.pheno(white_ind, pheno.col = "court_cp")
court_mp <- pull.pheno(white_ind, pheno.col = "court_mp")
courtships <- cbind(court_mp, court_cp)
# Calculate genotype probabilies across genome.
pr <- pull.genoprob(white_ind, omit.first.prob=TRUE, chr = 17)
# GLMM: run genome scan and store lod scores for each position across linkage map
for (locus in 1:length(pos)) {
rw_run_data$pos[locus] <- pos[locus]
print(locus)
locus_genotypes <- pr[,locus]
rw_run_data$white_court_lod[locus] <- get_lod_score(locus_genotypes, courtships, id )
}
warnings()
rw_run_data
# Red individuals first:
# PULL OUT GENOTYPE AND PHENOTYPE INFORMATION --------------------------------------------
# pull out id
id <- pull.pheno(red_ind, pheno.col = "id")
# Pull out phenotype data:
court_cp <- pull.pheno(red_ind, pheno.col = "court_cp")
court_mp <- pull.pheno(red_ind, pheno.col = "court_mp")
courtships <- cbind(court_mp, court_cp)
# Calculate genotype probabilies across genome.
pr <- pull.genoprob(red_ind, omit.first.prob=TRUE, chr = 17)
# GLMM: run genome scan and store lod scores for each position across linkage map
for (locus in 1:length(pos)) {
rw_run_data$pos[locus] <- locus
locus_genotypes <- pr[,locus]
rw_run_data$red_court_lod[locus] <- get_lod_score(locus_genotypes, courtships, id )
}
# plot:
pdf("fig_S1.pdf", width=3.5, height=3.5)
plot(rw_run_data$white_court_lod~rw_run_data$pos, typ = "n", col = "blue", ylim = c(0,4.5),yaxt = 'n' ,xaxt = 'n', xaxt=NULL,yaxt=NULL, xlab="",ylab="",   lwd = 2)
box(col="dark gray")
abline(h = 3.14, col = "black", lty = 2) # sig threshold to be changed
points(rw_run_data$white_court_lod~rw_run_data$pos, typ = "l", col = "blue",  lwd = 2)
points(rw_run_data$red_court_lod~rw_run_data$pos, typ = "l", col = "orange",  lwd = 2)
rug(rw_run_data$pos, ticksize = 0.03, col = "black")
segments(x0 = 20.5 , x1 = 48.3, y0=1.25, y1=1.25, col = "black", lwd = 2 )
segments(x0 = 20.5 , x1 = 20.5, y0=1.1, y1=1.4, col = "black", lwd = 2 )
segments(x0 = 48.3 , x1 = 48.3, y0=1.1, y1=1.4, col = "black", lwd = 2 )
axis(side=1,at=c(0, 20, 40), labels = c("0", "20","40"))
axis(side=2,at=c(0,1,2,3,4), labels = c("0","1","2","3","4"),col="dark gray")
dev.off()
rw_run_data
rw_run_data <- data.frame(chr,
pos)
rw_run_data$red_court_lod<-NA  			# lod score for courtship glmm
rw_run_data$red_court_np_lod<-NA  			# lod score for courtship glmm
rw_run_data$white_court_lod<-NA  			# lod score for courtship glmm
rw_run_data$white_court_np_lod<-NA
# SPLIR BY 'RED' and 'WHITE'  --------------------------------------------
red_ind <- reduced_data[,reduced_data$pheno$B_locus == 2]
white_ind <- reduced_data[,reduced_data$pheno$B_locus == 1]
# QTL 18 = HETEROZYGOTE ('RED') --------------------------------------------
# PULL OUT GENOTYPE AND PHENOTYPE INFORMATION --------------------------------------------
# pull out id
id <- pull.pheno(red_ind, pheno.col = "id")
# Pull out phenotype data:
court_cp <- pull.pheno(red_ind, pheno.col = "court_cp")
court_mp <- pull.pheno(red_ind, pheno.col = "court_mp")
courtships <- cbind(court_mp, court_cp)
# Calculate genotype probabilies across genome.
pr <- pull.genoprob(red_ind, omit.first.prob=TRUE, chr = 17)
# GLMM: run genome scan and store lod scores for each position across linkage map
for (locus in 1:length(pos)) {
rw_run_data$pos[locus] <- pos[locus]
locus_genotypes <- pr[,locus]
rw_run_data$red_court_lod[locus] <- get_lod_score(locus_genotypes, courtships, id )
}
# QTL 18 = HOMOZYGOTE ('WHITE') --------------------------------------------
# PULL OUT GENOTYPE AND PHENOTYPE INFORMATION --------------------------------------------
# pull out id
id <- pull.pheno(white_ind, pheno.col = "id")
# Pull out phenotype data:
court_cp <- pull.pheno(white_ind, pheno.col = "court_cp")
court_mp <- pull.pheno(white_ind, pheno.col = "court_mp")
courtships <- cbind(court_mp, court_cp)
# Calculate genotype probabilies across genome.
pr <- pull.genoprob(white_ind, omit.first.prob=TRUE, chr = 17)
# GLMM: run genome scan and store lod scores for each position across linkage map
for (locus in 1:length(pos)) {
locus_genotypes <- pr[,locus]
rw_run_data$white_court_lod[locus] <- get_lod_score(locus_genotypes, courtships, id )
}
plot(rw_run_data$white_court_lod~rw_run_data$pos, typ = "n", col = "blue", ylim = c(0,4.5),yaxt = 'n' ,xaxt = 'n', xaxt=NULL,yaxt=NULL, xlab="",ylab="",   lwd = 2)
points(rw_run_data$white_court_lod~rw_run_data$pos, typ = "l", col = "blue",  lwd = 2)
points(rw_run_data$red_court_lod~rw_run_data$pos, typ = "l", col = "orange",  lwd = 2)
rug(rw_run_data$pos, ticksize = 0.03, col = "black")
segments(x0 = 20.5 , x1 = 48.3, y0=1.25, y1=1.25, col = "black", lwd = 2 )
segments(x0 = 20.5 , x1 = 20.5, y0=1.1, y1=1.4, col = "black", lwd = 2 )
segments(x0 = 48.3 , x1 = 48.3, y0=1.1, y1=1.4, col = "black", lwd = 2 )
axis(side=1,at=c(0, 20, 40), labels = c("0", "20","40"))
axis(side=2,at=c(0,1,2,3,4), labels = c("0","1","2","3","4"),col="dark gray")
There were 50 or more warnings (use warnings() to see the first 50)
warnings()
abline(h = 3.14, col = "black", lty = 2)
out.red.court.np <- scanone(red_ind, pheno.col = "prop_court_mp", model="np")
rw_run_data$red_court_np_lod<- out.red.court.np$lod
out.red.court.np <- scanone(red_ind, pheno.col = "prop_court_mp", model="np", chr = 17)
rw_run_data$red_court_np_lod<- out.red.court.np$lod
rw_run_data
# Merrill Oct 2017, revised June 2018
# will change
setwd("/Users/merrill/Dropbox/Projects/male_pref_qtl/pref_qtl_anal/PlosBio_revision_anal/")
# QTL analysis based on 1_genome_scan.R to look at individuals that are homozygous or heterozygous at QTL on chromome 18 seperately.
# REQUIRED LIBRARIES ---------------------------------------------------------------------
library(qtl)
library(lme4)
library(MASS)
# FUNCTION FOR BINOMIAL GLMM SCAN ACROSS LINKAGE MAP -------------------------------------
# n.b. GLMM includes id as an individual level random factor, accounting for overdispersion (see for example, Elston et al 2001 Parasitology).
get_lod_score<-function(locus_genotypes, courtships, id, brood, GF) {
model      <- glmer(courtships ~ locus_genotypes + (1|id),  family = binomial )
null_model <- glmer(courtships ~ 1 + (1|id),  family = binomial )
lod_score<-((logLik(model) - logLik(null_model))/log(10))[1]
return(lod_score)
}
# LOAD GENOTYPE/PHENOTYPE DATA AND REMOVE REDUNDANT MARKERS ------------------------------
# load data:
masked_data<-read.cross(format = "csv", file = "../raw_data/data_for_Rqtl.csv", genotypes = c("AA","AB", "BB"), alleles = c("A","B"), estimate.map = FALSE, crosstype = "bc")
# reduce the data to remove redundent ('duplicate') markers i.e. those at same genetic (cM) position. This speeds up analysis considerably, however it should be noted that the 'peak' lod score will fall on mulitiple markers at same genetic, but not physical, position.
dups<-findDupMarkers(masked_data, exact.only = FALSE, adjacent.only = TRUE)
dups_to_drop<-unique(unlist(unique(dups)))
no_dup_data <- masked_data
while (length(dups_to_drop) != 0) {
dups<-findDupMarkers(no_dup_data, exact.only = FALSE, adjacent.only = TRUE)
dups_to_drop<-unique(unlist(unique(dups)))
print(length(dups_to_drop))
no_dup_data <- drop.markers(no_dup_data, dups_to_drop)
}
reduced_data <- no_dup_data
# Use jittermap because some markers are at the same cM position ...
reduced_data<-jittermap(reduced_data)
# ... and calculate probability of genotypes at each locus.
reduced_data<- calc.genoprob(reduced_data, step = 1)
# GET POSITION OF QTL ON CHR 18 ----------------------------------------------------------
mar_chr18 <- find.marker(reduced_data, 18, 0)
chr18_max_genotypes <- pull.geno(reduced_data)[,mar_chr18]
sum(is.na(chr18_max_genotypes))
if (sum(is.na(chr18_max_genotypes)) != 0) {
message("Warning! missing chr 18 genotypes ... ")
}
# add data chr18 QTL to cross:
reduced_data$pheno <- cbind(reduced_data$pheno, B_locus=chr18_max_genotypes)
# PULL OUT MAP INFORMATION --------------------------------------------
# Pull out chromsome and position of markers (limit to chromsome 17):
chr <- (pull.genoprob(reduced_data,  omit.first.prob=TRUE, include.pos.info=TRUE, chr = "17"))$chr
# (will give warning message)
pos <- (pull.genoprob(reduced_data, omit.first.prob=TRUE, include.pos.info=TRUE, chr = "17"))$pos
# (will give warning message)
# Set up data-frame to store output (chr, pos, lod etc) and ensure lod vector is empty
rw_run_data <- data.frame(chr,
pos)
rw_run_data$red_court_lod<-NA  			# lod score for courtship glmm
rw_run_data$red_court_np_lod<-NA  			# lod score for courtship glmm
rw_run_data$white_court_lod<-NA  			# lod score for courtship glmm
rw_run_data$white_court_np_lod<-NA
# SPLIR BY 'RED' and 'WHITE'  --------------------------------------------
red_ind <- reduced_data[,reduced_data$pheno$B_locus == 2]
white_ind <- reduced_data[,reduced_data$pheno$B_locus == 1]
# QTL 18 = HETEROZYGOTE ('RED') --------------------------------------------
# PULL OUT GENOTYPE AND PHENOTYPE INFORMATION --------------------------------------------
# pull out id
id <- pull.pheno(red_ind, pheno.col = "id")
# Pull out phenotype data:
court_cp <- pull.pheno(red_ind, pheno.col = "court_cp")
court_mp <- pull.pheno(red_ind, pheno.col = "court_mp")
courtships <- cbind(court_mp, court_cp)
# Calculate genotype probabilies across genome.
pr <- pull.genoprob(red_ind, omit.first.prob=TRUE, chr = 17)
# GLMM: run genome scan and store lod scores for each position across linkage map
for (locus in 1:length(pos)) {
rw_run_data$pos[locus] <- pos[locus]
locus_genotypes <- pr[,locus]
rw_run_data$red_court_lod[locus] <- get_lod_score(locus_genotypes, courtships, id )
}
# non-parametric methods with R/qtl
out.red.court.np <- scanone(red_ind, pheno.col = "prop_court_mp", model="np", chr = 17)
rw_run_data$red_court_np_lod<- out.red.court.np$lod
# QTL 18 = HOMOZYGOTE ('WHITE') --------------------------------------------
# PULL OUT GENOTYPE AND PHENOTYPE INFORMATION --------------------------------------------
# pull out id
id <- pull.pheno(white_ind, pheno.col = "id")
# Pull out phenotype data:
court_cp <- pull.pheno(white_ind, pheno.col = "court_cp")
court_mp <- pull.pheno(white_ind, pheno.col = "court_mp")
courtships <- cbind(court_mp, court_cp)
# Calculate genotype probabilies across genome.
pr <- pull.genoprob(white_ind, omit.first.prob=TRUE, chr = 17)
# GLMM: run genome scan and store lod scores for each position across linkage map
for (locus in 1:length(pos)) {
locus_genotypes <- pr[,locus]
rw_run_data$white_court_lod[locus] <- get_lod_score(locus_genotypes, courtships, id )
}
# non-parametric methods with R/qtl
out.white.court.np <- scanone(white_ind, pheno.col = "prop_court_mp", model="np",chr = 17)
rw_run_data$white_court_np_lod<- out.white.court.np$lod
rw_run_data
# write results to csv
write.csv(rw_run_data,"../derived_data/rw_chr17_lod_scores.csv",  row.names = FALSE)
# Merrill Oct 2017, revised June 2018
# will change
setwd("/Users/merrill/Dropbox/Projects/male_pref_qtl/pref_qtl_anal/PlosBio_revision_anal/")
# QTL analysis based on 1_genome_scan.R to look at individuals that are homozygous or heterozygous at QTL on chromome 18 seperately.
# REQUIRED LIBRARIES ---------------------------------------------------------------------
library(qtl)
library(lme4)
library(MASS)
# FUNCTION FOR BINOMIAL GLMM SCAN ACROSS LINKAGE MAP -------------------------------------
# n.b. GLMM includes id as an individual level random factor, accounting for overdispersion (see for example, Elston et al 2001 Parasitology).
get_lod_score<-function(locus_genotypes, courtships, id, brood, GF) {
model      <- glmer(courtships ~ locus_genotypes + (1|id),  family = binomial )
null_model <- glmer(courtships ~ 1 + (1|id),  family = binomial )
lod_score<-((logLik(model) - logLik(null_model))/log(10))[1]
return(lod_score)
}
# LOAD GENOTYPE/PHENOTYPE DATA AND REMOVE REDUNDANT MARKERS ------------------------------
# load data:
masked_data<-read.cross(format = "csv", file = "../raw_data/data_for_Rqtl.csv", genotypes = c("AA","AB", "BB"), alleles = c("A","B"), estimate.map = FALSE, crosstype = "bc")
# reduce the data to remove redundent ('duplicate') markers i.e. those at same genetic (cM) position. This speeds up analysis considerably, however it should be noted that the 'peak' lod score will fall on mulitiple markers at same genetic, but not physical, position.
dups<-findDupMarkers(masked_data, exact.only = FALSE, adjacent.only = TRUE)
dups_to_drop<-unique(unlist(unique(dups)))
no_dup_data <- masked_data
while (length(dups_to_drop) != 0) {
dups<-findDupMarkers(no_dup_data, exact.only = FALSE, adjacent.only = TRUE)
dups_to_drop<-unique(unlist(unique(dups)))
print(length(dups_to_drop))
no_dup_data <- drop.markers(no_dup_data, dups_to_drop)
}
reduced_data <- no_dup_data
# Use jittermap because some markers are at the same cM position ...
reduced_data<-jittermap(reduced_data)
# ... and calculate probability of genotypes at each locus.
reduced_data<- calc.genoprob(reduced_data, step = 1)
# GET POSITION OF QTL ON CHR 18 ----------------------------------------------------------
mar_chr18 <- find.marker(reduced_data, 18, 0)
chr18_max_genotypes <- pull.geno(reduced_data)[,mar_chr18]
sum(is.na(chr18_max_genotypes))
if (sum(is.na(chr18_max_genotypes)) != 0) {
message("Warning! missing chr 18 genotypes ... ")
}
# add data chr18 QTL to cross:
reduced_data$pheno <- cbind(reduced_data$pheno, B_locus=chr18_max_genotypes)
# PULL OUT MAP INFORMATION --------------------------------------------
# Pull out chromsome and position of markers (limit to chromsome 17):
chr <- (pull.genoprob(reduced_data,  omit.first.prob=TRUE, include.pos.info=TRUE, chr = "17"))$chr
# (will give warning message)
pos <- (pull.genoprob(reduced_data, omit.first.prob=TRUE, include.pos.info=TRUE, chr = "17"))$pos
# (will give warning message)
# Set up data-frame to store output (chr, pos, lod etc) and ensure lod vector is empty
rw_run_data <- data.frame(chr,
pos)
rw_run_data$red_court_lod<-NA  			# lod score for courtship glmm
rw_run_data$red_court_np_lod<-NA  			# lod score for courtship glmm
rw_run_data$white_court_lod<-NA  			# lod score for courtship glmm
rw_run_data$white_court_np_lod<-NA
# SPLIR BY 'RED' and 'WHITE'  --------------------------------------------
red_ind <- reduced_data[,reduced_data$pheno$B_locus == 2]
white_ind <- reduced_data[,reduced_data$pheno$B_locus == 1]
# QTL 18 = HETEROZYGOTE ('RED') --------------------------------------------
# PULL OUT GENOTYPE AND PHENOTYPE INFORMATION --------------------------------------------
# pull out id
id <- pull.pheno(red_ind, pheno.col = "id")
# Pull out phenotype data:
court_cp <- pull.pheno(red_ind, pheno.col = "court_cp")
court_mp <- pull.pheno(red_ind, pheno.col = "court_mp")
courtships <- cbind(court_mp, court_cp)
# Calculate genotype probabilies across genome.
pr <- pull.genoprob(red_ind, omit.first.prob=TRUE, chr = 17)
# GLMM: run genome scan and store lod scores for each position across linkage map
for (locus in 1:length(pos)) {
rw_run_data$pos[locus] <- pos[locus]
locus_genotypes <- pr[,locus]
rw_run_data$red_court_lod[locus] <- get_lod_score(locus_genotypes, courtships, id )
}
# non-parametric methods with R/qtl
out.red.court.np <- scanone(red_ind, pheno.col = "prop_court_mp", model="np", chr = 17)
rw_run_data$red_court_np_lod<- out.red.court.np$lod
# QTL 18 = HOMOZYGOTE ('WHITE') --------------------------------------------
# PULL OUT GENOTYPE AND PHENOTYPE INFORMATION --------------------------------------------
# pull out id
id <- pull.pheno(white_ind, pheno.col = "id")
# Pull out phenotype data:
court_cp <- pull.pheno(white_ind, pheno.col = "court_cp")
court_mp <- pull.pheno(white_ind, pheno.col = "court_mp")
courtships <- cbind(court_mp, court_cp)
# Calculate genotype probabilies across genome.
pr <- pull.genoprob(white_ind, omit.first.prob=TRUE, chr = 17)
# GLMM: run genome scan and store lod scores for each position across linkage map
for (locus in 1:length(pos)) {
locus_genotypes <- pr[,locus]
rw_run_data$white_court_lod[locus] <- get_lod_score(locus_genotypes, courtships, id )
}
# non-parametric methods with R/qtl
out.white.court.np <- scanone(white_ind, pheno.col = "prop_court_mp", model="np",chr = 17)
rw_run_data$white_court_np_lod<- out.white.court.np$lod
# write results to csv
write.csv(rw_run_data,"../derived_data/rw_chr17_lod_scores.csv",  row.names = FALSE)
