Load locally saved data files.
load("/Users/verta/Documents/ddPCR/ddPCR_results/ddPCRdataFiles.RData")
##
Assign shorted tissue labels.
design$TissueOrRep[which(design$TissueOrRep == "heart")] = "Hr"
design$TissueOrRep[which(design$TissueOrRep == "liver")] = "Liv"
design$TissueOrRep[which(design$TissueOrRep == "muscle")] = "Mc"
design$TissueOrRep[which(design$TissueOrRep == "adipose")] = "Ad"
design$TissueOrRep[which(design$TissueOrRep == "gonad")] = "Gn"
design$TissueOrRep[which(design$TissueOrRep == "brain")] = "Br"
design$TissueOrRep[which(design$TissueOrRep == "sperm")] = "Sp"
design$TissueOrRep[which(design$TissueOrRep == "stripped_gonad")] = "SG"
head(design)
## VGLL3 FamID TankOrPosition Individual TissueOrRep Temp CollectionDate PIT
## 13 LL 13 W1-3-3C 11 1 W 19022018 NA
## 14 LL 13 W1-3-3C 43 2 W 19022018 NA
## 15 LL 13 W1-3-3C 75 3 W 19022018 NA
## 19 LE 15 W1-4-2A 17 1 W 19022018 NA
## 20 LE 15 W1-4-2A 49 2 W 19022018 NA
## 21 LE 15 W1-4-2A 81 3 W 19022018 NA
## SexGeno SexPheno Gonads
## 13 NA NA NA
## 14 NA NA NA
## 15 NA NA NA
## 19 NA NA NA
## 20 NA NA NA
## 21 NA NA NA
Compile the results tables from QuantStudio. Join results table from different runs (reverse the order to get the first timepoint first). The “micro” in the column names is non-ASCII character that we ignore using check.names=F and then convert to “u” using iconv. Also the column names are shifted one towards the right, so this is corrected for.
ddPCRres = rbind(ddPCRres1, ddPCRres2, ddPCRres3, ddPCRres4, ddPCRres5, ddPCRres6, ddPCRres7, ddPCRres8, ddPCRres9, ddPCRres10, ddPCRres12, ddPCRres13, ddPCRres14)
colnames(ddPCRres) = iconv(colnames(ddPCRres), to = "ASCII", sub = "u")
colnames(ddPCRres) = c(colnames(ddPCRres)[-2],"Empty")
Compile table containing the input concentrations of RNA for these samples. Paste vgll3 genotype, position/tank, tissue/rep and collection date to make up sample ID’s as above.
conc$Sample = 'Na'
# match by sample names given in ddPCR plate (this may vary from run to run)
# whole fry samples from 19022018
conc$Sample[which(conc$CollectionDate == '19022018')] = paste(conc$TankOrPosition[which(conc$CollectionDate == '19022018')], conc$TissueOrRep[which(conc$CollectionDate == '19022018')], sep='-')
# samples with vgll3-tank-tissue-date IDs
conc$Sample[which(conc$CollectionDate %in% c("20032018","20042018","17052018","11062018","11072018"))] =
paste(conc$VGLL3[which(conc$CollectionDate %in% c("20032018","20042018","17052018","11062018","11072018"))],
conc$TankOrPosition[which(conc$CollectionDate %in% c("20032018","20042018","17052018","11062018","11072018"))],
conc$TissueOrRep[which(conc$CollectionDate %in% c("20032018","20042018","17052018","11062018","11072018"))],
conc$CollectionDate[which(conc$CollectionDate %in% c("20032018","20042018","17052018","11062018","11072018"))], sep='-')
# heterozygote samples from 11072018 with individual # IDs
conc$Sample[which(conc$CollectionDate == '11072018' & conc$VGLL3 %in% c("EL","LE"))] =
paste(conc$Individual[which(conc$CollectionDate == '11072018' & conc$VGLL3 %in% c("EL","LE"))],
conc$TissueOrRep[which(conc$CollectionDate == '11072018' & conc$VGLL3 %in% c("EL","LE"))], sep='-')
# homozygote/heterozygote samples with individual # IDs
conc$Sample[which(conc$CollectionDate %in% c("6082018","8082018","9082018","13082018","14082018", "15082018","16082018","17082018","27082018","28082018","29082018",
"20092018","9102018","10102018","11102018","12102018",
"5112018","6112018","7112018","8112018","9112018","12112018",
"23112018","26112018","27112018","28112018","29112018","30112018","3122018","4122018","5122018","10012019"))] =
paste(conc$Individual[which(conc$CollectionDate %in% c("6082018","8082018","9082018","13082018","14082018", "15082018","16082018","17082018","27082018","28082018","29082018",
"20092018","9102018","10102018","11102018","12102018",
"5112018","6112018","7112018","8112018","9112018","12112018",
"23112018","26112018","27112018","28112018","29112018","30112018","3122018","4122018","5122018","10012019"))],
conc$TissueOrRep[which(conc$CollectionDate %in% c("6082018","8082018","9082018","13082018","14082018", "15082018","16082018","17082018","27082018","28082018","29082018",
"20092018","9102018","10102018","11102018","12102018",
"5112018","6112018","7112018","8112018","9112018","12112018",
"23112018","26112018","27112018","28112018","29112018","30112018","3122018","4122018","5122018","10012019"))],sep='-')
head(conc)
## .id VGLL3 TankOrPosition Individual TissueOrRep CollectionDate RNAconc
## 1: 1 EL 10 268 Ad 11072018 10.8
## 2: 1 LE 10 269 Ad 11072018 9.6
## 3: 1 LE 10 269 Gn 11072018 11.0
## 4: 1 EL 12 272 Ad 11072018 10.0
## 5: 1 EL 12 272 Gn 11072018 7.6
## 6: 1 LE 12 273 Ad 11072018 10.3
## Sample
## 1: 268-Ad
## 2: 269-Ad
## 3: 269-Gn
## 4: 272-Ad
## 5: 272-Gn
## 6: 273-Ad
tail(conc)
## .id VGLL3 TankOrPosition Individual TissueOrRep CollectionDate RNAconc
## 1: 13 EE 15 278 Ad 11072018 9.8
## 2: 13 LL 15 279 Hr 11072018 9.1
## 3: 13 LL 15 279 Liv 11072018 10.7
## 4: 13 LL 15 279 Mc 11072018 10.8
## 5: 13 LL 15 279 Ad 11072018 10.1
## 6: 13 LL 15 279 Gn 11072018 9.8
## Sample
## 1: EE-15-Ad-11072018
## 2: LL-15-Hr-11072018
## 3: LL-15-Liv-11072018
## 4: LL-15-Mc-11072018
## 5: LL-15-Ad-11072018
## 6: LL-15-Gn-11072018
Second concentration reading for gonad samples that have been analyzed.
conc2 = na.omit(conc2)
Merge the two concentration measurements and compare by plotting 1st concentration readings versus 2nd.
aveConc = merge(conc,conc2,by.x="Sample",by.y="Sample")
summary(lm(aveConc$RNAconc.y ~ 0 + aveConc$RNAconc.x))
##
## Call:
## lm(formula = aveConc$RNAconc.y ~ 0 + aveConc$RNAconc.x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -82.748 0.683 4.405 13.368 44.676
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## aveConc$RNAconc.x 0.92554 0.03045 30.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.16 on 107 degrees of freedom
## Multiple R-squared: 0.8962, Adjusted R-squared: 0.8952
## F-statistic: 923.8 on 1 and 107 DF, p-value: < 2.2e-16
plot(aveConc$RNAconc.x,aveConc$RNAconc.y, ylab="RNA concentration measurement 1", xlab="RNA concentration measurement 2")
abline(lm(aveConc$RNAconc.y ~ 0 + aveConc$RNAconc.x))
Below we identify problematic samples where the 2nd concentration is much lower than the 1st (degradation of RNA more likely than false concentration on the 1st reading).
aveConc[which(aveConc$RNAconc.x/aveConc$RNAconc.y > 2)]
## Sample .id VGLL3 TankOrPosition Individual.x TissueOrRep CollectionDate
## 1: 326-Gn 9 LE 2(2) 326 Gn 9082018
## 2: 338-Gn 9 LE 15 338 Gn 13082018
## 3: 341-Gn 9 EL 15(2) 341 Gn 13082018
## 4: 358-Gn 9 LE 12(2) 358 Gn 14082018
## 5: 370-Gn 9 LE 10 370 Gn 15082018
## 6: 373-Gn 9 EL 10(2) 373 Gn 15082018
## 7: 389-Gn 9 EL 13(2) 389 Gn 16082018
## 8: 390-Gn 9 LE 13(2) 390 Gn 16082018
## 9: 621-Gn 8 LE 2 621 Gn 9102018
## RNAconc.x Individual.y RNAconc.y
## 1: 97.4 326 7.4
## 2: 60.9 338 22.3
## 3: 89.4 341 12.9
## 4: 53.7 358 13.7
## 5: 69.4 370 10.2
## 6: 72.1 373 14.6
## 7: 90.6 389 16.8
## 8: 61.0 390 16.9
## 9: 117.0 621 47.0
# turn the second concentration reading for these samples to NA's
omitConcSamples = aveConc[which(aveConc$RNAconc.x/aveConc$RNAconc.y > 2),"Sample"]
conc2$RNAconc[which(conc2$Sample %in% omitConcSamples$Sample)] = NA
# verify
plot(conc$RNAconc[match(conc2$Sample,conc$Sample)],conc2$RNAconc, ylab="RNA concentration measurement 1", xlab="RNA concentration measurement 2")
Merge the two measurements again and take the average of the two measurements and use this as the value for all analyses.
aveConc = merge(conc,conc2,by.x="Sample",by.y="Sample")
aveConc$aveRNAconc = NA
aveConc$aveRNAconc = apply(aveConc[,c("RNAconc.x","RNAconc.y")],1,mean,na.rm=T)
aveConc
## Sample .id VGLL3 TankOrPosition Individual.x TissueOrRep
## 1: 1112-Gn 10 EL 12 1112 Gn
## 2: 1113-Gn 10 EL 12(2) 1113 Gn
## 3: 272-Gn 1 EL 12 272 Gn
## 4: 283-Gn 1 LL 5 283 Gn
## 5: 284-Gn 9 EL 5 284 Gn
## ---
## 104: EE-10-Gn-11072018 13 EE 10 266 Gn
## 105: EE-12-Gn-11072018 13 EE 12 270 Gn
## 106: LL-10-Gn-11072018 13 LL 10 267 Gn
## 107: LL-12-Gn-11072018 13 LL 12 271 Gn
## 108: LL-15-Gn-11072018 13 LL 15 279 Gn
## CollectionDate RNAconc.x Individual.y RNAconc.y aveRNAconc
## 1: 23112018 13.1 1112 25.3 19.20
## 2: 23112018 14.5 1113 18.0 16.25
## 3: 11072018 7.6 272 12.3 9.95
## 4: 6082018 11.0 283 14.4 12.70
## 5: 6082018 17.9 284 17.1 17.50
## ---
## 104: 11072018 10.2 266 13.6 11.90
## 105: 11072018 10.3 270 12.9 11.60
## 106: 11072018 11.0 267 15.5 13.25
## 107: 11072018 9.1 271 9.5 9.30
## 108: 11072018 9.8 279 13.9 11.85
Intersect design table, results table and concentrations to make a master table to be used in downstream analyses.
# Initiate empty table
countTable = setNames(data.frame(matrix(ncol = 18, nrow = 0)), c("VGLL3","FamID","TankOrPosition","Individual","TissueOrRep","Temp","CollectionDate","Sex","Sample","Ch1Conc","Ch2Conc","Ch1Copiesper20ulWell","Ch2Copiesper20ulWell","FractionalAbundance","Ch1Pos","Ch2Pos","Ch1Neg","Ch2Neg"))
# Split ddPCR results into two based on channels
ddPCRch1 = ddPCRres[which(ddPCRres$TargetType == "Ch1Unknown"),]
ddPCRch2 = ddPCRres[which(ddPCRres$TargetType == "Ch2Unknown"),]
#################################################################################
# Add ddPCR channel concentrations by mathing sample name to information on design table
# whole fry samples from 19022018
sel = function(x) unlist(strsplit(x,"-"))[1] == "W1"
tempConc1 = ddPCRch1[unlist(lapply(ddPCRch1$Sample,sel)),]
tempConc2 = ddPCRch2[unlist(lapply(ddPCRch2$Sample,sel)),]
tempDesign = design[unlist(lapply(design$TankOrPosition,sel)),]
tempDesign$Sample = paste(tempDesign$TankOrPosition,tempDesign$TissueOrRep,sep="-")
tempDesign$Ch1Copiesper20ulWell = tempConc1$CopiesPer20uLWell[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Copiesper20ulWell = tempConc2$CopiesPer20uLWell[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Conc = tempConc1$Concentration[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Conc = tempConc2$Concentration[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$FractionalAbundance = tempConc1$FractionalAbundance[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ratio = tempConc1$Ratio[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMax = tempConc1$PoissonRatioMax[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMin = tempConc1$PoissonRatioMin[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch1Pos = tempConc1$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Pos = tempConc2$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Neg = tempConc1$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Neg = tempConc2$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc2$Sample)]
countTable = rbind(countTable,tempDesign)
rm(sel,tempConc1,tempConc2,tempDesign)
# samples with vgll3-tank-tissue-date IDs
sel = function(x) unlist(strsplit(x,"-"))[4] %in% c("20032018","20042018","17052018","11062018","11072018") & unlist(strsplit(x,"-"))[1] %in% c("EL","LE") == F
tempConc1 = ddPCRch1[unlist(lapply(ddPCRch1$Sample,sel)),]
tempConc2 = ddPCRch2[unlist(lapply(ddPCRch2$Sample,sel)),]
tempDesign = design[which(design$CollectionDate %in% c("20032018","20042018","17052018","11062018","11072018") & design$VGLL3 %in% c("EL","LE") == F),]
tempDesign$Sample = paste(tempDesign$VGLL3,tempDesign$TankOrPosition,tempDesign$TissueOrRep,tempDesign$CollectionDate, sep="-")
tempDesign$Ch1Copiesper20ulWell = tempConc1$CopiesPer20uLWell[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Copiesper20ulWell = tempConc2$CopiesPer20uLWell[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Conc = tempConc1$Concentration[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Conc = tempConc2$Concentration[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$FractionalAbundance = tempConc1$FractionalAbundance[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ratio = tempConc1$Ratio[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMax = tempConc1$PoissonRatioMax[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMin = tempConc1$PoissonRatioMin[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch1Pos = tempConc1$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Pos = tempConc2$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Neg = tempConc1$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Neg = tempConc2$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc2$Sample)]
countTable = rbind(countTable,tempDesign)
rm(sel,tempConc1,tempConc2,tempDesign)
# heterozygote samples from 11072018 with individual # IDs
tempDesign = design[which(design$CollectionDate == '11072018' & design$VGLL3 %in% c("EL","LE")),]
tempDesign$Sample = paste(tempDesign$Individual,tempDesign$TissueOrRep, sep="-")
tempConc1 = ddPCRch1[which(ddPCRch1$Sample %in% tempDesign$Sample),]
tempConc2 = ddPCRch2[which(ddPCRch2$Sample %in% tempDesign$Sample),]
tempDesign$Ch1Copiesper20ulWell = tempConc1$CopiesPer20uLWell[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Copiesper20ulWell = tempConc2$CopiesPer20uLWell[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Conc = tempConc1$Concentration[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Conc = tempConc2$Concentration[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$FractionalAbundance = tempConc1$FractionalAbundance[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ratio = tempConc1$Ratio[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMax = tempConc1$PoissonRatioMax[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMin = tempConc1$PoissonRatioMin[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch1Pos = tempConc1$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Pos = tempConc2$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Neg = tempConc1$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Neg = tempConc2$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc2$Sample)]
countTable = rbind(countTable,tempDesign)
rm(tempConc1,tempConc2,tempDesign)
# samples with individual # IDs
tempDesign = design[which(design$CollectionDate %in% c("6082018","8082018","9082018","13082018","14082018", "15082018","16082018","17082018","27082018","28082018","29082018",
"20092018","9102018","10102018","11102018","12102018",
"5112018","6112018","7112018","8112018","9112018","12112018",
"23112018","26112018","27112018","28112018","29112018","30112018","3122018","4122018","5122018","10012019")),]
tempDesign$Sample = paste(tempDesign$Individual,tempDesign$TissueOrRep, sep="-")
tempConc1 = ddPCRch1[which(ddPCRch1$Sample %in% tempDesign$Sample),]
tempConc2 = ddPCRch2[which(ddPCRch2$Sample %in% tempDesign$Sample),]
tempDesign$Ch1Copiesper20ulWell = tempConc1$CopiesPer20uLWell[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Copiesper20ulWell = tempConc2$CopiesPer20uLWell[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Conc = tempConc1$Concentration[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Conc = tempConc2$Concentration[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$FractionalAbundance = tempConc1$FractionalAbundance[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ratio = tempConc1$Ratio[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMax = tempConc1$PoissonRatioMax[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMin = tempConc1$PoissonRatioMin[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch1Pos = tempConc1$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Pos = tempConc2$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Neg = tempConc1$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Neg = tempConc2$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc2$Sample)]
countTable = rbind(countTable,tempDesign)
rm(tempConc1,tempConc2,tempDesign)
######################################
# add starting total RNA concentration
countTable$RNAconc = conc$RNAconc[match(countTable$Sample,conc$Sample)]
#head(countTable)
#tail(countTable)
# replace with average concentrations for samples analyzed in duplicates
countTable$RNAconc[match(aveConc$Sample,countTable$Sample)] = aveConc$aveRNAconc
######################################
# remove samples that failed the ddPCR
failedSamples = c()
failed = c(which(apply(countTable[,c("Ch1Pos","Ch2Pos","Ch1Neg","Ch2Neg")],1,sum) < 10000),
which(countTable$Sample %in% failedSamples))
countTable = countTable[-failed,]
We wish to normalize the number of vgll3 copies to the amount of input RNA in each sample. Input RNA concentration is ng/ul of template (ng/ul_H2O) and ddPCR concentration is copies/ul of PCR reaction (cp/ul_PCR).
We first calculate the concentration of template RNA in the PCR reaction: we know the original template concentration, X ng/ul_H2O. We mixed 2 ul of template with 20 ul of PCR reaction, meaning we put 2•X ng into 22 ul of PCR reaction and our concentration is 2•X/22 = N ng/ul_PCR.
Quantasoft reports target concentration as Y copies/ul_PCR. So now we have Y copies/ul_PCR and we know there was N ng/ul_PCR of template, meaning we can calculate our normalized value of expression as Y copies/N ng of template by dividing Y with N:
\(\frac{Y copies}{1 ul}\) : \(\frac{N ng}{1 ul}\) = \(\frac{Y copies}{1 ul}\) • \(\frac{1 ul}{N ng}\) = \(\frac{Y copies}{N ng}\)
Therefore, we need to calculate the concentration of the total RNA per PCR reaction (N = [RNAconc*2]/22) and divide ddPCR concentration with this value to get copies/ng of input RNA.
# correct RNA concentration for one sample with pipetting error (40ul of RT-ddPCR MM instead of 20ul)
#countTable[which(grepl("582-Ad",countTable$Sample)),"RNAconc"] = countTable[which(grepl("582-Ad",countTable$Sample)),"RNAconc"]/2
# concentration measurement for individual 918 gonad is false
#countTable[which(grepl("918-Gn",countTable$Sample)),"RNAconc"] = NA
# calculation
# countTable$Ch1copies = as.numeric(countTable$Ch1Conc)/((countTable$RNAconc*2)/22)
# countTable$Ch2copies = as.numeric(countTable$Ch2Conc)/((countTable$RNAconc*2)/22)
# countTable$TotCopies = countTable$Ch1copies + countTable$Ch2copies
# head(countTable)
# alternative way of calculating
# # calculation of target concentration in template (2 ul)
# countTable$Ch1TemplateConc = as.numeric(countTable$Ch1Conc)*20/2
# countTable$Ch2TemplateConc = as.numeric(countTable$Ch2Conc)*20/2
# countTable$TotTemplateConc = countTable$Ch1TemplateConc+countTable$Ch2TemplateConc
#
# # calculation of template copies per ng of input RNA
# countTable$Ch1copies = countTable$Ch1TemplateConc / countTable$RNAconc
# countTable$Ch2copies = countTable$Ch2TemplateConc / countTable$RNAconc
# countTable$TotCopies = countTable$Ch1copies + countTable$Ch2copies
# head(countTable)
Alternative way of normalizing to input RNA amount. QuantaSoft also provides calculated number of template molecules per 20 ul well. Each 20 ul well contains 2 ul of template at concentration X ng/ul_H2O, meaning 2•X ng/ul_H2O total RNA per well. Copies per ng total input RNA is then simply \(\frac{copies per well}{2*RNAconc}\)
# correct RNA concentration for one sample with pipetting error (40ul of RT-ddPCR MM instead of 20ul)
countTable[which(grepl("582-Ad",countTable$Sample)),"RNAconc"] = countTable[which(grepl("582-Ad",countTable$Sample)),"RNAconc"]/2
# calculation
countTable$Ch1copies = as.numeric(countTable$Ch1Copiesper20ulWell)/(countTable$RNAconc*2)
countTable$Ch2copies = as.numeric(countTable$Ch2Copiesper20ulWell)/(countTable$RNAconc*2)
countTable$TotCopies = countTable$Ch1copies + countTable$Ch2copies
head(countTable)
## VGLL3 FamID TankOrPosition Individual TissueOrRep Temp CollectionDate PIT
## 13 LL 13 W1-3-3C 11 1 W 19022018 NA
## 14 LL 13 W1-3-3C 43 2 W 19022018 NA
## 15 LL 13 W1-3-3C 75 3 W 19022018 NA
## 19 LE 15 W1-4-2A 17 1 W 19022018 NA
## 20 LE 15 W1-4-2A 49 2 W 19022018 NA
## 21 LE 15 W1-4-2A 81 3 W 19022018 NA
## SexGeno SexPheno Gonads Sample Ch1Copiesper20ulWell Ch2Copiesper20ulWell
## 13 NA NA NA W1-3-3C-1 224 0.0
## 14 NA NA NA W1-3-3C-2 198 0.0
## 15 NA NA NA W1-3-3C-3 136 1.6
## 19 NA NA NA W1-4-2A-1 62 76.0
## 20 NA NA NA W1-4-2A-2 164 74.0
## 21 NA NA NA W1-4-2A-3 82 60.0
## Ch1Conc Ch2Conc FractionalAbundance Ratio PoissonRatioMax PoissonRatioMin
## 13 11.2 0 NA NA NA NA
## 14 9.9 0 NA NA NA NA
## 15 6.8 0.08 98.8 80.00 270.00 0.00
## 19 3.1 3.8 45.0 0.83 1.28 0.38
## 20 8.2 3.7 69.0 2.20 3.10 1.30
## 21 4.1 3 58.0 1.40 2.00 0.70
## Ch1Pos Ch2Pos Ch1Neg Ch2Neg RNAconc Ch1copies Ch2copies TotCopies
## 13 110 0 11498 11608 10.1 11.089109 0.00000000 11.089109
## 14 113 0 13357 13470 11.0 9.000000 0.00000000 9.000000
## 15 80 1 13899 13978 11.0 6.181818 0.07272727 6.254545
## 19 24 29 8999 8994 12.4 2.500000 3.06451613 5.564516
## 20 68 31 9734 9771 11.0 7.454545 3.36363636 10.818182
## 21 42 31 12141 12152 10.7 3.831776 2.80373832 6.635514
Group data by sampling time-point (months post-hatching).
#+++++++++++++++++++++++++
# Function to calculate the mean, the standard deviation and the standard error of the mean for each group
#+++++++++++++++++++++++++
# data : a data frame
# varname : the name of a column containing the variable
#to be summariezed
# groupnames : vector of column names to be used as
# grouping variables
data_summary <- function(data, varname, groupnames){
require(plyr)
summary_func <- function(x, col){
c(mean = mean(x[[col]], na.rm=TRUE),
sd = sd(x[[col]], na.rm=TRUE),
sem = sd(x[[col]], na.rm=TRUE)/sqrt(length(x[[col]])),
n = length(x[[col]]))
}
data_sum<-ddply(data, groupnames, .fun=summary_func,varname)
data_sum <- plyr::rename(data_sum, c("mean" = varname))
return(data_sum)
}
# duplicate main results table
plotData = countTable
# group all whole fry samples together (timepoint 2)
plotData$TissueOrRep = revalue(plotData$TissueOrRep, c("1"="Fry","2"="Fry","3"="Fry"))
# re-order tissues for plotting
plotData$TissueOrRep = factor(plotData$TissueOrRep,levels=c("Fry","Hr","Liv","Mc","Ad","Gn","Br","SG","Sp"))
# re-order vgll3 genotypes so that EE is plotted first
#plotData$VGLL3 = factor(plotData$VGLL3,levels=c("EE","EL","LE","LL"))
# assign gonads as factors
plotData$Gonads = factor(plotData$Gonads,levels=c("m","s","l","xl"))
# remove infinite and missing values
plotData = plotData[which(is.infinite(plotData$TotCopies) == F),]
plotData = plotData[which(is.na(plotData$TotCopies) == F),]
# add information of timepoints
plotData$Timepoint = "Na"
plotData$Timepoint[which(plotData$CollectionDate == "19022018")] = "2"
plotData$Timepoint[which(plotData$CollectionDate == "20032018")] = "3"
plotData$Timepoint[which(plotData$CollectionDate == "20042018")] = "4"
plotData$Timepoint[which(plotData$CollectionDate == "17052018")] = "5"
plotData$Timepoint[which(plotData$CollectionDate == "11062018")] = "6"
plotData$Timepoint[which(plotData$CollectionDate == "11072018")] = "7"
plotData$Timepoint[which(plotData$CollectionDate %in% c("6082018","8082018","9082018","13082018","14082018", "15082018","16082018","17082018"))] = "8"
plotData$Timepoint[which(plotData$CollectionDate %in% c("27082018","28082018","29082018","20092018"))] = "9"
plotData$Timepoint[which(plotData$CollectionDate %in% c("8102018","9102018","10102018","11102018","12102018"))] = "10"
plotData$Timepoint[which(plotData$CollectionDate %in% c("5112018","6112018","7112018","8112018","9112018","12112018"))] = "11"
plotData$Timepoint[which(plotData$CollectionDate %in% c("23112018","26112018","27112018","28112018","29112018","30112018","3122018","4122018","5122018"))] = "12"
plotData$Timepoint[which(plotData$CollectionDate %in% c("10012019"))] = "13"
# copy data to a new data frame to be modified
plotData2 = plotData
Sample exclusions Below we verify that genotypes and RT-ddPCR signal channel match (by indentifying individuals that e.g. are recorded as LL but only contain signal from the EE probes), and that recorder morphological sex matches genotypic sex. We also exclude data from heterozygotes showing monoallelic vgll3 expression, likely representing sampling or labeling error.
# LL genotypes should express LL probe higher compared to EE probe
LLsamples = plotData2$Sample[which(plotData2$VGLL3 == "LL")]
LLinconsistent = LLsamples[which(plotData2$Ch1copies[which(plotData2$VGLL3 == "LL")] - plotData2$Ch2copies[which(plotData2$VGLL3 == "LL")] < 0)]
# EE genotypes should express EE probe higher compared to LL probe
EEsamples = plotData2$Sample[which(plotData2$VGLL3 == "EE")]
EEinconsistent = EEsamples[which(plotData2$Ch1copies[which(plotData2$VGLL3 == "EE")] - plotData2$Ch2copies[which(plotData2$VGLL3 == "EE")] > 0)]
# remove these individuals from the data
plotData2 = plotData2[which(plotData2$Sample %in% c(EEinconsistent,LLinconsistent) == F),]
# remove individuals where genotypic sex and morphology do not match
inconsistentSex = plotData2[which(plotData2$SexGeno == "male" & plotData2$Gonads == "m" | (plotData2$SexGeno == "female" & plotData2$Gonads %in% c("s","l","xl"))),]
plotData2 = plotData2[which(plotData2$Individual %in% inconsistentSex$Individual == F),]
# remove heterozygotes with monoallelic vgll3 expression
monoallelic = plotData2[which(plotData2$VGLL3 %in% c("EL","LE") & plotData2$FractionalAbundance > 90),]
plotData2 = plotData2[which(plotData2$Individual %in% monoallelic$Individual == F),]
Impute missing annotations on early samples (morphological differences between gonads were unrecorded for first dissectable gonads because of their very small size) and assign gonad types based on gonad morphology. Create a new column “TissueOrGonadType” where gonads are assigned to ovaries, immature testes, pubertal testes and mature testes based on their visual morphology (recorded in “Gonads” column).
# assign missing gonad types by genotypic sex in early samples (all males before timepoint 8 are type "s" and females type "m")
missingGonadInfo = plotData2[which(is.na(plotData2$Gonads) & is.na(plotData2$SexGeno)==F), "Individual"]
for (i in missingGonadInfo){
if (unique(plotData2$SexGeno[which(plotData2$Individual == i)]) == "female"){
plotData2[which(plotData2$Individual == i),"Gonads"] = "m"
}
if (unique(plotData2$SexGeno[which(plotData2$Individual == i)]) == "male"){
plotData2[which(plotData2$Individual == i),"Gonads"] = "s"
}
}
# verify that assignment is correct
table(plotData2[which(plotData2$Individual%in% missingGonadInfo),c("SexGeno","Gonads")])
## Gonads
## SexGeno m s l xl
## female 76 0 0 0
## male 0 59 0 0
## NA 0 0 0 0
# assign ovaries, immature testes and mature testes by morphology
# here l gonads considered "pubertal" because they have clearly initiated pubertal cell type changes
plotData2$TissueOrGonadType = NA
plotData2$TissueOrGonadType = as.character(plotData2$TissueOrRep)
plotData2[which(plotData2$TissueOrRep == "Gn" & plotData2$Gonads == "m"),"TissueOrGonadType"] = "Ov"
plotData2[which(plotData2$TissueOrRep == "Gn" & plotData2$Gonads %in% c("s")),"TissueOrGonadType"] = "IT"
plotData2[which(plotData2$TissueOrRep == "Gn" & plotData2$Gonads %in% c("l")),"TissueOrGonadType"] = "PT"
plotData2[which(plotData2$TissueOrRep == "Gn" & plotData2$Gonads %in% c("xl")),"TissueOrGonadType"] = "MT"
Average expression across tissues and time points (with number of samples in each time-point indicated by number in grey).
# calculate sd and sem for plotting
plotStats = data_summary(plotData2, varname="TotCopies", groupnames=c("Timepoint","TissueOrGonadType"))
# sort by timepoint
plotStats$Timepoint = factor(plotStats$Timepoint,levels=c("2","3","4","5","6","7","8","9","10","11","12","13","14"))
# order tissues
plotStats$TissueOrGonadType = factor(plotStats$TissueOrGonadType, levels=c("Fry","Hr","Liv","Mc","Ad","Br","Ov","IT","PT","MT","SG","Sp"))
# remove rows where we don't know the gonad morphology
plotStats = plotStats[is.na(plotStats$TissueOrGonadType)==F,]
# labels
tissueLabels = c('Fry'='Whole fry','Liv'="Liver",'Mc'="Muscle","Hr"="Heart","Ad"="Adipose","Gn"="Gonad", "Br"="Brain", "Ov" = "Ovary","IT"="Immature testis","PT"="Pubertal testes","MT"="Mature testis","SG"="Stripped testis","Sp"="Ejaculate")
ggplot(plotStats,aes(Timepoint, TotCopies)) +
geom_errorbar(aes(ymin=TotCopies-sem, ymax=TotCopies+sem), width=.1,position=position_dodge(0.05)) +
geom_line() +
geom_point() +
theme_classic() +
theme(axis.title.y=element_text(size=16),
axis.text.y=element_text(size=16),
axis.title.x=element_text(size=16),
axis.text.x=element_text(size=16),
strip.text.x=element_text(size=16),
strip.background = element_blank()) +
geom_text(aes(y=TotCopies-15,x=as.numeric(Timepoint)+0.45,label=n),vjust=0.5,hjust=0.4,color="darkgrey", size=3) +
facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
xlab("Months post hatch") +
#ylim(-50,250) +
scale_x_discrete(breaks=seq(2,14,2))
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
Contrast immature testes expression with expression in mature testes, stripped testes and ejaculate.
# summary stats for timepoints where we have mature gonads (for panel b)
plotStats2 = data_summary(plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="MT")]),], varname="TotCopies", groupnames=c("TissueOrGonadType"))
# order tissues
plotStats2$TissueOrGonadType = factor(plotStats2$TissueOrGonadType, levels=c("Fry","Hr","Liv","Mc","Ad","Br","Ov","IT","PT","MT","SG","Sp"))
ggplot(plotStats2[which(plotStats2$TissueOrGonadType %in% c("IT","MT", "Sp","SG")),],aes(TissueOrGonadType, TotCopies, color=TissueOrGonadType)) +
geom_errorbar(aes(ymin=TotCopies-sem, ymax=TotCopies+sem), width=.1,position=position_dodge(0.05)) +
geom_point(size=4) +
theme_classic() +
scale_color_manual(values=c("black","black", "black","black")) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
theme(axis.title.x=element_blank(),
axis.text.x=element_text(hjust=1, vjust=1, size=14,color="black",angle=65),
axis.text.y=element_text(size=14,color="black"),
axis.title.y=element_text(size=14,color="black"),
legend.position="none")+
geom_text(aes(y=TotCopies-4,x=TissueOrGonadType,label=n),vjust=1,hjust=-0.25,color="darkgrey", size=3) +
scale_x_discrete(labels=c("Immature testes","Mature testes","Stripped testes","Ejaculate"))
# t-tests
# mature testes versus immature testes
t.test(plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="MT")] & plotData2$TissueOrGonadType=="MT"),"TotCopies"],
plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="MT")] & plotData2$TissueOrGonadType=="IT"),"TotCopies"])
##
## Welch Two Sample t-test
##
## data: plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType == "MT")] & plotData2$TissueOrGonadType == "MT"), "TotCopies"] and plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType == "MT")] & plotData2$TissueOrGonadType == "IT"), "TotCopies"]
## t = -6.3351, df = 25.904, p-value = 1.061e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -92.63591 -47.24216
## sample estimates:
## mean of x mean of y
## 36.70081 106.63984
# stripped testes versus immature testes
t.test(plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="SG")] & plotData2$TissueOrGonadType=="SG"),"TotCopies"],
plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="SG")] & plotData2$TissueOrGonadType=="IT"),"TotCopies"])
##
## Welch Two Sample t-test
##
## data: plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType == "SG")] & plotData2$TissueOrGonadType == "SG"), "TotCopies"] and plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType == "SG")] & plotData2$TissueOrGonadType == "IT"), "TotCopies"]
## t = -3.5935, df = 21.795, p-value = 0.001635
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -110.89187 -29.70551
## sample estimates:
## mean of x mean of y
## 57.99373 128.29242
# ejaculate versus immature testes
t.test(plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="Sp")] & plotData2$TissueOrGonadType=="Sp"),"TotCopies"],
plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="Sp")] & plotData2$TissueOrGonadType=="IT"),"TotCopies"])
##
## Welch Two Sample t-test
##
## data: plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType == "Sp")] & plotData2$TissueOrGonadType == "Sp"), "TotCopies"] and plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType == "Sp")] & plotData2$TissueOrGonadType == "IT"), "TotCopies"]
## t = -4.2303, df = 6.5563, p-value = 0.004505
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -180.68655 -49.96748
## sample estimates:
## mean of x mean of y
## 18.54077 133.86778
# percent reduction in expression:
ITmean = mean(plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="MT")] & plotData2$TissueOrGonadType=="IT"),"TotCopies"],na.rm=T)
MTmean = mean(plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="MT")] & plotData2$TissueOrGonadType=="MT"),"TotCopies"],na.rm=T)
(ITmean-MTmean)/ITmean*100
## [1] 65.58434
# contrast p-value
my_comparisons5 = data.frame(start=c("IT"),
end=c("MT"),
y=c(120,120),
label=c("P=1e-06"))
my_comparisons6 = data.frame(start=c("IT"),
end=c("SG"),
y=c(130,130),
label=c("P=0.002"))
my_comparisons7 = data.frame(start=c("IT"),
end=c("Sp"),
y=c(140,140),
label=c("P=0.005"))
testesFig2 = ggplot(plotStats2[which(plotStats2$TissueOrGonadType %in% c("IT","MT", "Sp","SG")),]) +
geom_errorbar(aes(x=TissueOrGonadType, ymin=TotCopies-sem, ymax=TotCopies+sem), width=.1,position=position_dodge(0.05)) +
geom_point(aes(TissueOrGonadType, TotCopies, color=TissueOrGonadType),size=2) +
theme_classic() +
geom_signif(data=my_comparisons5,
aes(xmin=start, xmax=end, annotations=label, y_position=y),
textsize = 3, size=0.5, tip_length = 0.01,
manual=TRUE) +
geom_signif(data=my_comparisons6,
aes(xmin=start, xmax=end, annotations=label, y_position=y),
textsize = 3, size=0.5, tip_length = 0.01,
manual=TRUE) +
geom_signif(data=my_comparisons7,
aes(xmin=start, xmax=end, annotations=label, y_position=y),
textsize = 3, size=0.5, tip_length = 0.01,
manual=TRUE) +
scale_color_manual(values=c("black","black", "black","black")) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
theme(axis.title.x=element_blank(),
axis.text.x=element_text(hjust=1, vjust=1, size=10,color="black",angle=65),
axis.text.y=element_text(size=10,color="black"),
axis.title.y=element_text(size=10,color="black"),
legend.position="none")+
#geom_text(aes(y=TotCopies-4,x=TissueOrGonadType,label=n),vjust=1,hjust=-0.25,color="darkgrey", size=3) +
scale_x_discrete(labels=c("Immature testes","Mature testes","Stripped testes","Ejaculate"))
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
testesFig2
Testes expression colored by genotype.
# collapse heterozygotes
plotData2$VGLL3RECPRO = revalue(plotData2$VGLL3,c("EL"="EL","LE"="EL"))
# summary stats for timepoints where we have mature gonads (for panel b)
plotStats2_2 = data_summary(plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="MT")]),], varname="TotCopies", groupnames=c("TissueOrGonadType","VGLL3RECPRO"))
# order tissues
plotStats2_2$TissueOrGonadType = factor(plotStats2_2$TissueOrGonadType, levels=c("Fry","Hr","Liv","Mc","Ad","Br","Ov","IT","PT","MT","SG","Sp"))
ggplot(plotStats2_2[which(plotStats2_2$TissueOrGonadType %in% c("IT","MT")),], aes(TissueOrGonadType, TotCopies, color=VGLL3RECPRO)) +
geom_point(size=3, position=position_dodge(width=0.05)) +
geom_errorbar(aes(ymin=TotCopies-sem, ymax=TotCopies+sem), width=.1, position=position_dodge(width=0.05)) +
theme_classic() +
scale_color_manual(name=expression(~italic("vgll3")),values=c("blue","darkgrey","red")) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
theme(axis.title.x=element_blank(),
axis.text.x=element_text(hjust=1, vjust=1, size=14,color="black",angle=65),
axis.text.y=element_text(size=14,color="black"),
axis.title.y=element_text(size=14,color="black"),
legend.position="right",
legend.title = element_text(size=14))+
#geom_text(aes(y=TotCopies-4,x=TissueOrGonadType,label=n),vjust=1,hjust=-0.25,color="darkgrey", size=3) +
scale_x_discrete(labels=c("Immature testes","Mature testes","Stripped testes","Ejaculate"))
Combine result tables from QuantStudio output. Channel 1 (FAM) Amh channel 2 (VIC) Igf3
# load results
ddPCRresAmhIgf1 = rbind(ddPCRresAmhIgf1_1,ddPCRresAmhIgf1_2)
colnames(ddPCRresAmhIgf1) = iconv(colnames(ddPCRresAmhIgf1), to = "ASCII", sub = "u")
colnames(ddPCRresAmhIgf1) = c(colnames(ddPCRresAmhIgf1)[-2],"Empty")
# calculate efficiency factor for amh (FAM) vs igf3 (VIC) probes based on genomic DNA
amhPos = ddPCRresAmhIgf1$Positives[which(ddPCRresAmhIgf1$Sample=="e28-DNA" & ddPCRresAmhIgf1$TargetType=="Ch1Positive Control")]
amhNeg = ddPCRresAmhIgf1$Negatives[which(ddPCRresAmhIgf1$Sample=="e28-DNA" & ddPCRresAmhIgf1$TargetType=="Ch1Positive Control")]
igf3Pos = ddPCRresAmhIgf1$Positives[which(ddPCRresAmhIgf1$Sample=="e28-DNA" & ddPCRresAmhIgf1$TargetType=="Ch2Positive Control")]
igf3Neg = ddPCRresAmhIgf1$Negatives[which(ddPCRresAmhIgf1$Sample=="e28-DNA" & ddPCRresAmhIgf1$TargetType=="Ch2Positive Control")]
effFactor2 = mean(apply(cbind(amhPos,igf3Pos,amhNeg,igf3Neg), 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]))[["estimate"]]))
Flag and remove amh/igf3 expression data for samples where the RT-ddPCR reaction was saturated with positive droplets (more than 98% of all droplets are positive). In the figures below, saturated samples are plotted in red and non-saturated samples in black.
# can't use reactions where positives are saturated
plot(ddPCRresAmhIgf1$Positives,
ddPCRresAmhIgf1$Positives+ddPCRresAmhIgf1$Negatives,
col=as.factor(ddPCRresAmhIgf1$Positives/c(ddPCRresAmhIgf1$Negatives+ddPCRresAmhIgf1$Positives)>0.98),ylab="Total number of droplets", xlab="Number of positive droplets")
# add a flag to samples that are not saturated
ddPCRresAmhIgf1$AbundanceOK = ddPCRresAmhIgf1$Positives/c(ddPCRresAmhIgf1$Negatives+ddPCRresAmhIgf1$Positives)<0.98
# clean for samples where Amh:Igf1 reaction has OK negative:positive droplet numbers (on both channels)
ddPCRresAmhIgf1$Remove = NA
for (i in unique(ddPCRresAmhIgf1$Sample[which(ddPCRresAmhIgf1$Sample != "H2O")])){
tmp = ddPCRresAmhIgf1[which(ddPCRresAmhIgf1$Sample == i),]
if (all(tmp$AbundanceOK)){
ddPCRresAmhIgf1[which(ddPCRresAmhIgf1$Sample == i),"Remove"] = FALSE
}
else {
ddPCRresAmhIgf1[which(ddPCRresAmhIgf1$Sample == i),"Remove"] = TRUE
}
}
ddPCRresAmhIgf1$Remove[which(ddPCRresAmhIgf1$Sample == "H2O")] = TRUE
ddPCRresAmhIgf1Clean = data.frame()
ddPCRresAmhIgf1Clean = ddPCRresAmhIgf1[which(ddPCRresAmhIgf1$Remove == F),]
# verify that abundances are now ok
plot(ddPCRresAmhIgf1Clean$Positives,
ddPCRresAmhIgf1Clean$Positives+ddPCRresAmhIgf1Clean$Negatives,
col=as.factor(ddPCRresAmhIgf1Clean$Positives/c(ddPCRresAmhIgf1Clean$Negatives+ddPCRresAmhIgf1Clean$Positives)>0.98),ylab="Total number of droplets", xlab="Number of positive droplets")
Clean for any samples not analyzed for vgll3.
# clean results from any samples not analyzed for vgll3
ddPCRresAmhIgf1Clean = ddPCRresAmhIgf1Clean[which(ddPCRresAmhIgf1Clean$Sample %in% plotData2$Sample),]
Split the ddPCR output file into Channel 1 (Amh) and Channel 2 (Igf1) to avoid bugs.
ddPCRresAmh = ddPCRresAmhIgf1Clean[which(ddPCRresAmhIgf1Clean$TargetType == "Ch1Unknown"),]
ddPCRresIgf1 = ddPCRresAmhIgf1Clean[which(ddPCRresAmhIgf1Clean$TargetType == "Ch2Unknown"),]
Add columns to existing data frame.
# add Amh copies
plotData2$AmhConc = NA
plotData2$AmhConc = replace(plotData2$AmhConc,
match(ddPCRresAmh$Sample,
plotData2$Sample),
ddPCRresAmh$CopiesPer20uLWell[which(ddPCRresAmh$TargetType == "Ch1Unknown")])
# add Igf1 copies
plotData2$Igf1Conc = NA
plotData2$Igf1Conc = replace(plotData2$Igf1Conc,
match(ddPCRresIgf1$Sample,
plotData2$Sample),
ddPCRresIgf1$CopiesPer20uLWell[which(ddPCRresIgf1$TargetType == "Ch2Unknown")])
# Amh:Igf1 ratio of concentrations
plotData2$AmhIgf1ConcRatio = NA
plotData2$AmhIgf1ConcRatio = replace(plotData2$AmhIgf1ConcRatio,
match(ddPCRresAmh$Sample,
plotData2$Sample),
ddPCRresAmh$Ratio[which(ddPCRresAmh$TargetType == "Ch1Unknown")])
# Amh:Igf1 fractional abundance
plotData2$AmhIgf1FA = NA
plotData2$AmhIgf1FA = replace(plotData2$AmhIgf1FA,
match(ddPCRresAmh$Sample,
plotData2$Sample),
ddPCRresAmh$FractionalAbundance[which(ddPCRresAmh$TargetType == "Ch1Unknown")])
# Amh:igf3 Poisson rate ratio (add speudocount +1 to avoid Inf values)
plotData2$AmhPos = NA
plotData2$AmhPos = replace(plotData2$AmhPos,
match(ddPCRresAmh$Sample,
plotData2$Sample),
ddPCRresAmh$"Ch1+Ch2-"[which(ddPCRresAmh$TargetType == "Ch1Unknown")])
plotData2$AmhPos = plotData2$AmhPos + 1
plotData2$Igf1Pos = NA
plotData2$Igf1Pos = replace(plotData2$Igf1Pos,
match(ddPCRresIgf1$Sample,
plotData2$Sample),
ddPCRresIgf1$"Ch1-Ch2+"[which(ddPCRresIgf1$TargetType == "Ch2Unknown")])
plotData2$Igf1Pos = plotData2$Igf1Pos + 1
plotData2$AmhNeg = NA
plotData2$AmhNeg = replace(plotData2$AmhNeg,
match(ddPCRresAmh$Sample,
plotData2$Sample),
c(ddPCRresAmh$"Ch1-Ch2-"[which(ddPCRresAmh$TargetType == "Ch1Unknown")]+ddPCRresAmh$"Ch1-Ch2+"[which(ddPCRresAmh$TargetType == "Ch1Unknown")]))
plotData2$AmhNeg = plotData2$AmhNeg + 1
plotData2$Igf1Neg = NA
plotData2$Igf1Neg = replace(plotData2$Igf1Neg,
match(ddPCRresIgf1$Sample,
plotData2$Sample),
c(ddPCRresIgf1$"Ch1-Ch2-"[which(ddPCRresIgf1$TargetType == "Ch2Unknown")]+ddPCRresIgf1$"Ch1+Ch2-"[which(ddPCRresIgf1$TargetType == "Ch2Unknown")]))
plotData2$Igf1Neg = plotData2$Igf1Neg + 1
Subset for males with Amh and Igf1 expression data
plotData3 = plotData2[which(is.na(plotData2$AmhIgf1ConcRatio) == F & plotData2$Gonads != "m"),]
plotData3$AmhIgf3PoissonRateRatio = apply(plotData3[,c("AmhPos","Igf1Pos","AmhNeg","Igf1Neg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]),r=effFactor2)[["estimate"]])
Amh/igf3 log- Poisson rate ratio in mature, immature and pubertal gonads.
# tissue labels
tissueLabels = c('IT'="Immature","PT"="Pubertal","MT"="Mature")
ggplot(plotData3)+
geom_quasirandom(aes(TissueOrGonadType,log(AmhIgf3PoissonRateRatio, base=2),color=TissueOrGonadType),size=5,alpha=0.7) +
scale_color_manual(values=c("darkgrey","black","grey")) +
theme_classic() +
facet_wrap(~VGLL3) +
scale_x_discrete(breaks=c("IT","PT","MT"),labels=c("Immature","Pubertal","Mature")) +
theme(axis.text.x=element_text(size=14,color="black"),
axis.title.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_text(size=14,color="black"),
axis.title.y=element_text(size=14,color="black"),
axis.line.x = element_blank(),
legend.position="none") +
ylab("log2 amh/igf3 rate ratio") +
xlab("Gonad maturity")
Unsupervised clustering of samples according to amh/igf3 ratio and vgll3 expression using Gaussian mixture modeling as implemented in the package Mclust.
clustTable = as.data.frame(cbind(plotData3$TotCopies,log(plotData3$AmhIgf3PoissonRateRatio, base=2)))
BIC = mclustBIC(clustTable)
summary(BIC)
## Best BIC values:
## VEI,2 VVI,2 VEE,2
## BIC -977.5309 -978.777440 -981.250501
## BIC diff 0.0000 -1.246513 -3.719575
mod = Mclust(clustTable,x=BIC)
summary(mod,parameter=T)
## ----------------------------------------------------
## Gaussian finite mixture model fitted by EM algorithm
## ----------------------------------------------------
##
## Mclust VEI (diagonal, equal shape) model with 2 components:
##
## log-likelihood n df BIC ICL
## -472.5237 58 8 -977.5309 -980.0985
##
## Clustering table:
## 1 2
## 40 18
##
## Mixing probabilities:
## 1 2
## 0.7089881 0.2910119
##
## Means:
## [,1] [,2]
## V1 103.523888 23.4819369
## V2 9.350553 0.3410666
##
## Variances:
## [,,1]
## V1 V2
## V1 2202.634 0.00000
## V2 0.000 26.19155
## [,,2]
## V1 V2
## V1 187.219 0.000000
## V2 0.000 2.226224
# plots of best model/classification
fviz_mclust(mod, "BIC", palette = "jco")
fviz_mclust(mod, "classification", geom = "point", pointsize = 1.5, palette = "jco", stand=F, ellipse.level=0.68)
fviz_mclust(mod, "uncertainty", palette = "jco", stand=F)
plot(mod,what="density")
plot(mod, what = "density", type = "persp")
LRT = mclustBootstrapLRT(clustTable, nboot=999, modelName = "VEI")
LRT
## -------------------------------------------------------------
## Bootstrap sequential LRT for the number of mixture components
## -------------------------------------------------------------
## Model = VEI
## Replications = 999
## LRTS bootstrap p-value
## 1 vs 2 55.07579 0.001
## 2 vs 3 10.67646 0.293
Plot of Mclust best classification (vs vgll3 genotype).
# visualize Mclust clustering confidence interval by adding a layer of this ggplot object
ellipsePlot = fviz_mclust(mod, "classification", geom = "point", stand=F, ellipse.level=0.95)
ggplot(plotData3)+
geom_point(aes(TotCopies,log(AmhIgf3PoissonRateRatio, base=2),shape=TissueOrGonadType,fill=VGLL3),size=4,alpha=0.7)+
#scale_color_manual(values=c("black","darkgrey")) +
scale_fill_manual(values=c("grey","grey", "blue","darkgreen","orange","red")) +
#scale_fill_manual(values=c("slategrey","grey")) +
scale_shape_manual(values=c(21,22,23)) +
ellipsePlot$layers[[2]] +
geom_label(aes(x=60,y=-4), label="Mature cluster") +
geom_label(aes(x=120,y=22), label="Immature cluster") +
theme_classic() +
ylab("log2 amh/igf3 rate ratio") +
xlab("vgll3 copies / ng RNA") +
theme(axis.text=element_text(size=14,color="black"),
axis.title=element_text(size=14,color="black"),
legend.position="top")
Plot of Mclust best model classification. We observe that “pubertal” samples show a large range of amh/igf3 expression ratio and vgll3 expression, indicating that the class contains samples that were either in ht early stages of transitioning towards maturity, or samples that had reached maturity while resembling immature gonads more than mature gonads in morphology.
plotData3$clust = mod$classification
bxFig = ggplot(plotData3)+
geom_quasirandom(aes(TissueOrGonadType,log(AmhIgf3PoissonRateRatio, base=2),shape=TissueOrGonadType),color="black",fill="darkgrey",size=5,alpha=0.7) +
scale_fill_manual(values=c("darkgrey","darkgrey")) +
scale_shape_manual(values=c(21,22,23)) +
theme_classic() +
scale_x_discrete(breaks=c("IT","MT","PT"),labels=c("Immature","Mature","Pubertal")) +
theme(axis.text.x=element_text(size=14,color="black"),
axis.title.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_text(size=14,color="black"),
axis.title.y=element_text(size=14,color="black"),
axis.line.x = element_blank(),
legend.position="none") +
ylab("log2 amh/igf3 rate ratio") +
xlab("Gonad maturity")
scatterFig = ggplot(plotData3)+
geom_point(aes(TotCopies,log(AmhIgf3PoissonRateRatio, base=2),shape=TissueOrGonadType,fill=as.factor(clust)),size=4,alpha=0.7)+
scale_color_manual(values=c("black","darkgrey")) +
scale_fill_manual(values=c("slategrey","grey")) +
scale_shape_manual(values=c(21,22,23)) +
ellipsePlot$layers[[2]] +
geom_label(aes(x=60,y=-4), label="Mature cluster") +
geom_label(aes(x=120,y=22), label="Immature cluster") +
theme_classic() +
ylab("log2 amh/igf3 rate ratio") +
xlab("vgll3 copies / ng RNA") +
theme(axis.text=element_text(size=14,color="black"),
axis.title=element_text(size=14,color="black"),
legend.position="none")
ggarrange(bxFig, scatterFig,
labels = c("a","b"),
ncol = 2, nrow = 1,
widths = c(0.7, 1))
scatterFig2 = ggplot(plotData3)+
ellipsePlot$layers[[2]] +
geom_point(aes(TotCopies,log(AmhIgf3PoissonRateRatio, base=2),shape=TissueOrGonadType,fill=as.factor(clust)),size=3,alpha=0.7)+
scale_color_manual(values=c("darkgrey","black")) +
scale_fill_manual(values=c("grey", "slategrey")) +
scale_shape_manual(values=c(21,22,23)) +
geom_label(aes(x=60,y=-4), label="Mature cluster",size=3) +
geom_label(aes(x=120,y=22), label="Immature cluster",size=3) +
theme_classic() +
ylab(expression('log2'~italic(amh/igf3)~'rate ratio')) +
xlab(expression(~italic(vgll3)~'copies / ng RNA')) +
theme(axis.text=element_text(size=10,color="black"),
axis.title=element_text(size=10,color="black"),
legend.position="none")
scatterFig2
Below we calculate age for samples based on date when 90% of eggs had hatched.
convertToDate = function(x){
x = as.character(x)
if (strsplit(x,"") %>% unlist() %>% length() == 8){
day = unlist(strsplit(x,""))[1:2] %>% as.numeric() %>% as.matrix() %>% t() %>% as.data.frame() %>% unite("d",c("V1","V2"),sep="")
month = unlist(strsplit(x,""))[3:4] %>% as.numeric() %>% as.matrix() %>% t() %>% as.data.frame() %>% unite("m",c("V1","V2"),sep="")
year = unlist(strsplit(x,""))[5:8] %>% as.numeric() %>% as.matrix() %>% t() %>% as.data.frame() %>% unite("y",c("V1","V2","V3","V4"),sep="")
return(paste(month,day,year,sep="/"))
}
else {
day = paste(0,unlist(strsplit(x,""))[1],sep="")
month = unlist(strsplit(x,""))[2:3] %>% as.numeric() %>% as.matrix() %>% t() %>% as.data.frame() %>% unite("m",c("V1","V2"),sep="")
year = unlist(strsplit(x,""))[4:7] %>% as.numeric() %>% as.matrix() %>% t() %>% as.data.frame() %>% unite("y",c("V1","V2","V3","V4"),sep="")
return(paste(month,day,year,sep="/"))
}
}
daysPostHatch = function(x){
datetimes = strptime(c("12/29/2017",x), format = "%m/%d/%Y")
return(as.numeric(difftime(datetimes[2], datetimes[1], units = "days")))
}
# OUL warm 90% hatch date 12/29/2017
plotData2$Date = unlist(lapply(plotData2$CollectionDate,convertToDate))
plotData2$DPH = unlist(lapply(plotData2$Date,daysPostHatch))
plotData3$Date = unlist(lapply(plotData3$CollectionDate,convertToDate))
plotData3$DPH = unlist(lapply(plotData3$Date,daysPostHatch))
Plot of tissue expression levels as above but where age is converted from months post-hatch to days post-hatch.
plotData2$TissueOrGonadType = factor(plotData2$TissueOrGonadType, levels=c("Fry","Hr","Liv","Mc","Ad","Br","Ov","IT","PT", "MT","SG","Sp"))
tissueLabels = c('Fry'='Whole fry','Liv'="Liver",'Mc'="Muscle","Hr"="Heart","Ad"="Adipose","Gn"="Gonad", "Br"="Brain", "Ov" = "Immature ovary","IT"="Immature testes","MT"="Mature testes","SG"="Stripped testes","Sp"="Ejaculate")
allFigDPH = ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Hr","Liv","Mc","Ad","Ov","IT","MT","SG","Sp")),],aes(DPH, TotCopies)) +
geom_smooth(data = plotData2[which(plotData2$TissueOrGonadType %in% c("Hr","Liv","Mc","Ad","Ov","IT","MT")),],fill="grey70",colour="aquamarine4") +
geom_point(colour="black",fill="black",alpha=0.25) +
theme_classic() +
theme(axis.text=element_text(size=10,color="black"),
axis.title=element_text(size=10,color="black"),
strip.text.x=element_text(size=10),
legend.position="none",
strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
ylim(0,350) +
facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
xlab("Age (days post-hatch)")
allFigDPH
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 39 rows containing missing values (geom_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
# color points according to vgll3 genotypes
allFigDPHcolor = ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Hr","Liv","Mc","Ad","Ov","IT")),],aes(DPH, TotCopies,colour=VGLL3RECPRO)) +
geom_smooth(fill="grey70",colour="aquamarine4") +
geom_point(fill="black",alpha=0.25) +
scale_color_manual(name=expression(~italic("vgll3")), values=c( "blue","darkgrey","red")) +
theme_classic() +
theme(axis.text=element_text(size=14,color="black"),
axis.title=element_text(size=14,color="black"),
strip.text.x=element_text(size=14),
legend.position="right",
strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
xlab("Age (days post-hatch)")
allFigDPHcolor
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Reordered panels for presentation.
plotData2$TissueOrGonadType = factor(plotData2$TissueOrGonadType, levels=c("Ad","Ov","IT","Hr","Liv","Mc","Br","Fry","PT", "SG","MT","Sp"))
tissueLabels = c('Fry'='Whole fry','Liv'="Liver",'Mc'="Muscle","Hr"="Heart","Ad"="Adipose","Gn"="Gonad", "Br"="Brain", "Ov" = "Immature ovary","IT"="Immature testes","MT"="Mature testes","SG"="Stripped testes","Sp"="Ejaculate")
ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Hr","Liv","Mc","Ad","Ov","IT")),],aes(DPH, TotCopies)) +
geom_smooth(fill="grey70",colour="aquamarine4") +
geom_point(colour="black",fill="black",alpha=0.25) +
theme_classic() +
theme(axis.text=element_text(size=10,color="black"),
axis.title=element_text(size=10,color="black"),
strip.text.x=element_text(size=10),
legend.position="none",
strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
xlab("Age (days post-hatch)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggarrange(allFigDPH,
ggarrange(testesFig2,scatterFig2, ncol = 2, labels = c("B", "C"), widths = c(0.7, 1)),
nrow = 2, heights=c(1.5,1),
labels = "A")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 39 rows containing missing values (geom_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
Plot all tissues for supplemental figure.
# re-order tissues
plotData2$TissueOrGonadType = factor(plotData2$TissueOrGonadType, levels=c("Fry","Hr","Liv","Mc","Ad","Br","Ov","IT","PT", "SG","MT","Sp"))
tissueLabels = c('Fry'='Whole fry','Liv'="Liver",'Mc'="Muscle","Hr"="Heart","Ad"="Adipose","Gn"="Gonad", "Br"="Brain", "Ov" = "Immature ovary","IT"="Immature testes","PT"="Pubertal testes", "MT"="Mature testes","SG"="Stripped testes","Sp"="Ejaculate")
# color points according to vgll3 genotypes
allTissuesGenotypes = ggplot(plotData2,aes(DPH, TotCopies,colour=VGLL3RECPRO)) +
#geom_smooth(fill="grey70",colour="aquamarine4") +
geom_point(fill="black",alpha=0.25) +
scale_color_manual(name=expression(~italic("vgll3")), values=c( "blue","darkgrey","red")) +
theme_classic() +
theme(axis.text=element_text(size=14,color="black"),
axis.title=element_text(size=14,color="black"),
strip.text.x=element_text(size=14),
legend.position="right",
strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
xlab("Age (days post-hatch)")
allTissuesGenotypes
Individual plots
tissueLabels = c('Fry'='Whole fry','Liv'="Liver",'Mc'="Muscle","Hr"="Heart","Ad"="Adipose","Gn"="Gonad", "Br"="Brain", "Ov" = "Immature ovary","IT"="Immature testes","MT"="Mature testes","SG"="Stripped testes","Sp"="Ejaculate")
ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Hr")),],aes(DPH, TotCopies)) +
geom_smooth(fill="grey70",colour="aquamarine4") +
geom_point(colour="black",fill="black",alpha=0.25,size=4) +
theme_classic() +
theme(axis.text=element_text(size=16,color="black"),
axis.title=element_text(size=16,color="black"),
strip.text.x=element_text(size=16),
legend.position="none",
strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
xlab("Age (days post-hatch)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Liv")),],aes(DPH, TotCopies, color=VGLL3)) +
geom_smooth(fill="grey70",colour="aquamarine4") +
geom_point(colour="black",fill="black",alpha=0.25,size=4) +
theme_classic() +
theme(axis.text=element_text(size=16,color="black"),
axis.title=element_text(size=16,color="black"),
strip.text.x=element_text(size=16),
legend.position="none",
strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
xlab("Age (days post-hatch)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Mc")),],aes(DPH, TotCopies)) +
geom_smooth(fill="grey70",colour="aquamarine4") +
geom_point(colour="black",fill="black",alpha=0.25,size=4) +
theme_classic() +
theme(axis.text=element_text(size=16,color="black"),
axis.title=element_text(size=16,color="black"),
strip.text.x=element_text(size=16),
legend.position="none",
strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
xlab("Age (days post-hatch)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Ad")),],aes(DPH, TotCopies)) +
geom_smooth(fill="grey70",colour="aquamarine4") +
geom_point(colour="black",fill="black",alpha=0.25,size=4) +
theme_classic() +
theme(axis.text=element_text(size=16,color="black"),
axis.title=element_text(size=16,color="black"),
strip.text.x=element_text(size=16),
legend.position="none",
strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
xlab("Age (days post-hatch)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Ov")),],aes(DPH, TotCopies)) +
geom_smooth(fill="grey70",colour="aquamarine4") +
geom_point(colour="black",fill="black",alpha=0.25,size=4) +
theme_classic() +
theme(axis.text=element_text(size=16,color="black"),
axis.title=element_text(size=16,color="black"),
strip.text.x=element_text(size=16),
legend.position="none",
strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
xlab("Age (days post-hatch)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("IT")),],aes(DPH, TotCopies)) +
geom_smooth(fill="grey70",colour="aquamarine4") +
geom_point(colour="black",fill="black",alpha=0.25,size=4) +
theme_classic() +
theme(axis.text=element_text(size=16,color="black"),
axis.title=element_text(size=16,color="black"),
strip.text.x=element_text(size=16),
legend.position="none",
strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
xlab("Age (days post-hatch)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Average plots.
sexLabels = c('m'="Female","s"="Immature male","xl"="Mature male")
ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Hr")),],aes(VGLL3, TotCopies,color=VGLL3)) +
geom_boxplot(aes(VGLL3,TotCopies,color=VGLL3),outlier.alpha = 0) +
stat_summary(fun.y=mean, geom="point", shape=17, size=4, color="black", fill="black",alpha=0.5) +
geom_quasirandom(alpha=0.25,size=4) +
scale_color_manual(values=c("blue","red")) +
theme_classic() +
theme(axis.text=element_text(size=16,color="black"),
axis.title=element_text(size=16,color="black"),
strip.text.x=element_text(size=16),
legend.position="none",
strip.background = element_blank()) +
facet_wrap(~Gonads,labeller=labeller(Gonads=sexLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
ggtitle("Heart") +
xlab("")
## Warning: `fun.y` is deprecated. Use `fun` instead.
ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Ad")),],aes(VGLL3, TotCopies,color=VGLL3)) +
geom_boxplot(aes(VGLL3,TotCopies,color=VGLL3),outlier.alpha = 0) +
stat_summary(fun.y=mean, geom="point", shape=17, size=4, color="black", fill="black",alpha=0.5) +
geom_quasirandom(alpha=0.25,size=4) +
scale_color_manual(values=c("blue","darkgreen", "darkorange","red")) +
theme_classic() +
theme(axis.text=element_text(size=16,color="black"),
axis.title=element_text(size=16,color="black"),
strip.text.x=element_text(size=16),
legend.position="none",
strip.background = element_blank()) +
facet_wrap(~Gonads,labeller=labeller(Gonads=sexLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
ggtitle("Adipose") +
xlab("")
## Warning: `fun.y` is deprecated. Use `fun` instead.
#t.test(plotData2$TotCopies[which(plotData2$VGLL3=="EE" & plotData2$TissueOrGonadType %in% c("Ad"))],plotData2$TotCopies[which(plotData2$VGLL3=="LL" & plotData2$TissueOrGonadType %in% c("Ad"))])
ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Mc")),],aes(VGLL3, TotCopies,color=VGLL3)) +
geom_boxplot(aes(VGLL3,TotCopies,color=VGLL3),outlier.alpha = 0) +
stat_summary(fun.y=mean, geom="point", shape=17, size=4, color="black", fill="black",alpha=0.5) +
geom_quasirandom(alpha=0.25,size=4) +
scale_color_manual(values=c("blue","red")) +
theme_classic() +
theme(axis.text=element_text(size=16,color="black"),
axis.title=element_text(size=16,color="black"),
strip.text.x=element_text(size=16),
legend.position="none",
strip.background = element_blank()) +
facet_wrap(~Gonads,labeller=labeller(Gonads=sexLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
ggtitle("Muscle") +
xlab("")
## Warning: `fun.y` is deprecated. Use `fun` instead.
ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Ov")),],aes(VGLL3, TotCopies,color=VGLL3)) +
geom_boxplot(aes(VGLL3,TotCopies,color=VGLL3),outlier.alpha = 0) +
stat_summary(fun.y=mean, geom="point", shape=17, size=4, color="black", fill="black",alpha=0.5) +
geom_quasirandom(alpha=0.25,size=4) +
scale_color_manual(values=c("blue","darkgreen", "darkorange","red")) +
theme_classic() +
theme(axis.text=element_text(size=16,color="black"),
axis.title=element_text(size=16,color="black"),
strip.text.x=element_text(size=16),
legend.position="none",
strip.background = element_blank()) +
facet_wrap(~Gonads,labeller=labeller(Gonads=sexLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
ggtitle("Ovaries") +
xlab("")
## Warning: `fun.y` is deprecated. Use `fun` instead.
ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("IT")),],aes(VGLL3, TotCopies,color=VGLL3)) +
geom_boxplot(aes(VGLL3,TotCopies,color=VGLL3),outlier.alpha = 0) +
stat_summary(fun.y=mean, geom="point", shape=17, size=4, color="black", fill="black",alpha=0.5) +
geom_quasirandom(alpha=0.25,size=4) +
scale_color_manual(values=c("blue","darkgreen", "darkorange","red")) +
theme_classic() +
theme(axis.text=element_text(size=16,color="black"),
axis.title=element_text(size=16,color="black"),
strip.text.x=element_text(size=16),
legend.position="none",
strip.background = element_blank()) +
facet_wrap(~Gonads,labeller=labeller(Gonads=sexLabels)) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
ggtitle("Immature testes") +
xlab("")
## Warning: `fun.y` is deprecated. Use `fun` instead.
Plot of EE & LL vgll3 expression time-series (immature gonads). Time-point 13 is exlcuded because there’s no immature EE data.
# subset for male gonads (immature) and only EE & LL genotypes - exclude TP 13 because only LL data
plotData4 = plotData2[which(plotData2$TissueOrRep=="Gn" & plotData2$TissueOrGonadType %in% c("IT") & plotData2$VGLL3 %in% c("EE","LL") & plotData2$Timepoint!=13),]
# fit polynomial regression - test models with and without interaction term
fitSimple <- lm(plotData4$TotCopies ~ plotData4$VGLL3 + plotData4$DPH)
fitFull <- lm(plotData4$TotCopies ~ plotData4$VGLL3 + plotData4$VGLL3 * plotData4$DPH)
fitVgll3 = lm(plotData4$TotCopies ~ plotData4$VGLL3)
fitAge = lm(plotData4$TotCopies ~ plotData4$DPH)
# select for the best model
AIC(fitSimple,fitFull)
## df AIC
## fitSimple 4 354.3633
## fitFull 5 355.6761
BIC(fitSimple,fitFull)
## df BIC
## fitSimple 4 360.6973
## fitFull 5 363.5937
# significance of the models
summary(fitSimple)
##
## Call:
## lm(formula = plotData4$TotCopies ~ plotData4$VGLL3 + plotData4$DPH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -96.077 -18.849 7.399 19.450 59.502
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.3778 33.6170 1.707 0.0973 .
## plotData4$VGLL3LL 21.8992 10.7709 2.033 0.0501 .
## plotData4$DPH 0.1105 0.1238 0.893 0.3782
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31.04 on 33 degrees of freedom
## Multiple R-squared: 0.1315, Adjusted R-squared: 0.07881
## F-statistic: 2.497 on 2 and 33 DF, p-value: 0.09774
summary(fitFull)
##
## Call:
## lm(formula = plotData4$TotCopies ~ plotData4$VGLL3 + plotData4$VGLL3 *
## plotData4$DPH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -93.872 -19.611 8.308 16.811 58.086
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 96.18312 59.87614 1.606 0.118
## plotData4$VGLL3LL -33.98100 71.97730 -0.472 0.640
## plotData4$DPH -0.03725 0.22563 -0.165 0.870
## plotData4$VGLL3LL:plotData4$DPH 0.21245 0.27053 0.785 0.438
##
## Residual standard error: 31.22 on 32 degrees of freedom
## Multiple R-squared: 0.1479, Adjusted R-squared: 0.06799
## F-statistic: 1.851 on 3 and 32 DF, p-value: 0.1578
summary(fitVgll3)
##
## Call:
## lm(formula = plotData4$TotCopies ~ plotData4$VGLL3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -99.847 -18.269 7.949 19.434 61.923
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 86.402 8.582 10.067 9.82e-12 ***
## plotData4$VGLL3LL 22.062 10.737 2.055 0.0477 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30.94 on 34 degrees of freedom
## Multiple R-squared: 0.1105, Adjusted R-squared: 0.08429
## F-statistic: 4.222 on 1 and 34 DF, p-value: 0.04765
summary(fitAge)
##
## Call:
## lm(formula = plotData4$TotCopies ~ plotData4$DPH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -88.026 -20.139 -1.631 26.981 67.315
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 70.2455 34.5039 2.036 0.0496 *
## plotData4$DPH 0.1148 0.1293 0.888 0.3809
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.44 on 34 degrees of freedom
## Multiple R-squared: 0.02265, Adjusted R-squared: -0.006092
## F-statistic: 0.7881 on 1 and 34 DF, p-value: 0.3809
# are the slopes of the linear model different for EE and LL?
# option 1: look at interaction term
anova(lm(plotData4$TotCopies ~ plotData4$VGLL3 + plotData4$VGLL3 * plotData4$DPH))
## Analysis of Variance Table
##
## Response: plotData4$TotCopies
## Df Sum Sq Mean Sq F value Pr(>F)
## plotData4$VGLL3 1 4042.6 4042.6 4.1480 0.05003 .
## plotData4$DPH 1 768.4 768.4 0.7885 0.38119
## plotData4$VGLL3:plotData4$DPH 1 601.0 601.0 0.6167 0.43805
## Residuals 32 31186.9 974.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# calculate fitted values for means and 95% confidence interval
plotData4$fittedSimple = predict(fitSimple)
plotData4$fittedSimpleLwr = predict(fitSimple,interval="confidence")[,'lwr']
plotData4$fittedSimpleUpr = predict(fitSimple,interval="confidence")[,'upr']
# confidence interval for EE-LL contrast
marginal= emmeans(fitSimple, ~ VGLL3, weights="proportional")
pairs(marginal)
## contrast estimate SE df t.ratio p.value
## EE - LL -21.9 10.8 33 -2.033 0.0501
# contrast p-value
my_comparisons1 = data.frame(start=c("EE"),
end=c("LL"),
y=c(145,145),
label=c("*"))
# plot EE & LL
expPlot = ggplot(plotData4, aes(DPH,TotCopies,colour=VGLL3,group=VGLL3)) +
geom_jitter(size=4,alpha=0.7) +
theme_classic() +
scale_colour_manual(values=c("blue","red")) +
geom_smooth(aes(DPH,fittedSimple),method="loess") +
#geom_ribbon(aes(ymin=fittedSimpleLwr, ymax=fittedSimpleUpr), alpha=0.1, color=NA) +
xlab("Age (days post-hatch)") +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
#geom_text(aes(label=Sample),position=position_jitter(width=0.2),hjust=0, vjust=0,size=3) +
ylim(0,250) +
theme(axis.text=element_text(size=14,color="black"),
axis.title=element_text(size=14,color="black")
,plot.title = element_text(size=14, face="bold",hjust=0.5),
legend.position = "none")
insert = ggplot(as.data.frame(marginal)) +
geom_pointrange(aes(y=emmean, ymin=lower.CL, ymax=upper.CL, x=VGLL3,color=VGLL3),size=1) +
scale_color_manual(values=c("blue","red")) +
geom_signif(data=my_comparisons1,
aes(xmin=start, xmax=end, annotations=label, y_position=y),
textsize = 3, size=0.5, tip_length = 0.01,
manual=TRUE) +
ylab("EMM") +
xlab(expression(~italic(vgll3)~'')) +
theme_classic() +
theme(axis.text=element_text(size=14,color="black"),
axis.title=element_text(size=14,color="black")
,plot.title = element_text(size=14, face="bold",hjust=0.5),
legend.position = "none")
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
ggdraw() +
draw_plot(expPlot) +
draw_plot(insert, x = 0.17, y = .65, width = .2, height = .3)
## `geom_smooth()` using formula 'y ~ x'
Model female gonad EE and LL expression.
# subset for female gonads (both immature and mature) and only EE & LL genotypes
plotData8 = plotData2[which(plotData2$TissueOrRep=="Gn" & plotData2$TissueOrGonadType %in% c("Ov") & plotData2$VGLL3 %in% c("EE","LL")),]
# fit linear regression - alternatively (use poly() to perform the polynomial regressions)
fitSimple <- lm(plotData8$TotCopies ~ plotData8$VGLL3 + plotData8$DPH)
Anova(fitSimple)
## Anova Table (Type II tests)
##
## Response: plotData8$TotCopies
## Sum Sq Df F value Pr(>F)
## plotData8$VGLL3 607.2 1 2.7967 0.105598
## plotData8$DPH 2620.0 1 12.0668 0.001688 **
## Residuals 6079.4 28
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# significance of the model
summary(fitSimple)
##
## Call:
## lm(formula = plotData8$TotCopies ~ plotData8$VGLL3 + plotData8$DPH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.589 -8.882 -2.587 8.741 28.881
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 78.93409 13.88427 5.685 4.28e-06 ***
## plotData8$VGLL3LL 8.86503 5.30100 1.672 0.10560
## plotData8$DPH -0.17792 0.05122 -3.474 0.00169 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.74 on 28 degrees of freedom
## Multiple R-squared: 0.355, Adjusted R-squared: 0.3089
## F-statistic: 7.706 on 2 and 28 DF, p-value: 0.002156
# confidence interval for EE-LL contrast
marginal= emmeans(fitSimple, ~ VGLL3, weights="proportional")
pairs(marginal)
## contrast estimate SE df t.ratio p.value
## EE - LL -8.87 5.3 28 -1.672 0.1056
# calculate fitted values
plotData8$fittedSimple = predict(fitSimple)
# plot EE & LL
ggplot(plotData8, aes(DPH,TotCopies,color=VGLL3,group=VGLL3)) +
geom_jitter(size=4,alpha=0.7) +
theme_classic() +
scale_color_manual(values=c("blue","red")) +
geom_smooth(aes(DPH,fittedSimple),method="loess") +
xlab("Age (days post-hatch)") +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
theme(axis.text=element_text(size=14,color="black"),
axis.title=element_text(size=14,color="black"),
plot.title = element_text(size=14, face="bold",hjust=0.5),
legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
Plot heart expression for males and females. Compile plot data and fitted values from lm into one data frame
# split by gonad type
plotData10 = plotData2[which(plotData2$TissueOrRep=="Hr" & plotData2$Gonads %in% c("s") & plotData2$VGLL3 %in% c("EE","LL")),]
plotData11 = plotData2[which(plotData2$TissueOrRep=="Hr" & plotData2$Gonads %in% c("m") & plotData2$VGLL3 %in% c("EE","LL")),]
plotData12 = plotData2[which(plotData2$TissueOrRep=="Hr" & plotData2$Gonads %in% c("xl") & plotData2$VGLL3 %in% c("EE","LL")),]
# predict fitted values
fitSimple1 <- lm(plotData10$TotCopies ~ plotData10$VGLL3 + plotData10$DPH)
Anova(fitSimple1)
## Anova Table (Type II tests)
##
## Response: plotData10$TotCopies
## Sum Sq Df F value Pr(>F)
## plotData10$VGLL3 26197 1 3.7175 0.06483 .
## plotData10$DPH 23611 1 3.3505 0.07867 .
## Residuals 183219 26
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plotData10$fittedSimple = predict(fitSimple1)
fitSimple2 <- lm(plotData11$TotCopies ~ plotData11$VGLL3 + plotData11$DPH)
Anova(fitSimple2)
## Anova Table (Type II tests)
##
## Response: plotData11$TotCopies
## Sum Sq Df F value Pr(>F)
## plotData11$VGLL3 690 1 0.1513 0.6998
## plotData11$DPH 10259 1 2.2490 0.1432
## Residuals 150530 33
plotData11$fittedSimple = predict(fitSimple2)
fitSimple3 <- lm(plotData12$TotCopies ~ plotData12$VGLL3 + plotData12$DPH)
Anova(fitSimple3)
## Anova Table (Type II tests)
##
## Response: plotData12$TotCopies
## Sum Sq Df F value Pr(>F)
## plotData12$VGLL3 1830.0 1 1.0108 0.3609
## plotData12$DPH 725.7 1 0.4008 0.5545
## Residuals 9052.6 5
plotData12$fittedSimple = predict(fitSimple3)
# combine into one data frame
plotDataHeart = rbind(plotData10,plotData11,plotData12)
# plot
tissueLabels = c("m"="Female","s"="Immature male","xl"="Mature male")
ggplot(plotDataHeart,aes(DPH,TotCopies,group=VGLL3, color=VGLL3)) +
geom_point(aes(DPH, TotCopies),size=3,alpha=0.7) +
facet_wrap(~Gonads,labeller=labeller(Gonads=tissueLabels)) +
geom_line(aes(y=fittedSimple)) +
theme_classic() +
scale_colour_manual(values=c("blue","red")) +
theme(axis.title.y=element_text(size=16),
axis.text.y=element_text(size=16, color="black"),
axis.title.x=element_text(size=16),
axis.text.x=element_text(hjust=1, vjust=0.5, size=16, color="black", angle=90),
strip.text.x=element_text(size=16),
strip.background = element_blank()) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
xlab("Age (days post-hatch)")
# heart expression difference between immature and mature maless - means
t.test(
plotData2$TotCopies[which(plotData2$TissueOrRep=='Hr' & plotData2$Gonads == "xl")],
plotData2$TotCopies[which(plotData2$TissueOrRep=='Hr' & plotData2$Gonads == "s")])
##
## Welch Two Sample t-test
##
## data: plotData2$TotCopies[which(plotData2$TissueOrRep == "Hr" & plotData2$Gonads == "xl")] and plotData2$TotCopies[which(plotData2$TissueOrRep == "Hr" & plotData2$Gonads == "s")]
## t = -3.0359, df = 27.8, p-value = 0.005163
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -112.96981 -21.92361
## sample estimates:
## mean of x mean of y
## 61.85497 129.30168
Plot adipose expression for males and females.
# split by gonad type
plotData13 = plotData2[which(plotData2$TissueOrRep=="Ad" & plotData2$Gonads %in% c("s") & plotData2$VGLL3 %in% c("EE","LL")),]
plotData14 = plotData2[which(plotData2$TissueOrRep=="Ad" & plotData2$Gonads %in% c("m") & plotData2$VGLL3 %in% c("EE","LL")),]
plotData15 = plotData2[which(plotData2$TissueOrRep=="Ad" & plotData2$Gonads %in% c("xl") & plotData2$VGLL3 %in% c("EE","LL")),]
# predict fitted values
fitSimple4 <- lm(plotData13$TotCopies ~ plotData13$VGLL3 + plotData13$DPH)
Anova(fitSimple4)
## Anova Table (Type II tests)
##
## Response: plotData13$TotCopies
## Sum Sq Df F value Pr(>F)
## plotData13$VGLL3 468.4 1 1.9737 0.1734
## plotData13$DPH 3.0 1 0.0125 0.9118
## Residuals 5458.2 23
plotData13$fittedSimple = predict(fitSimple4)
fitSimple5 <- lm(plotData14$TotCopies ~ plotData14$VGLL3 + plotData14$DPH)
Anova(fitSimple5)
## Anova Table (Type II tests)
##
## Response: plotData14$TotCopies
## Sum Sq Df F value Pr(>F)
## plotData14$VGLL3 1.0 1 0.0030 0.9566
## plotData14$DPH 504.5 1 1.5226 0.2259
## Residuals 10934.3 33
plotData14$fittedSimple = predict(fitSimple5)
fitSimple6 <- lm(plotData15$TotCopies ~ plotData15$VGLL3 + plotData15$DPH)
Anova(fitSimple6)
## Anova Table (Type II tests)
##
## Response: plotData15$TotCopies
## Sum Sq Df F value Pr(>F)
## plotData15$VGLL3 185.97 1 0.2881 0.6199
## plotData15$DPH 59.98 1 0.0929 0.7757
## Residuals 2582.12 4
plotData15$fittedSimple = predict(fitSimple6)
# combine into one data frame
plotDataAdipose = rbind(plotData13,plotData14,plotData15)
# plot
tissueLabels = c("m"="Female","s"="Immature male","xl"="Mature male")
ggplot(plotDataAdipose,aes(DPH,TotCopies,group=VGLL3, color=VGLL3)) +
geom_point(aes(DPH, TotCopies),size=3,alpha=0.7) +
facet_wrap(~Gonads,labeller=labeller(Gonads=tissueLabels)) +
geom_line(aes(y=fittedSimple)) +
theme_classic() +
scale_colour_manual(values=c("blue","red")) +
theme(axis.title.y=element_text(size=16),
axis.text.y=element_text(size=16, color="black"),
axis.title.x=element_text(size=16),
axis.text.x=element_text(hjust=1, vjust=0.5, size=16, color="black", angle=90),
strip.text.x=element_text(size=16),
strip.background = element_blank()) +
ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
xlab("Age (days post-hatch)")
Data from averaaged replicates (N=2).
colnames(scamp1Means)[2]="TotCopies"
colnames(vgll3Exon1Means)[2]="TotCopies"
scamp1Means$Assay = "scamp1"
vgll3Mis1Means$Assay = "vgll3Mis1"
vgll3Mis2Means$Assay = "vgll3Mis2"
vgll3Exon1Means$Assay = "vgll3Exon1"
head(scamp1Means)
## Sample TotCopies sd sem n VGLL3 Assay
## 1 282-Gn 419.3939 NA NA 1 EE scamp1
## 2 283-Gn 517.5627 NA NA 1 LL scamp1
## 3 300-Gn 474.7628 NA NA 1 LL scamp1
## 4 320-Gn 460.4520 NA NA 1 LL scamp1
## 5 327-Gn 571.4286 NA NA 1 EE scamp1
## 6 328-Gn 360.8355 NA NA 1 LL scamp1
head(vgll3Mis1Means)
## Sample TotCopies sd sem n Assay VGLL3
## 1 282-Gn 86.0067 25.173906 17.800639 2 vgll3Mis1 EE
## 2 283-Gn 108.5455 10.564514 7.470240 2 vgll3Mis1 LL
## 3 300-Gn 113.9462 20.935795 14.803843 2 vgll3Mis1 LL
## 4 320-Gn 103.7675 1.914799 1.353968 2 vgll3Mis1 LL
## 5 327-Gn 101.9139 71.793047 50.765350 2 vgll3Mis1 EE
## 6 328-Gn 110.6383 27.418472 19.387787 2 vgll3Mis1 LL
head(vgll3Mis2Means)
## Sample TotCopies sd sem n VGLL3 Assay
## 1 282-Gn 69.98485 21.877455 15.469697 2 EE vgll3Mis2
## 2 283-Gn 77.65591 5.200656 3.677419 2 LL vgll3Mis2
## 3 300-Gn 76.66034 NA NA 1 LL vgll3Mis2
## 4 320-Gn 62.76412 22.789213 16.114407 2 LL vgll3Mis2
## 5 327-Gn 110.51213 29.732792 21.024259 2 EE vgll3Mis2
## 6 328-Gn 63.20627 16.571777 11.718016 2 LL vgll3Mis2
head(vgll3Exon1Means)
## Sample TotCopies sd sem n VGLL3 Assay
## 1 282-Gn 103.9394 NA NA 1 EE vgll3Exon1
## 2 283-Gn 147.6703 NA NA 1 LL vgll3Exon1
## 3 300-Gn 129.7913 NA NA 1 LL vgll3Exon1
## 4 320-Gn 122.3164 NA NA 1 LL vgll3Exon1
## 5 327-Gn 214.5553 NA NA 1 EE vgll3Exon1
## 6 328-Gn 126.3708 NA NA 1 LL vgll3Exon1
combinedAssays = rbind(scamp1Means,vgll3Exon1Means,vgll3Mis1Means,vgll3Mis2Means)
head(combinedAssays)
## Sample TotCopies sd sem n VGLL3 Assay
## 1 282-Gn 419.3939 NA NA 1 EE scamp1
## 2 283-Gn 517.5627 NA NA 1 LL scamp1
## 3 300-Gn 474.7628 NA NA 1 LL scamp1
## 4 320-Gn 460.4520 NA NA 1 LL scamp1
## 5 327-Gn 571.4286 NA NA 1 EE scamp1
## 6 328-Gn 360.8355 NA NA 1 LL scamp1
Plot expression levels in different exons.
assayLabels = c("vgll3Exon1"="Exon 1-2 boundary","vgll3Mis1"="Exon 2","vgll3Mis2"="Exon 3")
ggplot(combinedAssays[which(combinedAssays$Assay != "scamp1"),],aes(VGLL3,TotCopies,color=VGLL3))+
#geom_boxplot(outlier.alpha = 0) +
geom_quasirandom(size=5,alpha=0.3) +
stat_summary(fun=mean, geom="point", shape=17, size=4, color="black", fill="black",alpha=0.5) +
scale_color_manual(values=c("blue","red")) +
facet_wrap(~Assay, labeller=labeller(Assay=assayLabels)) +
theme_classic() +
theme(axis.text.x=element_text(size=14,color="black"),
axis.title.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_text(size=14,color="black"),
axis.title.y=element_text(size=14,color="black"),
axis.line.x = element_blank(),
strip.background=element_blank(),
strip.text=element_text(size=14,color="black"),
legend.position="none") +
xlab("Age (days post-hatch)") +
ylab("copies / ng RNA")
## Warning: Removed 2 rows containing non-finite values (stat_summary).
## Warning: Removed 2 rows containing missing values (position_quasirandom).
t.test(combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Exon1" & combinedAssays$VGLL3 == "EE")],
combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Exon1" & combinedAssays$VGLL3 == "LL")])
##
## Welch Two Sample t-test
##
## data: combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Exon1" & combinedAssays$VGLL3 == "EE")] and combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Exon1" & combinedAssays$VGLL3 == "LL")]
## t = 0.58699, df = 18.486, p-value = 0.5643
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -17.37566 30.88477
## sample estimates:
## mean of x mean of y
## 138.0463 131.2918
t.test(combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis1" & combinedAssays$VGLL3 == "EE")],
combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis1" & combinedAssays$VGLL3 == "LL")])
##
## Welch Two Sample t-test
##
## data: combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis1" & combinedAssays$VGLL3 == "EE")] and combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis1" & combinedAssays$VGLL3 == "LL")]
## t = -2.5454, df = 31.038, p-value = 0.0161
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -27.539557 -3.039125
## sample estimates:
## mean of x mean of y
## 91.06771 106.35705
t.test(combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis2" & combinedAssays$VGLL3 == "EE")],
combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis2" & combinedAssays$VGLL3 == "LL")])
##
## Welch Two Sample t-test
##
## data: combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis2" & combinedAssays$VGLL3 == "EE")] and combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis2" & combinedAssays$VGLL3 == "LL")]
## t = 2.0689, df = 25.169, p-value = 0.04898
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.08705111 35.88134301
## sample estimates:
## mean of x mean of y
## 97.23139 79.24719
Expression levels of the alleles in heterozygotes. The column FractionalAbundance gives Ch1 versus Ch2 percentage of counts relative to the sum of counts. This is within samples so we don’t need to take into account the starting amount of RNA. If there is a cis-regulatory difference that makes E-allele lower expressed it should be seen as ASE towards the L-allele in heterozygotes (keeping in mind the possibly higher efficiency of the L-allele probe). The analysis takes into account the possibly higher efficiency of the L (C)-probe using 1-effFactor correction.
Channel 1 corresponds to FAM dye and the C-allele (late maturation). Channel 2 corresponds to VIC dye and the T-allele (early maturation).
Below we analyze if Ch1 and Ch2 give the same number of copies in the control DNA. Any difference from 50:50 ratios is due to one probe being lower efficiency (giving lower number of counts). We are calculating a correction factor to account for differences in efficiency based on the mean of all e28 DNA controls we are including in the results table.
controlResMis1 = ddPCRres[which(ddPCRres$Sample == "e28-DNA"),]
# for poisson.test --> based on rate ratio
LnegCountsCtr = controlResMis1[which(controlResMis1$TargetType == "Ch1Unknown"),"Negatives"]
EnegCountsCtr = controlResMis1[which(controlResMis1$TargetType == "Ch2Unknown"),"Negatives"]
LcountsCtr = controlResMis1[which(controlResMis1$TargetType == "Ch1Unknown"),"Positives"]
EcountsCtr = controlResMis1[which(controlResMis1$TargetType == "Ch2Unknown"),"Positives"]
effFactor = apply(cbind(LcountsCtr,EcountsCtr,LnegCountsCtr,EnegCountsCtr), 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]))[["estimate"]])
effFactor
## [1] 1.0273950 1.0289535 1.0477013 1.0095218 1.0035206 1.0297707 1.0005037
## [8] 1.0246971 1.0649968 0.9955902
effFactor = mean(effFactor)
paste("Mean of all control reps:",effFactor)
## [1] "Mean of all control reps: 1.0232650904439"
Below we test the difference between L and E allele counts within each heterozygous samples using a comparison of two Poisson rates. There is no consensus in the literature which test to use. For example in RNAseq people typically use a simple binomial test, but there it’s hard to know the “negative” rate required to model the process using a Poisson distribution. Now we have this - it’s the number of negative droplets - so using Poisson modeling may be more appropriate (this is what the QuantaSoft software from Bio-Rad uses). For this we need the number of positive AND negative counts. See more about comparing Poisson rates here: https://cran.r-project.org/web/packages/rateratio.test/vignettes/rateratio.test.pdf We are using the value stored as effFactor (relative freq of channel 1 positives to channel 2 positives based on genomic DNA from a heterozygote) as a null hypothesis.
LnegCountsHet = plotData2$Ch1Neg[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn' )]
EnegCountsHet = plotData2$Ch2Neg[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn')]
LcountsHet = plotData2$Ch1Pos[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn' )]
EcountsHet = plotData2$Ch2Pos[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn' )]
# Initiate table for results of Poisson test
poissonRes = as.data.frame(matrix(data=NA,ncol=10,nrow=length(LcountsHet),dimnames=list(c(plotData2$Sample[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn' )]),c("Sample","vgll3","sex","TotCopies","Lcounts","LcountsNeg","Ecounts","EcountsNeg","poissonPvalue","Timepoint"))))
poissonRes$Sample = rownames(poissonRes)
poissonRes$vgll3 = plotData2$VGLL3[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn' )]
poissonRes$SexPheno = plotData2$SexPheno[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn' )]
poissonRes$Gonads = plotData2$Gonads[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn' )]
poissonRes$TotCopies = plotData2$TotCopies[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn')]
poissonRes$Lcounts = LcountsHet
poissonRes$Ecounts = EcountsHet
poissonRes$LcountsNeg = LnegCountsHet
poissonRes$EcountsNeg = EnegCountsHet
poissonRes$Timepoint = plotData2$Timepoint[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn')]
# Remove na's (individuals that have been collected but that not yet been analyzed).
poissonRes = poissonRes[which(is.na(poissonRes$TotCopies) == F),]
# Test ASE using Poisson rate ratio
poissonRes$poissonPvalue = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["p.value"]])
poissonRes$poissonRateRatio = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["estimate"]])
poissonRes$rateRatioCI95low = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["conf.int"]][1])
poissonRes$rateRatioCI95high = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["conf.int"]][2])
# Transfer DPH from previous data frames
poissonRes$DPH = plotData2$DPH[match(poissonRes$Sample,plotData2$Sample)]
head(poissonRes)
## Sample vgll3 sex TotCopies Lcounts LcountsNeg Ecounts EcountsNeg
## 269-Gn 269-Gn LE NA 124.72727 1010 16017 920 16107
## 272-Gn 272-Gn EL NA 37.98995 280 15630 227 15683
## 281-Gn 281-Gn LE NA 70.98765 680 13632 685 13627
## 284-Gn 284-Gn EL NA 87.42857 726 10716 716 10726
## 285-Gn 285-Gn LE NA 40.24845 813 10816 740 10889
## 289-Gn 289-Gn LE NA 68.40149 562 13452 512 13502
## poissonPvalue Timepoint SexPheno Gonads poissonRateRatio
## 269-Gn 0.0965949 7 NA m 1.1039948
## 272-Gn 0.0330781 7 NA m 1.2376628
## 281-Gn 0.5882697 7 NA m 0.9923366
## 284-Gn 0.8952450 8 NA s 1.0149127
## 285-Gn 0.1278868 8 NA m 1.1060637
## 289-Gn 0.2340215 8 NA s 1.1017361
## rateRatioCI95low rateRatioCI95high DPH
## 269-Gn 1.0086534 1.208464 193.9583
## 272-Gn 1.0351918 1.481012 193.9583
## 281-Gn 0.8911257 1.105031 193.9583
## 284-Gn 0.9141042 1.126860 219.9583
## 285-Gn 0.9999919 1.223535 219.9583
## 289-Gn 0.9756580 1.244324 219.9583
# sort according to Poisson rate ratio
poissonRes$Sample = factor(poissonRes$Sample, levels = poissonRes$Sample[order(poissonRes$poissonRateRatio)])
Plot Poisson rate ratio of L allele expression versus E allele expression. Null hypothesis of 1:1 expression rate is calculated based on heterozygous DNA as template. Points are colored according to wether the 95% confidence interval of rate ratio includes zero.
plotData9 = plotData2[which(plotData2$VGLL3 %in% c("EL","LE") & plotData2$CollectionDate %in% c("19022018","20032018","20042018","17052018") == F & plotData2$TissueOrGonadType == 'IT'),]
plotData9$poissonPvalue = poissonRes$poissonPvalue[match(plotData9$Sample,poissonRes$Sample)]
plotData9$poissonRateRatio = poissonRes$poissonRateRatio[match(plotData9$Sample,poissonRes$Sample)]
plotData9$poissonRateRatio = poissonRes$poissonRateRatio[match(plotData9$Sample,poissonRes$Sample)]
plotData9$rateRatioCI95low = poissonRes$rateRatioCI95low[match(plotData9$Sample,poissonRes$Sample)]
plotData9$rateRatioCI95high = poissonRes$rateRatioCI95high[match(plotData9$Sample,poissonRes$Sample)]
# order by poisson rate ratio
plotData9$Sample = factor(plotData9$Sample, levels = plotData9$Sample[order(plotData9$poissonRateRatio)])
g = ggplot(plotData9,aes(x=Sample,log(poissonRateRatio,base=2),ymin=log(rateRatioCI95low,base=2),ymax=log(rateRatioCI95high,base=2),color=poissonPvalue<0.05)) +
geom_pointrange(alpha=0.7) +
geom_hline(yintercept = log(effFactor,base=2), linetype=2) +
theme_classic() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.line.x = element_blank(),
axis.text.y=element_text(size=14,color="black"),
axis.title.y=element_text(size=14,color="black")) +
scale_color_manual(values=c("darkgrey","black")) +
theme(legend.position = "none") +
ylab("Log2 L-rate/E-rate") +
ylim(-1.1,1.1)
rateFig = ggMarginal(g, type = "density", fill="darkgrey", margins="y")
rateFig
ggplot(plotData9) +
geom_quasirandom(aes(1, FractionalAbundance,color=poissonPvalue),dodge.width=0.1,groupOnX=T,size=5,alpha=0.7) +
geom_segment(y=mean(plotData9$FractionalAbundance),yend=mean(plotData9$FractionalAbundance),x=0.8,xend=1.2,size=1.5,lineend="round") +
geom_hline(yintercept=mean(controlResMis1$FractionalAbundance,na.rm=T),color="darkgrey",linetype="dotted") +
theme_classic() +
ylab("L/(L+E) allele abundance %") +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_text(size=14,color="black"),
axis.title.y=element_text(size=14,color="black"),
axis.line.x = element_blank(),
legend.position="bottom",
legend.title = element_text(hjust=1,vjust=0.75,size=10,color="black")) +
#guides(colour=guide_legend(title.position = "top")) +
scale_color_gradient2(name = "ASE P-value", high="green", low="black", mid="green4", midpoint=0.4)
Can ASE account for EE-LL difference in exon 2 assay? Compare number of expressed molecules from each allele.
# average molecule difference between LL and EE
aveHomozDiffExon2 = mean(combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis1" & combinedAssays$VGLL3 == "EE")],na.rm=T) -
mean(combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis1" & combinedAssays$VGLL3 == "LL")],na.rm=T)
# average molecule difference between L and E
aveAlleleDiffExon2 = mean(plotData9$Ch2copies[which(plotData9$Sample %in% c("e28-DNA","G-gBlock","C-gBlock") == F)] ,na.rm=T) - mean(plotData9$Ch1copies[which(plotData9$Sample %in% c("e28-DNA","G-gBlock","C-gBlock") == F)],na.rm=T)
# how much % does 2*average L-E difference account from LL-EE difference (in molecules)?
2*(aveAlleleDiffExon2) / (aveHomozDiffExon2) * 100
## [1] 88.87064
colnames(ddPCRresMis2) = iconv(colnames(ddPCRresMis2), to = "ASCII", sub = "u")
colnames(ddPCRresMis2) = c(colnames(ddPCRresMis2)[-2],"Empty")
colnames(ddPCRresMis2)[1] = "Well"
# assign falsely labeled well
#ddPCRres$Sample[which(ddPCRres$Well == "H04")] = "e28-DNA"
ddPCRresMis2$Sample[which(ddPCRresMis2$Well == "D03")] = "437-Gn"
ddPCRresMis2$Sample[which(ddPCRresMis2$Well == "D09")] = "437-Gn"
# remove failed wells
ddPCRresMis2 = ddPCRresMis2[which(ddPCRresMis2$AcceptedDroplets > 10000),]
# remove samples that could not be re-analyzed from non-diluted RNA samples
ddPCRresMis2 = ddPCRresMis2[which(ddPCRresMis2$Sample %in% c("289-Gn","322-Gn","326-Gn","337-Gn","338-Gn","358-Gn","370-Gn","373-Gn","385-Gn","389-Gn","390-Gn") == F),]
Additional control wells.
colnames(ddPCRresAdd) = iconv(colnames(ddPCRresAdd), to = "ASCII", sub = "u")
colnames(ddPCRresAdd) = c(colnames(ddPCRresAdd)[-2],"Empty")
colnames(ddPCRresAdd)[1] = "Well"
# remove failed wells
ddPCRresAdd = ddPCRresAdd[which(ddPCRresAdd$AcceptedDroplets > 10000),]
controlResMis2 = rbind(ddPCRresMis2[which(ddPCRresMis2$Sample == "e28-DNA"),],ddPCRresAdd[which(ddPCRresAdd$Sample == "e28-DNA"),])
# for poisson.test --> based on rate ratio
LnegCountsCtr = controlResMis2[which(controlResMis2$TargetType == "Ch1Unknown"),"Negatives"]
EnegCountsCtr = controlResMis2[which(controlResMis2$TargetType == "Ch2Unknown"),"Negatives"]
LcountsCtr = controlResMis2[which(controlResMis2$TargetType == "Ch1Unknown"),"Positives"]
EcountsCtr = controlResMis2[which(controlResMis2$TargetType == "Ch2Unknown"),"Positives"]
effFactor = apply(cbind(LcountsCtr,EcountsCtr,LnegCountsCtr,EnegCountsCtr), 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]))[["estimate"]])
effFactor
## [1] 0.9125112 0.9086053 1.0547780 1.0334503 1.0157662 1.0311079 0.8821894
## [8] 1.0048434 0.9456879
effFactor = mean(effFactor)
paste("Mean of all control reps:",effFactor)
## [1] "Mean of all control reps: 0.976548842009717"
plotData16 = ddPCRresMis2
LnegCountsHet = plotData16$Negatives[which(plotData16$TargetType == "Ch1Unknown")]
EnegCountsHet = plotData16$Negatives[which(plotData16$TargetType == "Ch2Unknown")]
LcountsHet = plotData16$Positives[which(plotData16$TargetType == "Ch1Unknown")]
EcountsHet = plotData16$Positives[which(plotData16$TargetType == "Ch2Unknown")]
# Initiate table for results of Poisson test
poissonRes = as.data.frame(matrix(data=NA,ncol=6,nrow=length(LcountsHet),
dimnames=list(plotData16$Positives[which(plotData16$TargetType == "Ch1Unknown")],c("Sample","Lcounts","LcountsNeg","Ecounts","EcountsNeg","poissonPvalue"))))
poissonRes$Sample = plotData16$Sample[which(plotData16$TargetType == "Ch1Unknown")]
poissonRes$Lcounts = LcountsHet
poissonRes$Ecounts = EcountsHet
poissonRes$LcountsNeg = LnegCountsHet
poissonRes$EcountsNeg = EnegCountsHet
# Test ASE using Poisson rate ratio
poissonRes$poissonPvalue = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["p.value"]])
poissonRes$poissonRateRatio = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["estimate"]])
poissonRes$rateRatioCI95low = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["conf.int"]][1])
poissonRes$rateRatioCI95high = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["conf.int"]][2])
plotData17 = plotData16[which(plotData16$TargetType == "Ch1Unknown" & plotData16$Sample %in% c("e28-DNA","G-gBlock","C-gBlock") == F & is.na(plotData16$FractionalAbundance) == F),c("Sample","FractionalAbundance")]
plotData17$poissonPvalue = poissonRes$poissonPvalue[match(plotData17$Sample,poissonRes$Sample)]
plotData17$poissonRateRatio = poissonRes$poissonRateRatio[match(plotData17$Sample,poissonRes$Sample)]
plotData17$rateRatioCI95low = poissonRes$rateRatioCI95low[match(plotData17$Sample,poissonRes$Sample)]
plotData17$rateRatioCI95high = poissonRes$rateRatioCI95high[match(plotData17$Sample,poissonRes$Sample)]
ggplot(plotData17) +
geom_quasirandom(aes(1, FractionalAbundance,color=poissonPvalue),dodge.width=0.1,groupOnX=T,size=5,alpha=0.7) +
geom_segment(y=mean(plotData17$FractionalAbundance),yend=mean(plotData17$FractionalAbundance),x=0.8,xend=1.2,size=1.5,lineend="round") +
geom_hline(yintercept=mean(controlResMis2$FractionalAbundance,na.rm=T),color="darkgrey",linetype="dotted") +
theme_classic() +
ylab("L/(L+E) allele abundance %") +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_text(size=14,color="black"),
axis.title.y=element_text(size=14,color="black"),
axis.line.x = element_blank(),
legend.position="bottom",
legend.title = element_text(hjust=1,vjust=0.75,size=10,color="black")) +
#guides(colour=guide_legend(title.position = "top")) +
scale_color_gradient2(name = "ASE P-value", high="green", low="black", mid="green4", midpoint=0.4)
nrow(plotData17)
## [1] 35
nrow(plotData17[which(plotData17$poissonPvalue<0.05 & plotData17$FractionalAbundance>49.4),])
## [1] 0
nrow(plotData17[which(plotData17$poissonPvalue<0.05 & plotData17$FractionalAbundance<49.4),])
## [1] 31
mean(plotData17$FractionalAbundance)
## [1] 46.27143
Attact concentrations fo mis-sense SNP 2.
# add starting total RNA concentration
ddPCRresMis2$RNAconc = RNAconcMis2$Concentration[match(ddPCRresMis2$Sample,RNAconcMis2$Sample)]
Normalizing to input RNA amount. QuantaSoft also provides calculated number of template molecules per 20 ul well. Each 20 ul well contains 2 ul of template at concentration X ng/ul_H2O, meaning 2•X ng/ul_H2O total RNA per well. Copies per ng total input RNA is then simply \(\frac{copies per well}{2*RNAconc}\)
# Split ddPCR results into two based on channels
ddPCRch1 = ddPCRresMis2[which(ddPCRresMis2$TargetType == "Ch1Unknown"),]
ddPCRch2 = ddPCRresMis2[which(ddPCRresMis2$TargetType == "Ch2Unknown"),]
# calculation
ddPCRch1$Copies = as.numeric(ddPCRch1$CopiesPer20uLWell)/(ddPCRch1$RNAconc*2)
ddPCRch2$Copies = as.numeric(ddPCRch2$CopiesPer20uLWell)/(ddPCRch2$RNAconc*2)
# attach back to main data frame
plotData17$Ch1copies = ddPCRch1$Copies[match(plotData17$Sample,ddPCRch1$Sample)]
plotData17$Ch2copies = ddPCRch2$Copies[match(plotData17$Sample,ddPCRch2$Sample)]
plotData17$TotCopies = plotData17$Ch1copies + plotData17$Ch2copies
head(plotData17)
## Sample FractionalAbundance poissonPvalue poissonRateRatio rateRatioCI95low
## 1 284-Gn 46.2 4.028261e-03 0.8540371 0.7787323
## 3 301-Gn 47.3 4.199131e-03 0.8891630 0.8334008
## 6 329-Gn 47.6 3.757661e-03 0.9008410 0.8528048
## 7 330-Gn 46.9 9.195227e-03 0.8801711 0.8133884
## 8 333-Gn 48.3 1.185848e-01 0.9288378 0.8721419
## 11 593-Gn 44.5 1.480343e-18 0.7831328 0.7452175
## rateRatioCI95high Ch1copies Ch2copies TotCopies
## 1 0.9364747 39.51220 46.09756 85.60976
## 3 0.9486070 NA NA NA
## 6 0.9515517 62.42236 68.63354 131.05590
## 7 0.9523497 60.50955 68.15287 128.66242
## 8 0.9891882 52.90520 56.57492 109.48012
## 11 0.8229271 42.11538 52.50000 94.61538
Can ASE account for EE-LL difference? Compare number of expressed molecules from each allele.
# average molecule difference between LL and EE
aveHomozDiff = mean(combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis2" & combinedAssays$VGLL3 == "EE")],na.rm=T) -
mean(combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis2" & combinedAssays$VGLL3 == "LL")],na.rm=T)
# average molecule difference between L and E
aveAlleleDiff = mean(plotData17$Ch2copies[which(plotData17$Sample %in% c("e28-DNA","G-gBlock","C-gBlock") == F)] ,na.rm=T) - mean(plotData17$Ch1copies[which(plotData17$Sample %in% c("e28-DNA","G-gBlock","C-gBlock") == F)],na.rm=T)
# how much % does 2*average L-E difference account from LL-EE difference (in molecules)?
2*(aveAlleleDiff) / (aveHomozDiff) * 100
## [1] 83.21761
colnames(ddPCRresUTR) = iconv(colnames(ddPCRresUTR), to = "ASCII", sub = "u")
colnames(ddPCRresUTR) = c(colnames(ddPCRresUTR)[-2],"Empty")
colnames(ddPCRresUTR)[1] = "Well"
# assign falsely labeled well
ddPCRresUTR$Sample[which(ddPCRresUTR$Well == "D03")] = "437-Gn"
ddPCRresUTR$Sample[which(ddPCRresUTR$Well == "D09")] = "437-Gn"
# remove failed wells
ddPCRresUTR = ddPCRresUTR[which(ddPCRresUTR$AcceptedDroplets > 10000),]
# remove samples that could not be re-analyzed from non-diluted RNA samples
ddPCRresUTR = ddPCRresUTR[which(ddPCRresUTR$Sample %in% c("289-Gn","322-Gn","326-Gn","337-Gn","338-Gn","358-Gn","370-Gn","373-Gn","385-Gn","389-Gn","390-Gn") == F),]
plotData18 = ddPCRresUTR
LnegCountsHet = plotData18$Negatives[which(plotData18$TargetType == "Ch1Unknown")]
EnegCountsHet = plotData18$Negatives[which(plotData18$TargetType == "Ch2Unknown")]
LcountsHet = plotData18$Positives[which(plotData18$TargetType == "Ch1Unknown")]
EcountsHet = plotData18$Positives[which(plotData18$TargetType == "Ch2Unknown")]
# Initiate table for results of Poisson test
poissonResUTR = as.data.frame(matrix(data=NA,ncol=6,nrow=length(LcountsHet),
dimnames=list(plotData18$Positives[which(plotData18$TargetType == "Ch1Unknown")],c("Sample","Lcounts","LcountsNeg","Ecounts","EcountsNeg","poissonPvalue"))))
poissonResUTR$Sample = plotData18$Sample[which(plotData18$TargetType == "Ch1Unknown")]
poissonResUTR$Lcounts = LcountsHet
poissonResUTR$Ecounts = EcountsHet
poissonResUTR$LcountsNeg = LnegCountsHet
poissonResUTR$EcountsNeg = EnegCountsHet
# Test ASE using Poisson rate ratio
poissonResUTR$poissonPvalue = apply(poissonResUTR[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]))[["p.value"]])
poissonResUTR$poissonRateRatio = apply(poissonResUTR[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]))[["estimate"]])
poissonResUTR$rateRatioCI95low = apply(poissonResUTR[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]))[["conf.int"]][1])
poissonResUTR$rateRatioCI95high = apply(poissonResUTR[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]))[["conf.int"]][2])
plotData19 = plotData18[which(plotData18$TargetType == "Ch1Unknown",plotData18$Sample %in% c("EL-gBlock","NTC") == F & is.na(plotData18$InverseFractionalAbundance) == F),c("Sample","InverseFractionalAbundance")]
plotData19$poissonPvalue = poissonResUTR$poissonPvalue[match(plotData19$Sample,poissonResUTR$Sample)]
plotData19$poissonRateRatio = poissonResUTR$poissonRateRatio[match(plotData19$Sample,poissonResUTR$Sample)]
plotData19$rateRatioCI95low = poissonResUTR$rateRatioCI95low[match(plotData19$Sample,poissonResUTR$Sample)]
plotData19$rateRatioCI95high = poissonResUTR$rateRatioCI95high[match(plotData19$Sample,poissonResUTR$Sample)]
ggplot(plotData19) +
geom_quasirandom(aes(1, InverseFractionalAbundance,color=poissonPvalue),dodge.width=0.1,groupOnX=T,size=5,alpha=0.7) +
geom_segment(y=mean(plotData19$InverseFractionalAbundance),yend=mean(plotData19$InverseFractionalAbundance),x=0.8,xend=1.2,size=1.5,lineend="round") +
geom_hline(yintercept=50,color="darkgrey",linetype="dotted") +
theme_classic() +
ylab("L/(L+E) allele abundance %") +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_text(size=14,color="black"),
axis.title.y=element_text(size=14,color="black"),
axis.line.x = element_blank(),
legend.position="bottom",
legend.title = element_text(hjust=1,vjust=0.75,size=10,color="black")) +
#guides(colour=guide_legend(title.position = "top")) +
scale_color_gradient2(name = "ASE P-value", high="green", low="black", mid="green4", midpoint=0.4)
nrow(plotData19)
## [1] 35
nrow(plotData19[which(plotData19$poissonPvalue<0.05 & plotData19$InverseFractionalAbundance>50),])
## [1] 21
nrow(plotData19[which(plotData19$poissonPvalue<0.05 & plotData19$InverseFractionalAbundance<50),])
## [1] 5
mean(plotData19$InverseFractionalAbundance)
## [1] 54.00571
Combine ddPCR ASE results from URT and mis-sense SNPs 1 and 2 to the same figure.
plotData19$Assay = "UTR"
plotData19$FractionalAbundance = plotData19$InverseFractionalAbundance # dummy fractional abundance column
plotData17$Assay = "Mis2"
plotData9$Assay = "Mis1"
plotData20 = rbind(plotData19[,c("Sample","FractionalAbundance","poissonPvalue","poissonRateRatio","rateRatioCI95low","rateRatioCI95high","Assay")], plotData17[,c("Sample","FractionalAbundance","poissonPvalue","poissonRateRatio","rateRatioCI95low","rateRatioCI95high","Assay")], plotData9[,c("Sample","FractionalAbundance","poissonPvalue","poissonRateRatio","rateRatioCI95low","rateRatioCI95high","Assay")])
plotDataControl = data.frame(Assay = c("UTR","Mis1", "Mis2"), mean = c(50,mean(controlResMis1$FractionalAbundance,na.rm=T),mean(controlResMis2$FractionalAbundance,na.rm=T)))
plotDataControl$Assay = factor(plotDataControl$Assay,levels=c("UTR","Mis1","Mis2"))
assayLabels = c("UTR"="5' UTR", "Mis1"="Exon 2","Mis2"="Exon 3")
plotData20$Assay = factor(plotData20$Assay,levels=c("UTR","Mis1","Mis2"))
plotData20$ColorPvalue=1-plotData20$poissonPvalue
plotData20$ColorPvalue[which(plotData20$Assay == "UTR" & plotData20$FractionalAbundance<50)] = -(plotData20$ColorPvalue[which(plotData20$Assay == "UTR" & plotData20$FractionalAbundance<50)])
plotData20$ColorPvalue[which(plotData20$Assay == "Mis1" & plotData20$FractionalAbundance<50.44)] = -(plotData20$ColorPvalue[which(plotData20$Assay == "Mis1" & plotData20$FractionalAbundance<50.44)])
plotData20$ColorPvalue[which(plotData20$Assay == "Mis2" & plotData20$FractionalAbundance<49.4)] = -(plotData20$ColorPvalue[which(plotData20$Assay == "Mis2" & plotData20$FractionalAbundance<49.4)])
ggplot(plotData20) +
geom_quasirandom(aes(1, FractionalAbundance,color=ColorPvalue),dodge.width=0.1,groupOnX=T,size=5,alpha=0.7) +
#geom_segment(y=mean(plotData4$FractionalAbundance),yend=mean(plotData4$FractionalAbundance),x=0.8,xend=1.2,size=1.5,lineend="round") +
geom_hline(data=plotDataControl,aes(yintercept=mean),color="darkgrey",linetype="dotted") +
facet_wrap(~Assay,labeller=labeller(Assay=assayLabels)) +
theme_classic() +
ylab("L/(L+E) allele abundance %") +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_text(size=14,color="black"),
axis.title.y=element_text(size=14,color="black"),
axis.line.x = element_blank(),
strip.background=element_blank(),
strip.text=element_text(size=14,color="black"),
legend.position="bottom",
legend.title = element_text(hjust=1,vjust=0.75,size=10,color="black")) +
#guides(colour=guide_legend(title.position = "top")) +
scale_color_gradient2(name = "ASE P-value", high="red", low="blue", mid="grey", midpoint=0,labels=c(0,0.5,1,0.5,0))