Analysis of vgll3 expression (PLOS Genetics)

Comparison of vgll3 expression acros tissues

Load locally saved data files.

load("/Users/verta/Documents/ddPCR/ddPCR_results/ddPCRdataFiles.RData")

##

Assign shorted tissue labels.

design$TissueOrRep[which(design$TissueOrRep == "heart")] = "Hr"
design$TissueOrRep[which(design$TissueOrRep == "liver")] = "Liv"
design$TissueOrRep[which(design$TissueOrRep == "muscle")] = "Mc"
design$TissueOrRep[which(design$TissueOrRep == "adipose")] = "Ad"
design$TissueOrRep[which(design$TissueOrRep == "gonad")] = "Gn"
design$TissueOrRep[which(design$TissueOrRep == "brain")] = "Br"
design$TissueOrRep[which(design$TissueOrRep == "sperm")] = "Sp"
design$TissueOrRep[which(design$TissueOrRep == "stripped_gonad")] = "SG"
head(design)

##    VGLL3 FamID TankOrPosition Individual TissueOrRep Temp CollectionDate PIT
## 13    LL    13        W1-3-3C         11           1    W       19022018  NA
## 14    LL    13        W1-3-3C         43           2    W       19022018  NA
## 15    LL    13        W1-3-3C         75           3    W       19022018  NA
## 19    LE    15        W1-4-2A         17           1    W       19022018  NA
## 20    LE    15        W1-4-2A         49           2    W       19022018  NA
## 21    LE    15        W1-4-2A         81           3    W       19022018  NA
##    SexGeno SexPheno Gonads
## 13      NA       NA     NA
## 14      NA       NA     NA
## 15      NA       NA     NA
## 19      NA       NA     NA
## 20      NA       NA     NA
## 21      NA       NA     NA

Compile the results tables from QuantStudio. Join results table from different runs (reverse the order to get the first timepoint first). The “micro” in the column names is non-ASCII character that we ignore using check.names=F and then convert to “u” using iconv. Also the column names are shifted one towards the right, so this is corrected for.

ddPCRres = rbind(ddPCRres1, ddPCRres2, ddPCRres3, ddPCRres4, ddPCRres5, ddPCRres6, ddPCRres7, ddPCRres8, ddPCRres9, ddPCRres10, ddPCRres12, ddPCRres13, ddPCRres14)
colnames(ddPCRres) = iconv(colnames(ddPCRres), to = "ASCII", sub = "u")
colnames(ddPCRres) = c(colnames(ddPCRres)[-2],"Empty")

Compile table containing the input concentrations of RNA for these samples. Paste vgll3 genotype, position/tank, tissue/rep and collection date to make up sample ID’s as above.

conc$Sample = 'Na'

# match by sample names given in ddPCR plate (this may vary from run to run) 

# whole fry samples from 19022018
conc$Sample[which(conc$CollectionDate == '19022018')] = paste(conc$TankOrPosition[which(conc$CollectionDate == '19022018')], conc$TissueOrRep[which(conc$CollectionDate == '19022018')], sep='-')

# samples with vgll3-tank-tissue-date IDs
conc$Sample[which(conc$CollectionDate %in% c("20032018","20042018","17052018","11062018","11072018"))] = 
  paste(conc$VGLL3[which(conc$CollectionDate %in% c("20032018","20042018","17052018","11062018","11072018"))],
        conc$TankOrPosition[which(conc$CollectionDate %in% c("20032018","20042018","17052018","11062018","11072018"))], 
        conc$TissueOrRep[which(conc$CollectionDate %in% c("20032018","20042018","17052018","11062018","11072018"))], 
        conc$CollectionDate[which(conc$CollectionDate %in% c("20032018","20042018","17052018","11062018","11072018"))], sep='-')

# heterozygote samples from 11072018 with individual # IDs
conc$Sample[which(conc$CollectionDate == '11072018' & conc$VGLL3 %in% c("EL","LE"))] = 
  paste(conc$Individual[which(conc$CollectionDate == '11072018' & conc$VGLL3 %in% c("EL","LE"))], 
        conc$TissueOrRep[which(conc$CollectionDate == '11072018' & conc$VGLL3 %in% c("EL","LE"))], sep='-')

# homozygote/heterozygote samples with individual # IDs
conc$Sample[which(conc$CollectionDate %in% c("6082018","8082018","9082018","13082018","14082018", "15082018","16082018","17082018","27082018","28082018","29082018",
                                             "20092018","9102018","10102018","11102018","12102018",
                                                       "5112018","6112018","7112018","8112018","9112018","12112018",
                                                       "23112018","26112018","27112018","28112018","29112018","30112018","3122018","4122018","5122018","10012019"))] =
  paste(conc$Individual[which(conc$CollectionDate %in% c("6082018","8082018","9082018","13082018","14082018", "15082018","16082018","17082018","27082018","28082018","29082018",
                                                          "20092018","9102018","10102018","11102018","12102018",
                                                       "5112018","6112018","7112018","8112018","9112018","12112018",
                                                       "23112018","26112018","27112018","28112018","29112018","30112018","3122018","4122018","5122018","10012019"))],
        conc$TissueOrRep[which(conc$CollectionDate %in% c("6082018","8082018","9082018","13082018","14082018", "15082018","16082018","17082018","27082018","28082018","29082018",
                                                           "20092018","9102018","10102018","11102018","12102018",
                                                       "5112018","6112018","7112018","8112018","9112018","12112018",
                                                       "23112018","26112018","27112018","28112018","29112018","30112018","3122018","4122018","5122018","10012019"))],sep='-')

head(conc)

##    .id VGLL3 TankOrPosition Individual TissueOrRep CollectionDate RNAconc
## 1:   1    EL             10        268          Ad       11072018    10.8
## 2:   1    LE             10        269          Ad       11072018     9.6
## 3:   1    LE             10        269          Gn       11072018    11.0
## 4:   1    EL             12        272          Ad       11072018    10.0
## 5:   1    EL             12        272          Gn       11072018     7.6
## 6:   1    LE             12        273          Ad       11072018    10.3
##    Sample
## 1: 268-Ad
## 2: 269-Ad
## 3: 269-Gn
## 4: 272-Ad
## 5: 272-Gn
## 6: 273-Ad

tail(conc)

##    .id VGLL3 TankOrPosition Individual TissueOrRep CollectionDate RNAconc
## 1:  13    EE             15        278          Ad       11072018     9.8
## 2:  13    LL             15        279          Hr       11072018     9.1
## 3:  13    LL             15        279         Liv       11072018    10.7
## 4:  13    LL             15        279          Mc       11072018    10.8
## 5:  13    LL             15        279          Ad       11072018    10.1
## 6:  13    LL             15        279          Gn       11072018     9.8
##                Sample
## 1:  EE-15-Ad-11072018
## 2:  LL-15-Hr-11072018
## 3: LL-15-Liv-11072018
## 4:  LL-15-Mc-11072018
## 5:  LL-15-Ad-11072018
## 6:  LL-15-Gn-11072018

Duplicate concentration measurements

Second concentration reading for gonad samples that have been analyzed.

conc2 = na.omit(conc2)

Merge the two concentration measurements and compare by plotting 1st concentration readings versus 2nd.

aveConc = merge(conc,conc2,by.x="Sample",by.y="Sample")
summary(lm(aveConc$RNAconc.y ~ 0 + aveConc$RNAconc.x))

## 
## Call:
## lm(formula = aveConc$RNAconc.y ~ 0 + aveConc$RNAconc.x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -82.748   0.683   4.405  13.368  44.676 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## aveConc$RNAconc.x  0.92554    0.03045   30.39   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.16 on 107 degrees of freedom
## Multiple R-squared:  0.8962, Adjusted R-squared:  0.8952 
## F-statistic: 923.8 on 1 and 107 DF,  p-value: < 2.2e-16

plot(aveConc$RNAconc.x,aveConc$RNAconc.y, ylab="RNA concentration measurement 1", xlab="RNA concentration measurement 2")
abline(lm(aveConc$RNAconc.y ~ 0 + aveConc$RNAconc.x))

Below we identify problematic samples where the 2nd concentration is much lower than the 1st (degradation of RNA more likely than false concentration on the 1st reading).

aveConc[which(aveConc$RNAconc.x/aveConc$RNAconc.y > 2)]

##    Sample .id VGLL3 TankOrPosition Individual.x TissueOrRep CollectionDate
## 1: 326-Gn   9    LE           2(2)          326          Gn        9082018
## 2: 338-Gn   9    LE             15          338          Gn       13082018
## 3: 341-Gn   9    EL          15(2)          341          Gn       13082018
## 4: 358-Gn   9    LE          12(2)          358          Gn       14082018
## 5: 370-Gn   9    LE             10          370          Gn       15082018
## 6: 373-Gn   9    EL          10(2)          373          Gn       15082018
## 7: 389-Gn   9    EL          13(2)          389          Gn       16082018
## 8: 390-Gn   9    LE          13(2)          390          Gn       16082018
## 9: 621-Gn   8    LE              2          621          Gn        9102018
##    RNAconc.x Individual.y RNAconc.y
## 1:      97.4          326       7.4
## 2:      60.9          338      22.3
## 3:      89.4          341      12.9
## 4:      53.7          358      13.7
## 5:      69.4          370      10.2
## 6:      72.1          373      14.6
## 7:      90.6          389      16.8
## 8:      61.0          390      16.9
## 9:     117.0          621      47.0

# turn the second concentration reading for these samples to NA's
omitConcSamples = aveConc[which(aveConc$RNAconc.x/aveConc$RNAconc.y > 2),"Sample"]
conc2$RNAconc[which(conc2$Sample %in% omitConcSamples$Sample)] = NA
# verify
plot(conc$RNAconc[match(conc2$Sample,conc$Sample)],conc2$RNAconc, ylab="RNA concentration measurement 1", xlab="RNA concentration measurement 2")

Merge the two measurements again and take the average of the two measurements and use this as the value for all analyses.

aveConc = merge(conc,conc2,by.x="Sample",by.y="Sample")
aveConc$aveRNAconc = NA 
aveConc$aveRNAconc = apply(aveConc[,c("RNAconc.x","RNAconc.y")],1,mean,na.rm=T)
aveConc

##                 Sample .id VGLL3 TankOrPosition Individual.x TissueOrRep
##   1:           1112-Gn  10    EL             12         1112          Gn
##   2:           1113-Gn  10    EL          12(2)         1113          Gn
##   3:            272-Gn   1    EL             12          272          Gn
##   4:            283-Gn   1    LL              5          283          Gn
##   5:            284-Gn   9    EL              5          284          Gn
##  ---                                                                    
## 104: EE-10-Gn-11072018  13    EE             10          266          Gn
## 105: EE-12-Gn-11072018  13    EE             12          270          Gn
## 106: LL-10-Gn-11072018  13    LL             10          267          Gn
## 107: LL-12-Gn-11072018  13    LL             12          271          Gn
## 108: LL-15-Gn-11072018  13    LL             15          279          Gn
##      CollectionDate RNAconc.x Individual.y RNAconc.y aveRNAconc
##   1:       23112018      13.1         1112      25.3      19.20
##   2:       23112018      14.5         1113      18.0      16.25
##   3:       11072018       7.6          272      12.3       9.95
##   4:        6082018      11.0          283      14.4      12.70
##   5:        6082018      17.9          284      17.1      17.50
##  ---                                                           
## 104:       11072018      10.2          266      13.6      11.90
## 105:       11072018      10.3          270      12.9      11.60
## 106:       11072018      11.0          267      15.5      13.25
## 107:       11072018       9.1          271       9.5       9.30
## 108:       11072018       9.8          279      13.9      11.85

Calculation of total concentrations

Intersect design table, results table and concentrations to make a master table to be used in downstream analyses.

# Initiate empty table
countTable = setNames(data.frame(matrix(ncol = 18, nrow = 0)), c("VGLL3","FamID","TankOrPosition","Individual","TissueOrRep","Temp","CollectionDate","Sex","Sample","Ch1Conc","Ch2Conc","Ch1Copiesper20ulWell","Ch2Copiesper20ulWell","FractionalAbundance","Ch1Pos","Ch2Pos","Ch1Neg","Ch2Neg"))

# Split ddPCR results into two based on channels
ddPCRch1 = ddPCRres[which(ddPCRres$TargetType == "Ch1Unknown"),]
ddPCRch2 = ddPCRres[which(ddPCRres$TargetType == "Ch2Unknown"),]

#################################################################################
# Add ddPCR channel concentrations by mathing sample name to information on design table 

# whole fry samples from 19022018
sel = function(x) unlist(strsplit(x,"-"))[1] == "W1" 
tempConc1 = ddPCRch1[unlist(lapply(ddPCRch1$Sample,sel)),]
tempConc2 = ddPCRch2[unlist(lapply(ddPCRch2$Sample,sel)),]
tempDesign = design[unlist(lapply(design$TankOrPosition,sel)),]
tempDesign$Sample = paste(tempDesign$TankOrPosition,tempDesign$TissueOrRep,sep="-")
tempDesign$Ch1Copiesper20ulWell = tempConc1$CopiesPer20uLWell[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Copiesper20ulWell = tempConc2$CopiesPer20uLWell[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Conc = tempConc1$Concentration[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Conc = tempConc2$Concentration[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$FractionalAbundance = tempConc1$FractionalAbundance[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ratio = tempConc1$Ratio[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMax = tempConc1$PoissonRatioMax[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMin = tempConc1$PoissonRatioMin[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch1Pos = tempConc1$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Pos = tempConc2$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Neg = tempConc1$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Neg = tempConc2$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc2$Sample)]
countTable = rbind(countTable,tempDesign)
rm(sel,tempConc1,tempConc2,tempDesign)

# samples with vgll3-tank-tissue-date IDs
sel = function(x) unlist(strsplit(x,"-"))[4] %in% c("20032018","20042018","17052018","11062018","11072018") & unlist(strsplit(x,"-"))[1] %in% c("EL","LE") == F
tempConc1 = ddPCRch1[unlist(lapply(ddPCRch1$Sample,sel)),]
tempConc2 = ddPCRch2[unlist(lapply(ddPCRch2$Sample,sel)),]
tempDesign = design[which(design$CollectionDate %in% c("20032018","20042018","17052018","11062018","11072018") & design$VGLL3 %in% c("EL","LE") == F),]
tempDesign$Sample = paste(tempDesign$VGLL3,tempDesign$TankOrPosition,tempDesign$TissueOrRep,tempDesign$CollectionDate, sep="-")
tempDesign$Ch1Copiesper20ulWell = tempConc1$CopiesPer20uLWell[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Copiesper20ulWell = tempConc2$CopiesPer20uLWell[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Conc = tempConc1$Concentration[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Conc = tempConc2$Concentration[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$FractionalAbundance = tempConc1$FractionalAbundance[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ratio = tempConc1$Ratio[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMax = tempConc1$PoissonRatioMax[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMin = tempConc1$PoissonRatioMin[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch1Pos = tempConc1$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Pos = tempConc2$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Neg = tempConc1$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Neg = tempConc2$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc2$Sample)]
countTable = rbind(countTable,tempDesign)
rm(sel,tempConc1,tempConc2,tempDesign)

# heterozygote samples from 11072018 with individual # IDs
tempDesign = design[which(design$CollectionDate == '11072018' & design$VGLL3 %in% c("EL","LE")),]
tempDesign$Sample = paste(tempDesign$Individual,tempDesign$TissueOrRep, sep="-")
tempConc1 = ddPCRch1[which(ddPCRch1$Sample %in% tempDesign$Sample),]
tempConc2 = ddPCRch2[which(ddPCRch2$Sample %in% tempDesign$Sample),]
tempDesign$Ch1Copiesper20ulWell = tempConc1$CopiesPer20uLWell[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Copiesper20ulWell = tempConc2$CopiesPer20uLWell[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Conc = tempConc1$Concentration[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Conc = tempConc2$Concentration[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$FractionalAbundance = tempConc1$FractionalAbundance[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ratio = tempConc1$Ratio[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMax = tempConc1$PoissonRatioMax[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMin = tempConc1$PoissonRatioMin[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch1Pos = tempConc1$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Pos = tempConc2$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Neg = tempConc1$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Neg = tempConc2$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc2$Sample)]
countTable = rbind(countTable,tempDesign)
rm(tempConc1,tempConc2,tempDesign)

# samples with individual # IDs
tempDesign = design[which(design$CollectionDate %in% c("6082018","8082018","9082018","13082018","14082018", "15082018","16082018","17082018","27082018","28082018","29082018",
                                                       "20092018","9102018","10102018","11102018","12102018",
                                                       "5112018","6112018","7112018","8112018","9112018","12112018",
                                                       "23112018","26112018","27112018","28112018","29112018","30112018","3122018","4122018","5122018","10012019")),]
tempDesign$Sample = paste(tempDesign$Individual,tempDesign$TissueOrRep, sep="-")
tempConc1 = ddPCRch1[which(ddPCRch1$Sample %in% tempDesign$Sample),]
tempConc2 = ddPCRch2[which(ddPCRch2$Sample %in% tempDesign$Sample),]
tempDesign$Ch1Copiesper20ulWell = tempConc1$CopiesPer20uLWell[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Copiesper20ulWell = tempConc2$CopiesPer20uLWell[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Conc = tempConc1$Concentration[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Conc = tempConc2$Concentration[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$FractionalAbundance = tempConc1$FractionalAbundance[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ratio = tempConc1$Ratio[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMax = tempConc1$PoissonRatioMax[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$PoissonRatioMin = tempConc1$PoissonRatioMin[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch1Pos = tempConc1$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Pos = tempConc2$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1+Ch2+"[match(tempDesign$Sample,tempConc2$Sample)]
tempDesign$Ch1Neg = tempConc1$"Ch1-Ch2+"[match(tempDesign$Sample,tempConc1$Sample)] + tempConc1$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc1$Sample)]
tempDesign$Ch2Neg = tempConc2$"Ch1+Ch2-"[match(tempDesign$Sample,tempConc2$Sample)] + tempConc2$"Ch1-Ch2-"[match(tempDesign$Sample,tempConc2$Sample)]
countTable = rbind(countTable,tempDesign)
rm(tempConc1,tempConc2,tempDesign)

######################################
# add starting total RNA concentration
countTable$RNAconc = conc$RNAconc[match(countTable$Sample,conc$Sample)]
#head(countTable)
#tail(countTable)

# replace with average concentrations for samples analyzed in duplicates
countTable$RNAconc[match(aveConc$Sample,countTable$Sample)] = aveConc$aveRNAconc

######################################
# remove samples that failed the ddPCR
failedSamples = c()
failed = c(which(apply(countTable[,c("Ch1Pos","Ch2Pos","Ch1Neg","Ch2Neg")],1,sum) < 10000),
                 which(countTable$Sample %in% failedSamples))
countTable = countTable[-failed,]

We wish to normalize the number of vgll3 copies to the amount of input RNA in each sample. Input RNA concentration is ng/ul of template (ng/ul_H2O) and ddPCR concentration is copies/ul of PCR reaction (cp/ul_PCR).

We first calculate the concentration of template RNA in the PCR reaction: we know the original template concentration, X ng/ul_H2O. We mixed 2 ul of template with 20 ul of PCR reaction, meaning we put 2•X ng into 22 ul of PCR reaction and our concentration is 2•X/22 = N ng/ul_PCR.

Quantasoft reports target concentration as Y copies/ul_PCR. So now we have Y copies/ul_PCR and we know there was N ng/ul_PCR of template, meaning we can calculate our normalized value of expression as Y copies/N ng of template by dividing Y with N:

\(\frac{Y copies}{1 ul}\) : \(\frac{N ng}{1 ul}\) = \(\frac{Y copies}{1 ul}\) • \(\frac{1 ul}{N ng}\) = \(\frac{Y copies}{N ng}\)

Therefore, we need to calculate the concentration of the total RNA per PCR reaction (N = [RNAconc*2]/22) and divide ddPCR concentration with this value to get copies/ng of input RNA.

# correct RNA concentration for one sample with pipetting error (40ul of RT-ddPCR MM instead of 20ul)
#countTable[which(grepl("582-Ad",countTable$Sample)),"RNAconc"] = countTable[which(grepl("582-Ad",countTable$Sample)),"RNAconc"]/2

# concentration measurement for individual 918 gonad is false
#countTable[which(grepl("918-Gn",countTable$Sample)),"RNAconc"] = NA

# calculation
# countTable$Ch1copies = as.numeric(countTable$Ch1Conc)/((countTable$RNAconc*2)/22)
# countTable$Ch2copies = as.numeric(countTable$Ch2Conc)/((countTable$RNAconc*2)/22)
# countTable$TotCopies = countTable$Ch1copies + countTable$Ch2copies 
# head(countTable)

# alternative way of calculating
# # calculation of target concentration in template (2 ul)
# countTable$Ch1TemplateConc = as.numeric(countTable$Ch1Conc)*20/2
# countTable$Ch2TemplateConc = as.numeric(countTable$Ch2Conc)*20/2
# countTable$TotTemplateConc = countTable$Ch1TemplateConc+countTable$Ch2TemplateConc
# 
# # calculation of template copies per ng of input RNA
# countTable$Ch1copies = countTable$Ch1TemplateConc / countTable$RNAconc
# countTable$Ch2copies = countTable$Ch2TemplateConc / countTable$RNAconc
# countTable$TotCopies = countTable$Ch1copies + countTable$Ch2copies 
# head(countTable)

Alternative way of normalizing to input RNA amount. QuantaSoft also provides calculated number of template molecules per 20 ul well. Each 20 ul well contains 2 ul of template at concentration X ng/ul_H2O, meaning 2•X ng/ul_H2O total RNA per well. Copies per ng total input RNA is then simply \(\frac{copies per well}{2*RNAconc}\)

# correct RNA concentration for one sample with pipetting error (40ul of RT-ddPCR MM instead of 20ul)
countTable[which(grepl("582-Ad",countTable$Sample)),"RNAconc"] = countTable[which(grepl("582-Ad",countTable$Sample)),"RNAconc"]/2

# calculation
countTable$Ch1copies = as.numeric(countTable$Ch1Copiesper20ulWell)/(countTable$RNAconc*2)
countTable$Ch2copies = as.numeric(countTable$Ch2Copiesper20ulWell)/(countTable$RNAconc*2)
countTable$TotCopies = countTable$Ch1copies + countTable$Ch2copies 
head(countTable)

##    VGLL3 FamID TankOrPosition Individual TissueOrRep Temp CollectionDate PIT
## 13    LL    13        W1-3-3C         11           1    W       19022018  NA
## 14    LL    13        W1-3-3C         43           2    W       19022018  NA
## 15    LL    13        W1-3-3C         75           3    W       19022018  NA
## 19    LE    15        W1-4-2A         17           1    W       19022018  NA
## 20    LE    15        W1-4-2A         49           2    W       19022018  NA
## 21    LE    15        W1-4-2A         81           3    W       19022018  NA
##    SexGeno SexPheno Gonads    Sample Ch1Copiesper20ulWell Ch2Copiesper20ulWell
## 13      NA       NA     NA W1-3-3C-1                  224                  0.0
## 14      NA       NA     NA W1-3-3C-2                  198                  0.0
## 15      NA       NA     NA W1-3-3C-3                  136                  1.6
## 19      NA       NA     NA W1-4-2A-1                   62                 76.0
## 20      NA       NA     NA W1-4-2A-2                  164                 74.0
## 21      NA       NA     NA W1-4-2A-3                   82                 60.0
##    Ch1Conc Ch2Conc FractionalAbundance Ratio PoissonRatioMax PoissonRatioMin
## 13    11.2       0                  NA    NA              NA              NA
## 14     9.9       0                  NA    NA              NA              NA
## 15     6.8    0.08                98.8 80.00          270.00            0.00
## 19     3.1     3.8                45.0  0.83            1.28            0.38
## 20     8.2     3.7                69.0  2.20            3.10            1.30
## 21     4.1       3                58.0  1.40            2.00            0.70
##    Ch1Pos Ch2Pos Ch1Neg Ch2Neg RNAconc Ch1copies  Ch2copies TotCopies
## 13    110      0  11498  11608    10.1 11.089109 0.00000000 11.089109
## 14    113      0  13357  13470    11.0  9.000000 0.00000000  9.000000
## 15     80      1  13899  13978    11.0  6.181818 0.07272727  6.254545
## 19     24     29   8999   8994    12.4  2.500000 3.06451613  5.564516
## 20     68     31   9734   9771    11.0  7.454545 3.36363636 10.818182
## 21     42     31  12141  12152    10.7  3.831776 2.80373832  6.635514

Analysis of total expression in tissues

Group data by sampling time-point (months post-hatching).

#+++++++++++++++++++++++++
# Function to calculate the mean, the standard deviation and the standard error of the mean for each group
#+++++++++++++++++++++++++
# data : a data frame
# varname : the name of a column containing the variable
  #to be summariezed
# groupnames : vector of column names to be used as
  # grouping variables
data_summary <- function(data, varname, groupnames){
  require(plyr)
  summary_func <- function(x, col){
    c(mean = mean(x[[col]], na.rm=TRUE),
      sd = sd(x[[col]], na.rm=TRUE),
      sem = sd(x[[col]], na.rm=TRUE)/sqrt(length(x[[col]])),
      n = length(x[[col]]))
  }
  data_sum<-ddply(data, groupnames, .fun=summary_func,varname)
  data_sum <- plyr::rename(data_sum, c("mean" = varname))
 return(data_sum)
}

# duplicate main results table
plotData = countTable

# group all whole fry samples together (timepoint 2)
plotData$TissueOrRep = revalue(plotData$TissueOrRep, c("1"="Fry","2"="Fry","3"="Fry"))

# re-order tissues for plotting
plotData$TissueOrRep = factor(plotData$TissueOrRep,levels=c("Fry","Hr","Liv","Mc","Ad","Gn","Br","SG","Sp"))

# re-order vgll3 genotypes so that EE is plotted first
#plotData$VGLL3 = factor(plotData$VGLL3,levels=c("EE","EL","LE","LL"))

# assign gonads as factors
plotData$Gonads = factor(plotData$Gonads,levels=c("m","s","l","xl"))

# remove infinite and missing values
plotData = plotData[which(is.infinite(plotData$TotCopies) == F),]
plotData = plotData[which(is.na(plotData$TotCopies) == F),]

# add information of timepoints
plotData$Timepoint = "Na"
plotData$Timepoint[which(plotData$CollectionDate == "19022018")] = "2"
plotData$Timepoint[which(plotData$CollectionDate == "20032018")] = "3"
plotData$Timepoint[which(plotData$CollectionDate == "20042018")] = "4"
plotData$Timepoint[which(plotData$CollectionDate == "17052018")] = "5"
plotData$Timepoint[which(plotData$CollectionDate == "11062018")] = "6"
plotData$Timepoint[which(plotData$CollectionDate == "11072018")] = "7"
plotData$Timepoint[which(plotData$CollectionDate %in% c("6082018","8082018","9082018","13082018","14082018", "15082018","16082018","17082018"))] = "8"
plotData$Timepoint[which(plotData$CollectionDate %in% c("27082018","28082018","29082018","20092018"))] = "9"
plotData$Timepoint[which(plotData$CollectionDate %in% c("8102018","9102018","10102018","11102018","12102018"))] = "10"
plotData$Timepoint[which(plotData$CollectionDate %in% c("5112018","6112018","7112018","8112018","9112018","12112018"))] = "11"
plotData$Timepoint[which(plotData$CollectionDate %in% c("23112018","26112018","27112018","28112018","29112018","30112018","3122018","4122018","5122018"))] = "12"
plotData$Timepoint[which(plotData$CollectionDate %in% c("10012019"))] = "13"

# copy data to a new data frame to be modified
plotData2 = plotData

Sample exclusions Below we verify that genotypes and RT-ddPCR signal channel match (by indentifying individuals that e.g. are recorded as LL but only contain signal from the EE probes), and that recorder morphological sex matches genotypic sex. We also exclude data from heterozygotes showing monoallelic vgll3 expression, likely representing sampling or labeling error.

# LL genotypes should express LL probe higher compared to EE probe
LLsamples = plotData2$Sample[which(plotData2$VGLL3 == "LL")]
LLinconsistent = LLsamples[which(plotData2$Ch1copies[which(plotData2$VGLL3 == "LL")] - plotData2$Ch2copies[which(plotData2$VGLL3 == "LL")] < 0)]

# EE genotypes should express EE probe higher compared to LL probe
EEsamples = plotData2$Sample[which(plotData2$VGLL3 == "EE")]
EEinconsistent = EEsamples[which(plotData2$Ch1copies[which(plotData2$VGLL3 == "EE")] - plotData2$Ch2copies[which(plotData2$VGLL3 == "EE")] > 0)]

# remove these individuals from the data
plotData2 = plotData2[which(plotData2$Sample %in% c(EEinconsistent,LLinconsistent) == F),]

# remove individuals where genotypic sex and morphology do not match
inconsistentSex = plotData2[which(plotData2$SexGeno == "male" & plotData2$Gonads == "m" | (plotData2$SexGeno == "female" & plotData2$Gonads %in% c("s","l","xl"))),]
plotData2 = plotData2[which(plotData2$Individual %in% inconsistentSex$Individual == F),]

# remove heterozygotes with monoallelic vgll3 expression
monoallelic = plotData2[which(plotData2$VGLL3 %in% c("EL","LE") & plotData2$FractionalAbundance > 90),]
plotData2 = plotData2[which(plotData2$Individual %in% monoallelic$Individual == F),]

Impute missing annotations on early samples (morphological differences between gonads were unrecorded for first dissectable gonads because of their very small size) and assign gonad types based on gonad morphology. Create a new column “TissueOrGonadType” where gonads are assigned to ovaries, immature testes, pubertal testes and mature testes based on their visual morphology (recorded in “Gonads” column).

# assign missing gonad types by genotypic sex in early samples (all males before timepoint 8 are type "s" and females type "m")
missingGonadInfo = plotData2[which(is.na(plotData2$Gonads) & is.na(plotData2$SexGeno)==F), "Individual"]

for (i in missingGonadInfo){
  if (unique(plotData2$SexGeno[which(plotData2$Individual == i)]) == "female"){
    plotData2[which(plotData2$Individual == i),"Gonads"] = "m"
  }
  if (unique(plotData2$SexGeno[which(plotData2$Individual == i)]) == "male"){
    plotData2[which(plotData2$Individual == i),"Gonads"] = "s"
  }
}

# verify that assignment is correct
table(plotData2[which(plotData2$Individual%in% missingGonadInfo),c("SexGeno","Gonads")])

##         Gonads
## SexGeno   m  s  l xl
##   female 76  0  0  0
##   male    0 59  0  0
##   NA      0  0  0  0

# assign ovaries, immature testes and mature testes by morphology 
# here l gonads considered "pubertal" because they have clearly initiated pubertal cell type changes
plotData2$TissueOrGonadType = NA
plotData2$TissueOrGonadType = as.character(plotData2$TissueOrRep)
plotData2[which(plotData2$TissueOrRep == "Gn" & plotData2$Gonads == "m"),"TissueOrGonadType"] = "Ov" 
plotData2[which(plotData2$TissueOrRep == "Gn" & plotData2$Gonads %in% c("s")),"TissueOrGonadType"] = "IT"
plotData2[which(plotData2$TissueOrRep == "Gn" & plotData2$Gonads %in% c("l")),"TissueOrGonadType"] = "PT"
plotData2[which(plotData2$TissueOrRep == "Gn" & plotData2$Gonads %in% c("xl")),"TissueOrGonadType"] = "MT"

Average expression across tissues and time points (with number of samples in each time-point indicated by number in grey).

# calculate sd and sem for plotting
plotStats = data_summary(plotData2, varname="TotCopies", groupnames=c("Timepoint","TissueOrGonadType"))

# sort by timepoint
plotStats$Timepoint = factor(plotStats$Timepoint,levels=c("2","3","4","5","6","7","8","9","10","11","12","13","14"))

# order tissues
plotStats$TissueOrGonadType = factor(plotStats$TissueOrGonadType, levels=c("Fry","Hr","Liv","Mc","Ad","Br","Ov","IT","PT","MT","SG","Sp"))

# remove rows where we don't know the gonad morphology
plotStats = plotStats[is.na(plotStats$TissueOrGonadType)==F,]

# labels
tissueLabels = c('Fry'='Whole fry','Liv'="Liver",'Mc'="Muscle","Hr"="Heart","Ad"="Adipose","Gn"="Gonad", "Br"="Brain", "Ov" = "Ovary","IT"="Immature testis","PT"="Pubertal testes","MT"="Mature testis","SG"="Stripped testis","Sp"="Ejaculate")

ggplot(plotStats,aes(Timepoint, TotCopies)) +
 geom_errorbar(aes(ymin=TotCopies-sem, ymax=TotCopies+sem), width=.1,position=position_dodge(0.05)) +
 geom_line() +
 geom_point() +
 theme_classic() +
 theme(axis.title.y=element_text(size=16),
       axis.text.y=element_text(size=16),
       axis.title.x=element_text(size=16),
       axis.text.x=element_text(size=16),
       strip.text.x=element_text(size=16),
       strip.background = element_blank()) +
 geom_text(aes(y=TotCopies-15,x=as.numeric(Timepoint)+0.45,label=n),vjust=0.5,hjust=0.4,color="darkgrey", size=3) +
 facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 xlab("Months post hatch") +
 #ylim(-50,250) +
 scale_x_discrete(breaks=seq(2,14,2))

## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

Contrast immature testes expression with expression in mature testes, stripped testes and ejaculate.

# summary stats for timepoints where we have mature gonads (for panel b)
plotStats2 = data_summary(plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="MT")]),], varname="TotCopies", groupnames=c("TissueOrGonadType"))

# order tissues
plotStats2$TissueOrGonadType = factor(plotStats2$TissueOrGonadType, levels=c("Fry","Hr","Liv","Mc","Ad","Br","Ov","IT","PT","MT","SG","Sp"))

ggplot(plotStats2[which(plotStats2$TissueOrGonadType %in% c("IT","MT", "Sp","SG")),],aes(TissueOrGonadType, TotCopies, color=TissueOrGonadType)) +
         geom_errorbar(aes(ymin=TotCopies-sem, ymax=TotCopies+sem), width=.1,position=position_dodge(0.05)) +
         geom_point(size=4) +
         theme_classic() +
         scale_color_manual(values=c("black","black", "black","black")) + 
         ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
         theme(axis.title.x=element_blank(),
               axis.text.x=element_text(hjust=1, vjust=1, size=14,color="black",angle=65),
               axis.text.y=element_text(size=14,color="black"),
               axis.title.y=element_text(size=14,color="black"),
               legend.position="none")+
        geom_text(aes(y=TotCopies-4,x=TissueOrGonadType,label=n),vjust=1,hjust=-0.25,color="darkgrey", size=3) +
         scale_x_discrete(labels=c("Immature testes","Mature testes","Stripped testes","Ejaculate"))

# t-tests
# mature testes versus immature testes
t.test(plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="MT")] & plotData2$TissueOrGonadType=="MT"),"TotCopies"],
       plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="MT")] & plotData2$TissueOrGonadType=="IT"),"TotCopies"])

## 
##  Welch Two Sample t-test
## 
## data:  plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType == "MT")] & plotData2$TissueOrGonadType == "MT"), "TotCopies"] and plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType == "MT")] & plotData2$TissueOrGonadType == "IT"), "TotCopies"]
## t = -6.3351, df = 25.904, p-value = 1.061e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -92.63591 -47.24216
## sample estimates:
## mean of x mean of y 
##  36.70081 106.63984

# stripped testes versus immature testes
t.test(plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="SG")] & plotData2$TissueOrGonadType=="SG"),"TotCopies"],
       plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="SG")] & plotData2$TissueOrGonadType=="IT"),"TotCopies"])

## 
##  Welch Two Sample t-test
## 
## data:  plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType == "SG")] & plotData2$TissueOrGonadType == "SG"), "TotCopies"] and plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType == "SG")] & plotData2$TissueOrGonadType == "IT"), "TotCopies"]
## t = -3.5935, df = 21.795, p-value = 0.001635
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -110.89187  -29.70551
## sample estimates:
## mean of x mean of y 
##  57.99373 128.29242

# ejaculate versus immature testes
t.test(plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="Sp")] & plotData2$TissueOrGonadType=="Sp"),"TotCopies"],
       plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="Sp")] & plotData2$TissueOrGonadType=="IT"),"TotCopies"])

## 
##  Welch Two Sample t-test
## 
## data:  plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType == "Sp")] & plotData2$TissueOrGonadType == "Sp"), "TotCopies"] and plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType == "Sp")] & plotData2$TissueOrGonadType == "IT"), "TotCopies"]
## t = -4.2303, df = 6.5563, p-value = 0.004505
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -180.68655  -49.96748
## sample estimates:
## mean of x mean of y 
##  18.54077 133.86778

# percent reduction in expression:
ITmean = mean(plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="MT")] & plotData2$TissueOrGonadType=="IT"),"TotCopies"],na.rm=T)
MTmean = mean(plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="MT")] & plotData2$TissueOrGonadType=="MT"),"TotCopies"],na.rm=T)
(ITmean-MTmean)/ITmean*100

## [1] 65.58434

# contrast p-value
my_comparisons5  = data.frame(start=c("IT"), 
                            end=c("MT"), 
                            y=c(120,120),
                            label=c("P=1e-06"))
my_comparisons6  = data.frame(start=c("IT"), 
                            end=c("SG"), 
                            y=c(130,130),
                            label=c("P=0.002"))
my_comparisons7  = data.frame(start=c("IT"), 
                            end=c("Sp"), 
                            y=c(140,140),
                            label=c("P=0.005"))

testesFig2 = ggplot(plotStats2[which(plotStats2$TissueOrGonadType %in% c("IT","MT", "Sp","SG")),]) +
         geom_errorbar(aes(x=TissueOrGonadType, ymin=TotCopies-sem, ymax=TotCopies+sem), width=.1,position=position_dodge(0.05)) +
         geom_point(aes(TissueOrGonadType, TotCopies, color=TissueOrGonadType),size=2) +
         theme_classic() +
         geom_signif(data=my_comparisons5,
                    aes(xmin=start, xmax=end, annotations=label, y_position=y),
                    textsize = 3, size=0.5, tip_length = 0.01,
                    manual=TRUE) +
         geom_signif(data=my_comparisons6,
                    aes(xmin=start, xmax=end, annotations=label, y_position=y),
                    textsize = 3, size=0.5, tip_length = 0.01,
                    manual=TRUE) +
         geom_signif(data=my_comparisons7,
                    aes(xmin=start, xmax=end, annotations=label, y_position=y),
                    textsize = 3, size=0.5, tip_length = 0.01,
                    manual=TRUE) +
         scale_color_manual(values=c("black","black", "black","black")) + 
         ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
         theme(axis.title.x=element_blank(),
               axis.text.x=element_text(hjust=1, vjust=1, size=10,color="black",angle=65),
               axis.text.y=element_text(size=10,color="black"),
               axis.title.y=element_text(size=10,color="black"),
               legend.position="none")+
        #geom_text(aes(y=TotCopies-4,x=TissueOrGonadType,label=n),vjust=1,hjust=-0.25,color="darkgrey", size=3) +
         scale_x_discrete(labels=c("Immature testes","Mature testes","Stripped testes","Ejaculate"))

## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position

## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position

## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position

testesFig2

Testes expression colored by genotype.

# collapse heterozygotes
plotData2$VGLL3RECPRO = revalue(plotData2$VGLL3,c("EL"="EL","LE"="EL"))

# summary stats for timepoints where we have mature gonads (for panel b)
plotStats2_2 = data_summary(plotData2[which(plotData2$Timepoint %in% plotData2$Timepoint[which(plotData2$TissueOrGonadType=="MT")]),], varname="TotCopies", groupnames=c("TissueOrGonadType","VGLL3RECPRO"))

# order tissues
plotStats2_2$TissueOrGonadType = factor(plotStats2_2$TissueOrGonadType, levels=c("Fry","Hr","Liv","Mc","Ad","Br","Ov","IT","PT","MT","SG","Sp"))

ggplot(plotStats2_2[which(plotStats2_2$TissueOrGonadType %in% c("IT","MT")),], aes(TissueOrGonadType, TotCopies, color=VGLL3RECPRO)) +
         geom_point(size=3, position=position_dodge(width=0.05)) +
         geom_errorbar(aes(ymin=TotCopies-sem, ymax=TotCopies+sem), width=.1, position=position_dodge(width=0.05)) +
         theme_classic() +
         scale_color_manual(name=expression(~italic("vgll3")),values=c("blue","darkgrey","red")) +
         ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
         theme(axis.title.x=element_blank(),
               axis.text.x=element_text(hjust=1, vjust=1, size=14,color="black",angle=65),
               axis.text.y=element_text(size=14,color="black"),
               axis.title.y=element_text(size=14,color="black"),
               legend.position="right",
               legend.title = element_text(size=14))+
        #geom_text(aes(y=TotCopies-4,x=TissueOrGonadType,label=n),vjust=1,hjust=-0.25,color="darkgrey", size=3) +
         scale_x_discrete(labels=c("Immature testes","Mature testes","Stripped testes","Ejaculate"))

Analysis in context of Amh/Igf3 expression ratio and clustering

Combine result tables from QuantStudio output. Channel 1 (FAM) Amh channel 2 (VIC) Igf3

# load results
ddPCRresAmhIgf1 = rbind(ddPCRresAmhIgf1_1,ddPCRresAmhIgf1_2)

colnames(ddPCRresAmhIgf1) = iconv(colnames(ddPCRresAmhIgf1), to = "ASCII", sub = "u")
colnames(ddPCRresAmhIgf1) = c(colnames(ddPCRresAmhIgf1)[-2],"Empty")

# calculate efficiency factor for amh (FAM) vs igf3 (VIC) probes based on genomic DNA
amhPos = ddPCRresAmhIgf1$Positives[which(ddPCRresAmhIgf1$Sample=="e28-DNA" & ddPCRresAmhIgf1$TargetType=="Ch1Positive Control")]
amhNeg = ddPCRresAmhIgf1$Negatives[which(ddPCRresAmhIgf1$Sample=="e28-DNA" & ddPCRresAmhIgf1$TargetType=="Ch1Positive Control")]
igf3Pos = ddPCRresAmhIgf1$Positives[which(ddPCRresAmhIgf1$Sample=="e28-DNA" & ddPCRresAmhIgf1$TargetType=="Ch2Positive Control")]
igf3Neg = ddPCRresAmhIgf1$Negatives[which(ddPCRresAmhIgf1$Sample=="e28-DNA" & ddPCRresAmhIgf1$TargetType=="Ch2Positive Control")]
effFactor2 = mean(apply(cbind(amhPos,igf3Pos,amhNeg,igf3Neg), 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]))[["estimate"]]))

Flag and remove amh/igf3 expression data for samples where the RT-ddPCR reaction was saturated with positive droplets (more than 98% of all droplets are positive). In the figures below, saturated samples are plotted in red and non-saturated samples in black.

# can't use reactions where positives are saturated
plot(ddPCRresAmhIgf1$Positives,
     ddPCRresAmhIgf1$Positives+ddPCRresAmhIgf1$Negatives,
     col=as.factor(ddPCRresAmhIgf1$Positives/c(ddPCRresAmhIgf1$Negatives+ddPCRresAmhIgf1$Positives)>0.98),ylab="Total number of droplets", xlab="Number of positive droplets")

# add a flag to samples that are not saturated
ddPCRresAmhIgf1$AbundanceOK = ddPCRresAmhIgf1$Positives/c(ddPCRresAmhIgf1$Negatives+ddPCRresAmhIgf1$Positives)<0.98

# clean for samples where Amh:Igf1 reaction has OK negative:positive droplet numbers (on both channels)
ddPCRresAmhIgf1$Remove = NA
for (i in unique(ddPCRresAmhIgf1$Sample[which(ddPCRresAmhIgf1$Sample != "H2O")])){
  tmp = ddPCRresAmhIgf1[which(ddPCRresAmhIgf1$Sample == i),]
  if (all(tmp$AbundanceOK)){
    ddPCRresAmhIgf1[which(ddPCRresAmhIgf1$Sample == i),"Remove"] = FALSE
  }
  else {
    ddPCRresAmhIgf1[which(ddPCRresAmhIgf1$Sample == i),"Remove"] = TRUE
  }
}
ddPCRresAmhIgf1$Remove[which(ddPCRresAmhIgf1$Sample == "H2O")] = TRUE

ddPCRresAmhIgf1Clean = data.frame()
ddPCRresAmhIgf1Clean = ddPCRresAmhIgf1[which(ddPCRresAmhIgf1$Remove == F),]

# verify that abundances are now ok
plot(ddPCRresAmhIgf1Clean$Positives,
     ddPCRresAmhIgf1Clean$Positives+ddPCRresAmhIgf1Clean$Negatives,
     col=as.factor(ddPCRresAmhIgf1Clean$Positives/c(ddPCRresAmhIgf1Clean$Negatives+ddPCRresAmhIgf1Clean$Positives)>0.98),ylab="Total number of droplets", xlab="Number of positive droplets")

Clean for any samples not analyzed for vgll3.

# clean results from any samples not analyzed for vgll3
ddPCRresAmhIgf1Clean = ddPCRresAmhIgf1Clean[which(ddPCRresAmhIgf1Clean$Sample %in% plotData2$Sample),]

Split the ddPCR output file into Channel 1 (Amh) and Channel 2 (Igf1) to avoid bugs.

ddPCRresAmh = ddPCRresAmhIgf1Clean[which(ddPCRresAmhIgf1Clean$TargetType == "Ch1Unknown"),]
ddPCRresIgf1 = ddPCRresAmhIgf1Clean[which(ddPCRresAmhIgf1Clean$TargetType == "Ch2Unknown"),]

Add columns to existing data frame.

# add Amh copies
plotData2$AmhConc = NA
plotData2$AmhConc = replace(plotData2$AmhConc,
                              match(ddPCRresAmh$Sample,
                                    plotData2$Sample),
                              ddPCRresAmh$CopiesPer20uLWell[which(ddPCRresAmh$TargetType == "Ch1Unknown")])

# add Igf1 copies
plotData2$Igf1Conc = NA
plotData2$Igf1Conc = replace(plotData2$Igf1Conc,
                              match(ddPCRresIgf1$Sample,
                                    plotData2$Sample),
                              ddPCRresIgf1$CopiesPer20uLWell[which(ddPCRresIgf1$TargetType == "Ch2Unknown")])

# Amh:Igf1 ratio of concentrations
plotData2$AmhIgf1ConcRatio = NA
plotData2$AmhIgf1ConcRatio = replace(plotData2$AmhIgf1ConcRatio,
                              match(ddPCRresAmh$Sample,
                                    plotData2$Sample),
                              ddPCRresAmh$Ratio[which(ddPCRresAmh$TargetType == "Ch1Unknown")])

# Amh:Igf1 fractional abundance
plotData2$AmhIgf1FA = NA
plotData2$AmhIgf1FA = replace(plotData2$AmhIgf1FA,
                              match(ddPCRresAmh$Sample,
                                    plotData2$Sample),
                              ddPCRresAmh$FractionalAbundance[which(ddPCRresAmh$TargetType == "Ch1Unknown")])

# Amh:igf3 Poisson rate ratio (add speudocount +1 to avoid Inf values)
plotData2$AmhPos = NA
plotData2$AmhPos = replace(plotData2$AmhPos,
                              match(ddPCRresAmh$Sample,
                                    plotData2$Sample),
                              ddPCRresAmh$"Ch1+Ch2-"[which(ddPCRresAmh$TargetType == "Ch1Unknown")])
plotData2$AmhPos = plotData2$AmhPos + 1
  
plotData2$Igf1Pos = NA
plotData2$Igf1Pos = replace(plotData2$Igf1Pos,
                              match(ddPCRresIgf1$Sample,
                                    plotData2$Sample),
                              ddPCRresIgf1$"Ch1-Ch2+"[which(ddPCRresIgf1$TargetType == "Ch2Unknown")])
plotData2$Igf1Pos = plotData2$Igf1Pos + 1

plotData2$AmhNeg = NA
plotData2$AmhNeg = replace(plotData2$AmhNeg,
                              match(ddPCRresAmh$Sample,
                                    plotData2$Sample),
                              c(ddPCRresAmh$"Ch1-Ch2-"[which(ddPCRresAmh$TargetType == "Ch1Unknown")]+ddPCRresAmh$"Ch1-Ch2+"[which(ddPCRresAmh$TargetType == "Ch1Unknown")]))
plotData2$AmhNeg = plotData2$AmhNeg + 1

plotData2$Igf1Neg = NA
plotData2$Igf1Neg = replace(plotData2$Igf1Neg,
                              match(ddPCRresIgf1$Sample,
                                    plotData2$Sample),
                              c(ddPCRresIgf1$"Ch1-Ch2-"[which(ddPCRresIgf1$TargetType == "Ch2Unknown")]+ddPCRresIgf1$"Ch1+Ch2-"[which(ddPCRresIgf1$TargetType == "Ch2Unknown")]))
plotData2$Igf1Neg = plotData2$Igf1Neg + 1

Subset for males with Amh and Igf1 expression data

plotData3 = plotData2[which(is.na(plotData2$AmhIgf1ConcRatio) == F & plotData2$Gonads != "m"),]

plotData3$AmhIgf3PoissonRateRatio = apply(plotData3[,c("AmhPos","Igf1Pos","AmhNeg","Igf1Neg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]),r=effFactor2)[["estimate"]])

Amh/igf3 log- Poisson rate ratio in mature, immature and pubertal gonads.

# tissue labels
tissueLabels = c('IT'="Immature","PT"="Pubertal","MT"="Mature")

ggplot(plotData3)+
  geom_quasirandom(aes(TissueOrGonadType,log(AmhIgf3PoissonRateRatio, base=2),color=TissueOrGonadType),size=5,alpha=0.7) +
  scale_color_manual(values=c("darkgrey","black","grey")) + 
  theme_classic() +
  facet_wrap(~VGLL3) +
  scale_x_discrete(breaks=c("IT","PT","MT"),labels=c("Immature","Pubertal","Mature")) + 
  theme(axis.text.x=element_text(size=14,color="black"),
       axis.title.x=element_blank(),
       axis.ticks.x=element_blank(),
       axis.text.y=element_text(size=14,color="black"),
       axis.title.y=element_text(size=14,color="black"),
       axis.line.x = element_blank(),
       legend.position="none") +
  ylab("log2 amh/igf3 rate ratio") + 
  xlab("Gonad maturity")

Unsupervised clustering of samples according to amh/igf3 ratio and vgll3 expression using Gaussian mixture modeling as implemented in the package Mclust.

clustTable = as.data.frame(cbind(plotData3$TotCopies,log(plotData3$AmhIgf3PoissonRateRatio, base=2)))

BIC = mclustBIC(clustTable)
summary(BIC)

## Best BIC values:
##              VEI,2       VVI,2       VEE,2
## BIC      -977.5309 -978.777440 -981.250501
## BIC diff    0.0000   -1.246513   -3.719575

mod = Mclust(clustTable,x=BIC)
summary(mod,parameter=T)

## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust VEI (diagonal, equal shape) model with 2 components: 
## 
##  log-likelihood  n df       BIC       ICL
##       -472.5237 58  8 -977.5309 -980.0985
## 
## Clustering table:
##  1  2 
## 40 18 
## 
## Mixing probabilities:
##         1         2 
## 0.7089881 0.2910119 
## 
## Means:
##          [,1]       [,2]
## V1 103.523888 23.4819369
## V2   9.350553  0.3410666
## 
## Variances:
## [,,1]
##          V1       V2
## V1 2202.634  0.00000
## V2    0.000 26.19155
## [,,2]
##         V1       V2
## V1 187.219 0.000000
## V2   0.000 2.226224

# plots of best model/classification
fviz_mclust(mod, "BIC", palette = "jco")

fviz_mclust(mod, "classification", geom = "point", pointsize = 1.5, palette = "jco", stand=F, ellipse.level=0.68)

fviz_mclust(mod, "uncertainty", palette = "jco", stand=F)

plot(mod,what="density")

plot(mod, what = "density", type = "persp")

LRT = mclustBootstrapLRT(clustTable, nboot=999, modelName = "VEI")
LRT

## ------------------------------------------------------------- 
## Bootstrap sequential LRT for the number of mixture components 
## ------------------------------------------------------------- 
## Model        = VEI 
## Replications = 999 
##              LRTS bootstrap p-value
## 1 vs 2   55.07579             0.001
## 2 vs 3   10.67646             0.293

Plot of Mclust best classification (vs vgll3 genotype).

# visualize Mclust clustering confidence interval by adding a layer of this ggplot object
ellipsePlot = fviz_mclust(mod, "classification", geom = "point", stand=F, ellipse.level=0.95) 

ggplot(plotData3)+
  geom_point(aes(TotCopies,log(AmhIgf3PoissonRateRatio, base=2),shape=TissueOrGonadType,fill=VGLL3),size=4,alpha=0.7)+
  #scale_color_manual(values=c("black","darkgrey")) + 
  scale_fill_manual(values=c("grey","grey", "blue","darkgreen","orange","red")) +
  #scale_fill_manual(values=c("slategrey","grey")) +
  scale_shape_manual(values=c(21,22,23)) +
  ellipsePlot$layers[[2]] +
  geom_label(aes(x=60,y=-4), label="Mature cluster") +
  geom_label(aes(x=120,y=22), label="Immature cluster") +
  theme_classic() +
  ylab("log2 amh/igf3 rate ratio") + 
  xlab("vgll3 copies / ng RNA") +
  theme(axis.text=element_text(size=14,color="black"),
        axis.title=element_text(size=14,color="black"),
        legend.position="top")

Plot of Mclust best model classification. We observe that “pubertal” samples show a large range of amh/igf3 expression ratio and vgll3 expression, indicating that the class contains samples that were either in ht early stages of transitioning towards maturity, or samples that had reached maturity while resembling immature gonads more than mature gonads in morphology.

plotData3$clust = mod$classification

bxFig = ggplot(plotData3)+
          geom_quasirandom(aes(TissueOrGonadType,log(AmhIgf3PoissonRateRatio, base=2),shape=TissueOrGonadType),color="black",fill="darkgrey",size=5,alpha=0.7) +
          scale_fill_manual(values=c("darkgrey","darkgrey")) + 
          scale_shape_manual(values=c(21,22,23)) +
          theme_classic() +
          scale_x_discrete(breaks=c("IT","MT","PT"),labels=c("Immature","Mature","Pubertal")) + 
          theme(axis.text.x=element_text(size=14,color="black"),
               axis.title.x=element_blank(),
               axis.ticks.x=element_blank(),
               axis.text.y=element_text(size=14,color="black"),
               axis.title.y=element_text(size=14,color="black"),
               axis.line.x = element_blank(),
               legend.position="none") +
          ylab("log2 amh/igf3 rate ratio") + 
          xlab("Gonad maturity")

scatterFig = ggplot(plotData3)+
  geom_point(aes(TotCopies,log(AmhIgf3PoissonRateRatio, base=2),shape=TissueOrGonadType,fill=as.factor(clust)),size=4,alpha=0.7)+
  scale_color_manual(values=c("black","darkgrey")) + 
  scale_fill_manual(values=c("slategrey","grey")) +
  scale_shape_manual(values=c(21,22,23)) +
  ellipsePlot$layers[[2]] +
  geom_label(aes(x=60,y=-4), label="Mature cluster") +
  geom_label(aes(x=120,y=22), label="Immature cluster") +
  theme_classic() +
  ylab("log2 amh/igf3 rate ratio") + 
  xlab("vgll3 copies / ng RNA") +
  theme(axis.text=element_text(size=14,color="black"),
        axis.title=element_text(size=14,color="black"),
        legend.position="none")

ggarrange(bxFig, scatterFig, 
          labels = c("a","b"),
          ncol = 2, nrow = 1,
          widths = c(0.7, 1))

scatterFig2 = ggplot(plotData3)+
  ellipsePlot$layers[[2]] +
  geom_point(aes(TotCopies,log(AmhIgf3PoissonRateRatio, base=2),shape=TissueOrGonadType,fill=as.factor(clust)),size=3,alpha=0.7)+
  scale_color_manual(values=c("darkgrey","black")) + 
  scale_fill_manual(values=c("grey", "slategrey")) +
  scale_shape_manual(values=c(21,22,23)) +
  geom_label(aes(x=60,y=-4), label="Mature cluster",size=3) +
  geom_label(aes(x=120,y=22), label="Immature cluster",size=3) +
  theme_classic() +
  ylab(expression('log2'~italic(amh/igf3)~'rate ratio')) +
  xlab(expression(~italic(vgll3)~'copies / ng RNA')) +
  theme(axis.text=element_text(size=10,color="black"),
        axis.title=element_text(size=10,color="black"),
        legend.position="none")

scatterFig2

Modeling EE versus LL and immature versus mature gonad expression

Below we calculate age for samples based on date when 90% of eggs had hatched.

convertToDate = function(x){
  x = as.character(x)
  if (strsplit(x,"") %>% unlist() %>% length() == 8){
      day = unlist(strsplit(x,""))[1:2] %>% as.numeric() %>% as.matrix() %>% t() %>% as.data.frame() %>% unite("d",c("V1","V2"),sep="")
      month = unlist(strsplit(x,""))[3:4] %>% as.numeric() %>% as.matrix() %>% t() %>% as.data.frame() %>% unite("m",c("V1","V2"),sep="")
      year = unlist(strsplit(x,""))[5:8] %>% as.numeric() %>% as.matrix() %>% t() %>% as.data.frame() %>% unite("y",c("V1","V2","V3","V4"),sep="")
      return(paste(month,day,year,sep="/"))
  }
  else {
      day = paste(0,unlist(strsplit(x,""))[1],sep="") 
      month = unlist(strsplit(x,""))[2:3] %>% as.numeric() %>% as.matrix() %>% t() %>% as.data.frame() %>% unite("m",c("V1","V2"),sep="")
      year = unlist(strsplit(x,""))[4:7] %>% as.numeric() %>% as.matrix() %>% t() %>% as.data.frame() %>% unite("y",c("V1","V2","V3","V4"),sep="")
      return(paste(month,day,year,sep="/"))
  }
}

daysPostHatch = function(x){
  datetimes = strptime(c("12/29/2017",x), format = "%m/%d/%Y")
  return(as.numeric(difftime(datetimes[2], datetimes[1], units = "days")))
}

# OUL warm 90% hatch date 12/29/2017
plotData2$Date = unlist(lapply(plotData2$CollectionDate,convertToDate))
plotData2$DPH = unlist(lapply(plotData2$Date,daysPostHatch))
plotData3$Date = unlist(lapply(plotData3$CollectionDate,convertToDate))
plotData3$DPH = unlist(lapply(plotData3$Date,daysPostHatch))

Plot of tissue expression levels as above but where age is converted from months post-hatch to days post-hatch.

plotData2$TissueOrGonadType = factor(plotData2$TissueOrGonadType, levels=c("Fry","Hr","Liv","Mc","Ad","Br","Ov","IT","PT", "MT","SG","Sp"))

tissueLabels = c('Fry'='Whole fry','Liv'="Liver",'Mc'="Muscle","Hr"="Heart","Ad"="Adipose","Gn"="Gonad", "Br"="Brain", "Ov" = "Immature ovary","IT"="Immature testes","MT"="Mature testes","SG"="Stripped testes","Sp"="Ejaculate")

allFigDPH = ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Hr","Liv","Mc","Ad","Ov","IT","MT","SG","Sp")),],aes(DPH, TotCopies)) +
 geom_smooth(data = plotData2[which(plotData2$TissueOrGonadType %in% c("Hr","Liv","Mc","Ad","Ov","IT","MT")),],fill="grey70",colour="aquamarine4") +
 geom_point(colour="black",fill="black",alpha=0.25) +
 theme_classic() +
 theme(axis.text=element_text(size=10,color="black"),
       axis.title=element_text(size=10,color="black"),
       strip.text.x=element_text(size=10),
       legend.position="none",
       strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
 ylim(0,350) +
 facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 xlab("Age (days post-hatch)") 
allFigDPH

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 39 rows containing missing values (geom_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

# color points according to vgll3 genotypes
allFigDPHcolor = ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Hr","Liv","Mc","Ad","Ov","IT")),],aes(DPH, TotCopies,colour=VGLL3RECPRO)) +
 geom_smooth(fill="grey70",colour="aquamarine4") +
 geom_point(fill="black",alpha=0.25) +
 scale_color_manual(name=expression(~italic("vgll3")), values=c( "blue","darkgrey","red")) +
 theme_classic() +
 theme(axis.text=element_text(size=14,color="black"),
       axis.title=element_text(size=14,color="black"),
       strip.text.x=element_text(size=14),
       legend.position="right",
       strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
 facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 xlab("Age (days post-hatch)") 
allFigDPHcolor

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Reordered panels for presentation.

plotData2$TissueOrGonadType = factor(plotData2$TissueOrGonadType, levels=c("Ad","Ov","IT","Hr","Liv","Mc","Br","Fry","PT", "SG","MT","Sp"))

tissueLabels = c('Fry'='Whole fry','Liv'="Liver",'Mc'="Muscle","Hr"="Heart","Ad"="Adipose","Gn"="Gonad", "Br"="Brain", "Ov" = "Immature ovary","IT"="Immature testes","MT"="Mature testes","SG"="Stripped testes","Sp"="Ejaculate")

ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Hr","Liv","Mc","Ad","Ov","IT")),],aes(DPH, TotCopies)) +
 geom_smooth(fill="grey70",colour="aquamarine4") +
 geom_point(colour="black",fill="black",alpha=0.25) +
 theme_classic() +
 theme(axis.text=element_text(size=10,color="black"),
       axis.title=element_text(size=10,color="black"),
       strip.text.x=element_text(size=10),
       legend.position="none",
       strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
 facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 xlab("Age (days post-hatch)")

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggarrange(allFigDPH,                                                 
          ggarrange(testesFig2,scatterFig2, ncol = 2, labels = c("B", "C"), widths = c(0.7, 1)),
          nrow = 2, heights=c(1.5,1),
          labels = "A")

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 39 rows containing missing values (geom_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

Plot all tissues for supplemental figure.

# re-order tissues
plotData2$TissueOrGonadType = factor(plotData2$TissueOrGonadType, levels=c("Fry","Hr","Liv","Mc","Ad","Br","Ov","IT","PT", "SG","MT","Sp"))
tissueLabels = c('Fry'='Whole fry','Liv'="Liver",'Mc'="Muscle","Hr"="Heart","Ad"="Adipose","Gn"="Gonad", "Br"="Brain", "Ov" = "Immature ovary","IT"="Immature testes","PT"="Pubertal testes", "MT"="Mature testes","SG"="Stripped testes","Sp"="Ejaculate")

# color points according to vgll3 genotypes
allTissuesGenotypes = ggplot(plotData2,aes(DPH, TotCopies,colour=VGLL3RECPRO)) +
 #geom_smooth(fill="grey70",colour="aquamarine4") +
 geom_point(fill="black",alpha=0.25) +
 scale_color_manual(name=expression(~italic("vgll3")), values=c( "blue","darkgrey","red")) +
 theme_classic() +
 theme(axis.text=element_text(size=14,color="black"),
       axis.title=element_text(size=14,color="black"),
       strip.text.x=element_text(size=14),
       legend.position="right",
       strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
 facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 xlab("Age (days post-hatch)")
allTissuesGenotypes

Individual plots

tissueLabels = c('Fry'='Whole fry','Liv'="Liver",'Mc'="Muscle","Hr"="Heart","Ad"="Adipose","Gn"="Gonad", "Br"="Brain", "Ov" = "Immature ovary","IT"="Immature testes","MT"="Mature testes","SG"="Stripped testes","Sp"="Ejaculate")

ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Hr")),],aes(DPH, TotCopies)) +
 geom_smooth(fill="grey70",colour="aquamarine4") +
 geom_point(colour="black",fill="black",alpha=0.25,size=4) +
 theme_classic() +
 theme(axis.text=element_text(size=16,color="black"),
       axis.title=element_text(size=16,color="black"),
       strip.text.x=element_text(size=16),
       legend.position="none",
       strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
 facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 xlab("Age (days post-hatch)")

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Liv")),],aes(DPH, TotCopies, color=VGLL3)) +
 geom_smooth(fill="grey70",colour="aquamarine4") +
 geom_point(colour="black",fill="black",alpha=0.25,size=4) +
 theme_classic() +
 theme(axis.text=element_text(size=16,color="black"),
       axis.title=element_text(size=16,color="black"),
       strip.text.x=element_text(size=16),
       legend.position="none",
       strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
 facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 xlab("Age (days post-hatch)")

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Mc")),],aes(DPH, TotCopies)) +
 geom_smooth(fill="grey70",colour="aquamarine4") +
 geom_point(colour="black",fill="black",alpha=0.25,size=4) +
 theme_classic() +
 theme(axis.text=element_text(size=16,color="black"),
       axis.title=element_text(size=16,color="black"),
       strip.text.x=element_text(size=16),
       legend.position="none",
       strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
 facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 xlab("Age (days post-hatch)")

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Ad")),],aes(DPH, TotCopies)) +
 geom_smooth(fill="grey70",colour="aquamarine4") +
 geom_point(colour="black",fill="black",alpha=0.25,size=4) +
 theme_classic() +
 theme(axis.text=element_text(size=16,color="black"),
       axis.title=element_text(size=16,color="black"),
       strip.text.x=element_text(size=16),
       legend.position="none",
       strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
 facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 xlab("Age (days post-hatch)")

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Ov")),],aes(DPH, TotCopies)) +
 geom_smooth(fill="grey70",colour="aquamarine4") +
 geom_point(colour="black",fill="black",alpha=0.25,size=4) +
 theme_classic() +
 theme(axis.text=element_text(size=16,color="black"),
       axis.title=element_text(size=16,color="black"),
       strip.text.x=element_text(size=16),
       legend.position="none",
       strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
 facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 xlab("Age (days post-hatch)")

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("IT")),],aes(DPH, TotCopies)) +
 geom_smooth(fill="grey70",colour="aquamarine4") +
 geom_point(colour="black",fill="black",alpha=0.25,size=4) +
 theme_classic() +
 theme(axis.text=element_text(size=16,color="black"),
       axis.title=element_text(size=16,color="black"),
       strip.text.x=element_text(size=16),
       legend.position="none",
       strip.background = element_blank()) +
# scale_x_discrete(breaks=seq(2,14,2)) +
 facet_wrap(~TissueOrGonadType,labeller=labeller(TissueOrGonadType=tissueLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 xlab("Age (days post-hatch)")

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Average plots.

sexLabels = c('m'="Female","s"="Immature male","xl"="Mature male")

ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Hr")),],aes(VGLL3, TotCopies,color=VGLL3)) +
 geom_boxplot(aes(VGLL3,TotCopies,color=VGLL3),outlier.alpha = 0) + 
 stat_summary(fun.y=mean, geom="point", shape=17, size=4, color="black", fill="black",alpha=0.5) +
 geom_quasirandom(alpha=0.25,size=4) +
 scale_color_manual(values=c("blue","red")) +
 theme_classic() +
 theme(axis.text=element_text(size=16,color="black"),
       axis.title=element_text(size=16,color="black"),
       strip.text.x=element_text(size=16),
       legend.position="none",
       strip.background = element_blank()) +
 facet_wrap(~Gonads,labeller=labeller(Gonads=sexLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 ggtitle("Heart") +
 xlab("")

## Warning: `fun.y` is deprecated. Use `fun` instead.

ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Ad")),],aes(VGLL3, TotCopies,color=VGLL3)) +
 geom_boxplot(aes(VGLL3,TotCopies,color=VGLL3),outlier.alpha = 0) +
 stat_summary(fun.y=mean, geom="point", shape=17, size=4, color="black", fill="black",alpha=0.5) +
 geom_quasirandom(alpha=0.25,size=4) +
 scale_color_manual(values=c("blue","darkgreen", "darkorange","red")) +
 theme_classic() +
 theme(axis.text=element_text(size=16,color="black"),
       axis.title=element_text(size=16,color="black"),
       strip.text.x=element_text(size=16),
       legend.position="none",
       strip.background = element_blank()) +
 facet_wrap(~Gonads,labeller=labeller(Gonads=sexLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 ggtitle("Adipose") +
 xlab("")

## Warning: `fun.y` is deprecated. Use `fun` instead.

#t.test(plotData2$TotCopies[which(plotData2$VGLL3=="EE" & plotData2$TissueOrGonadType %in% c("Ad"))],plotData2$TotCopies[which(plotData2$VGLL3=="LL" & plotData2$TissueOrGonadType %in% c("Ad"))])

ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Mc")),],aes(VGLL3, TotCopies,color=VGLL3)) +
 geom_boxplot(aes(VGLL3,TotCopies,color=VGLL3),outlier.alpha = 0) + 
 stat_summary(fun.y=mean, geom="point", shape=17, size=4, color="black", fill="black",alpha=0.5) +
 geom_quasirandom(alpha=0.25,size=4) +
 scale_color_manual(values=c("blue","red")) +
 theme_classic() +
 theme(axis.text=element_text(size=16,color="black"),
       axis.title=element_text(size=16,color="black"),
       strip.text.x=element_text(size=16),
       legend.position="none",
       strip.background = element_blank()) +
 facet_wrap(~Gonads,labeller=labeller(Gonads=sexLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 ggtitle("Muscle") +
  xlab("")

## Warning: `fun.y` is deprecated. Use `fun` instead.

ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("Ov")),],aes(VGLL3, TotCopies,color=VGLL3)) +
 geom_boxplot(aes(VGLL3,TotCopies,color=VGLL3),outlier.alpha = 0) + 
 stat_summary(fun.y=mean, geom="point", shape=17, size=4, color="black", fill="black",alpha=0.5) +
 geom_quasirandom(alpha=0.25,size=4) +
 scale_color_manual(values=c("blue","darkgreen", "darkorange","red")) +
 theme_classic() +
 theme(axis.text=element_text(size=16,color="black"),
       axis.title=element_text(size=16,color="black"),
       strip.text.x=element_text(size=16),
       legend.position="none",
       strip.background = element_blank()) +
 facet_wrap(~Gonads,labeller=labeller(Gonads=sexLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 ggtitle("Ovaries") +
  xlab("")

## Warning: `fun.y` is deprecated. Use `fun` instead.

ggplot(plotData2[which(plotData2$TissueOrGonadType %in% c("IT")),],aes(VGLL3, TotCopies,color=VGLL3)) +
 geom_boxplot(aes(VGLL3,TotCopies,color=VGLL3),outlier.alpha = 0) + 
 stat_summary(fun.y=mean, geom="point", shape=17, size=4, color="black", fill="black",alpha=0.5) +
 geom_quasirandom(alpha=0.25,size=4) +
 scale_color_manual(values=c("blue","darkgreen", "darkorange","red")) +
 theme_classic() +
 theme(axis.text=element_text(size=16,color="black"),
       axis.title=element_text(size=16,color="black"),
       strip.text.x=element_text(size=16),
       legend.position="none",
       strip.background = element_blank()) +
 facet_wrap(~Gonads,labeller=labeller(Gonads=sexLabels)) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 ggtitle("Immature testes") +
 xlab("")

## Warning: `fun.y` is deprecated. Use `fun` instead.

Plot of EE & LL vgll3 expression time-series (immature gonads). Time-point 13 is exlcuded because there’s no immature EE data.

# subset for male gonads (immature) and only EE & LL genotypes - exclude TP 13 because only LL data
plotData4 = plotData2[which(plotData2$TissueOrRep=="Gn" & plotData2$TissueOrGonadType %in% c("IT") & plotData2$VGLL3 %in% c("EE","LL") & plotData2$Timepoint!=13),]

# fit polynomial regression - test models with and without interaction term 
fitSimple <- lm(plotData4$TotCopies ~ plotData4$VGLL3 + plotData4$DPH)
fitFull <- lm(plotData4$TotCopies ~  plotData4$VGLL3 + plotData4$VGLL3 * plotData4$DPH)
fitVgll3 = lm(plotData4$TotCopies ~ plotData4$VGLL3)
fitAge = lm(plotData4$TotCopies ~ plotData4$DPH)

# select for the best model
AIC(fitSimple,fitFull)

##           df      AIC
## fitSimple  4 354.3633
## fitFull    5 355.6761

BIC(fitSimple,fitFull)

##           df      BIC
## fitSimple  4 360.6973
## fitFull    5 363.5937

# significance of the models
summary(fitSimple)

## 
## Call:
## lm(formula = plotData4$TotCopies ~ plotData4$VGLL3 + plotData4$DPH)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -96.077 -18.849   7.399  19.450  59.502 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        57.3778    33.6170   1.707   0.0973 .
## plotData4$VGLL3LL  21.8992    10.7709   2.033   0.0501 .
## plotData4$DPH       0.1105     0.1238   0.893   0.3782  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31.04 on 33 degrees of freedom
## Multiple R-squared:  0.1315, Adjusted R-squared:  0.07881 
## F-statistic: 2.497 on 2 and 33 DF,  p-value: 0.09774

summary(fitFull)

## 
## Call:
## lm(formula = plotData4$TotCopies ~ plotData4$VGLL3 + plotData4$VGLL3 * 
##     plotData4$DPH)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -93.872 -19.611   8.308  16.811  58.086 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)                      96.18312   59.87614   1.606    0.118
## plotData4$VGLL3LL               -33.98100   71.97730  -0.472    0.640
## plotData4$DPH                    -0.03725    0.22563  -0.165    0.870
## plotData4$VGLL3LL:plotData4$DPH   0.21245    0.27053   0.785    0.438
## 
## Residual standard error: 31.22 on 32 degrees of freedom
## Multiple R-squared:  0.1479, Adjusted R-squared:  0.06799 
## F-statistic: 1.851 on 3 and 32 DF,  p-value: 0.1578

summary(fitVgll3)

## 
## Call:
## lm(formula = plotData4$TotCopies ~ plotData4$VGLL3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -99.847 -18.269   7.949  19.434  61.923 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         86.402      8.582  10.067 9.82e-12 ***
## plotData4$VGLL3LL   22.062     10.737   2.055   0.0477 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 30.94 on 34 degrees of freedom
## Multiple R-squared:  0.1105, Adjusted R-squared:  0.08429 
## F-statistic: 4.222 on 1 and 34 DF,  p-value: 0.04765

summary(fitAge)

## 
## Call:
## lm(formula = plotData4$TotCopies ~ plotData4$DPH)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -88.026 -20.139  -1.631  26.981  67.315 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    70.2455    34.5039   2.036   0.0496 *
## plotData4$DPH   0.1148     0.1293   0.888   0.3809  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.44 on 34 degrees of freedom
## Multiple R-squared:  0.02265,    Adjusted R-squared:  -0.006092 
## F-statistic: 0.7881 on 1 and 34 DF,  p-value: 0.3809

# are the slopes of the linear model different for EE and LL?
# option 1: look at interaction term
anova(lm(plotData4$TotCopies ~ plotData4$VGLL3 + plotData4$VGLL3 * plotData4$DPH))

## Analysis of Variance Table
## 
## Response: plotData4$TotCopies
##                               Df  Sum Sq Mean Sq F value  Pr(>F)  
## plotData4$VGLL3                1  4042.6  4042.6  4.1480 0.05003 .
## plotData4$DPH                  1   768.4   768.4  0.7885 0.38119  
## plotData4$VGLL3:plotData4$DPH  1   601.0   601.0  0.6167 0.43805  
## Residuals                     32 31186.9   974.6                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# calculate fitted values for means and 95% confidence interval
plotData4$fittedSimple = predict(fitSimple)
plotData4$fittedSimpleLwr = predict(fitSimple,interval="confidence")[,'lwr']
plotData4$fittedSimpleUpr = predict(fitSimple,interval="confidence")[,'upr']

# confidence interval for EE-LL contrast
marginal= emmeans(fitSimple, ~ VGLL3, weights="proportional")
pairs(marginal)

##  contrast estimate   SE df t.ratio p.value
##  EE - LL     -21.9 10.8 33 -2.033  0.0501

# contrast p-value
my_comparisons1  = data.frame(start=c("EE"), 
                            end=c("LL"), 
                            y=c(145,145),
                            label=c("*"))

# plot EE & LL
expPlot = ggplot(plotData4, aes(DPH,TotCopies,colour=VGLL3,group=VGLL3)) +
  geom_jitter(size=4,alpha=0.7) +
  theme_classic() + 
    scale_colour_manual(values=c("blue","red")) + 
  geom_smooth(aes(DPH,fittedSimple),method="loess") +
  #geom_ribbon(aes(ymin=fittedSimpleLwr, ymax=fittedSimpleUpr), alpha=0.1, color=NA) +
  xlab("Age (days post-hatch)") + 
  ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
  #geom_text(aes(label=Sample),position=position_jitter(width=0.2),hjust=0, vjust=0,size=3) + 
  ylim(0,250) + 
  theme(axis.text=element_text(size=14,color="black"),
        axis.title=element_text(size=14,color="black")
        ,plot.title = element_text(size=14, face="bold",hjust=0.5),
        legend.position = "none")

insert = ggplot(as.data.frame(marginal)) + 
  geom_pointrange(aes(y=emmean, ymin=lower.CL, ymax=upper.CL, x=VGLL3,color=VGLL3),size=1) + 
  scale_color_manual(values=c("blue","red")) +
  geom_signif(data=my_comparisons1,
              aes(xmin=start, xmax=end, annotations=label, y_position=y),
              textsize = 3, size=0.5, tip_length = 0.01,
              manual=TRUE) +
  ylab("EMM") + 
  xlab(expression(~italic(vgll3)~'')) +
  theme_classic() +
  theme(axis.text=element_text(size=14,color="black"),
        axis.title=element_text(size=14,color="black")
        ,plot.title = element_text(size=14, face="bold",hjust=0.5),
        legend.position = "none")

## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position

ggdraw() +
  draw_plot(expPlot) +
  draw_plot(insert, x = 0.17, y = .65, width = .2, height = .3)

## `geom_smooth()` using formula 'y ~ x'

Model female gonad EE and LL expression.

# subset for female gonads (both immature and mature) and only EE & LL genotypes
plotData8 = plotData2[which(plotData2$TissueOrRep=="Gn" & plotData2$TissueOrGonadType %in% c("Ov") & plotData2$VGLL3 %in% c("EE","LL")),]

# fit linear regression - alternatively (use poly() to perform the polynomial regressions)
fitSimple <- lm(plotData8$TotCopies ~ plotData8$VGLL3 + plotData8$DPH)
Anova(fitSimple)

## Anova Table (Type II tests)
## 
## Response: plotData8$TotCopies
##                 Sum Sq Df F value   Pr(>F)   
## plotData8$VGLL3  607.2  1  2.7967 0.105598   
## plotData8$DPH   2620.0  1 12.0668 0.001688 **
## Residuals       6079.4 28                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# significance of the model
summary(fitSimple)

## 
## Call:
## lm(formula = plotData8$TotCopies ~ plotData8$VGLL3 + plotData8$DPH)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.589  -8.882  -2.587   8.741  28.881 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       78.93409   13.88427   5.685 4.28e-06 ***
## plotData8$VGLL3LL  8.86503    5.30100   1.672  0.10560    
## plotData8$DPH     -0.17792    0.05122  -3.474  0.00169 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.74 on 28 degrees of freedom
## Multiple R-squared:  0.355,  Adjusted R-squared:  0.3089 
## F-statistic: 7.706 on 2 and 28 DF,  p-value: 0.002156

# confidence interval for EE-LL contrast
marginal= emmeans(fitSimple, ~ VGLL3, weights="proportional")
pairs(marginal)

##  contrast estimate  SE df t.ratio p.value
##  EE - LL     -8.87 5.3 28 -1.672  0.1056

# calculate fitted values
plotData8$fittedSimple = predict(fitSimple)

# plot EE & LL
ggplot(plotData8, aes(DPH,TotCopies,color=VGLL3,group=VGLL3)) +
  geom_jitter(size=4,alpha=0.7) +
  theme_classic() + 
  scale_color_manual(values=c("blue","red")) + 
  geom_smooth(aes(DPH,fittedSimple),method="loess") +
  xlab("Age (days post-hatch)") + 
  ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
  theme(axis.text=element_text(size=14,color="black"),
        axis.title=element_text(size=14,color="black"),
        plot.title = element_text(size=14, face="bold",hjust=0.5),
        legend.position = "none")

## `geom_smooth()` using formula 'y ~ x'

Plot heart expression for males and females. Compile plot data and fitted values from lm into one data frame

# split by gonad type
plotData10 = plotData2[which(plotData2$TissueOrRep=="Hr" & plotData2$Gonads %in% c("s") & plotData2$VGLL3 %in% c("EE","LL")),]
plotData11 = plotData2[which(plotData2$TissueOrRep=="Hr" & plotData2$Gonads %in% c("m") & plotData2$VGLL3 %in% c("EE","LL")),]
plotData12 = plotData2[which(plotData2$TissueOrRep=="Hr" & plotData2$Gonads %in% c("xl") & plotData2$VGLL3 %in% c("EE","LL")),]

# predict fitted values
fitSimple1 <- lm(plotData10$TotCopies ~ plotData10$VGLL3 + plotData10$DPH)
Anova(fitSimple1)

## Anova Table (Type II tests)
## 
## Response: plotData10$TotCopies
##                  Sum Sq Df F value  Pr(>F)  
## plotData10$VGLL3  26197  1  3.7175 0.06483 .
## plotData10$DPH    23611  1  3.3505 0.07867 .
## Residuals        183219 26                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plotData10$fittedSimple = predict(fitSimple1)

fitSimple2 <- lm(plotData11$TotCopies ~ plotData11$VGLL3 + plotData11$DPH)
Anova(fitSimple2)

## Anova Table (Type II tests)
## 
## Response: plotData11$TotCopies
##                  Sum Sq Df F value Pr(>F)
## plotData11$VGLL3    690  1  0.1513 0.6998
## plotData11$DPH    10259  1  2.2490 0.1432
## Residuals        150530 33

plotData11$fittedSimple = predict(fitSimple2)

fitSimple3 <- lm(plotData12$TotCopies ~ plotData12$VGLL3 + plotData12$DPH)
Anova(fitSimple3)

## Anova Table (Type II tests)
## 
## Response: plotData12$TotCopies
##                  Sum Sq Df F value Pr(>F)
## plotData12$VGLL3 1830.0  1  1.0108 0.3609
## plotData12$DPH    725.7  1  0.4008 0.5545
## Residuals        9052.6  5

plotData12$fittedSimple = predict(fitSimple3)

# combine into one data frame
plotDataHeart = rbind(plotData10,plotData11,plotData12)

# plot
tissueLabels = c("m"="Female","s"="Immature male","xl"="Mature male")
ggplot(plotDataHeart,aes(DPH,TotCopies,group=VGLL3, color=VGLL3)) +
 geom_point(aes(DPH, TotCopies),size=3,alpha=0.7) +
 facet_wrap(~Gonads,labeller=labeller(Gonads=tissueLabels)) +
 geom_line(aes(y=fittedSimple)) +
 theme_classic() +
 scale_colour_manual(values=c("blue","red")) +
 theme(axis.title.y=element_text(size=16),
       axis.text.y=element_text(size=16, color="black"),
       axis.title.x=element_text(size=16),
       axis.text.x=element_text(hjust=1, vjust=0.5, size=16, color="black", angle=90),
       strip.text.x=element_text(size=16),
       strip.background = element_blank()) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 xlab("Age (days post-hatch)")

# heart expression difference between immature and mature maless - means
t.test(
plotData2$TotCopies[which(plotData2$TissueOrRep=='Hr' & plotData2$Gonads == "xl")],
plotData2$TotCopies[which(plotData2$TissueOrRep=='Hr' & plotData2$Gonads == "s")])

## 
##  Welch Two Sample t-test
## 
## data:  plotData2$TotCopies[which(plotData2$TissueOrRep == "Hr" & plotData2$Gonads == "xl")] and plotData2$TotCopies[which(plotData2$TissueOrRep == "Hr" & plotData2$Gonads == "s")]
## t = -3.0359, df = 27.8, p-value = 0.005163
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -112.96981  -21.92361
## sample estimates:
## mean of x mean of y 
##  61.85497 129.30168

Plot adipose expression for males and females.

# split by gonad type
plotData13 = plotData2[which(plotData2$TissueOrRep=="Ad" & plotData2$Gonads %in% c("s") & plotData2$VGLL3 %in% c("EE","LL")),]
plotData14 = plotData2[which(plotData2$TissueOrRep=="Ad" & plotData2$Gonads %in% c("m") & plotData2$VGLL3 %in% c("EE","LL")),]
plotData15 = plotData2[which(plotData2$TissueOrRep=="Ad" & plotData2$Gonads %in% c("xl") & plotData2$VGLL3 %in% c("EE","LL")),]

# predict fitted values
fitSimple4 <- lm(plotData13$TotCopies ~ plotData13$VGLL3 + plotData13$DPH)
Anova(fitSimple4)

## Anova Table (Type II tests)
## 
## Response: plotData13$TotCopies
##                  Sum Sq Df F value Pr(>F)
## plotData13$VGLL3  468.4  1  1.9737 0.1734
## plotData13$DPH      3.0  1  0.0125 0.9118
## Residuals        5458.2 23

plotData13$fittedSimple = predict(fitSimple4)

fitSimple5 <- lm(plotData14$TotCopies ~ plotData14$VGLL3 + plotData14$DPH)
Anova(fitSimple5)

## Anova Table (Type II tests)
## 
## Response: plotData14$TotCopies
##                   Sum Sq Df F value Pr(>F)
## plotData14$VGLL3     1.0  1  0.0030 0.9566
## plotData14$DPH     504.5  1  1.5226 0.2259
## Residuals        10934.3 33

plotData14$fittedSimple = predict(fitSimple5)

fitSimple6 <- lm(plotData15$TotCopies ~ plotData15$VGLL3 + plotData15$DPH)
Anova(fitSimple6)

## Anova Table (Type II tests)
## 
## Response: plotData15$TotCopies
##                   Sum Sq Df F value Pr(>F)
## plotData15$VGLL3  185.97  1  0.2881 0.6199
## plotData15$DPH     59.98  1  0.0929 0.7757
## Residuals        2582.12  4

plotData15$fittedSimple = predict(fitSimple6)

# combine into one data frame
plotDataAdipose = rbind(plotData13,plotData14,plotData15)

# plot
tissueLabels = c("m"="Female","s"="Immature male","xl"="Mature male")
ggplot(plotDataAdipose,aes(DPH,TotCopies,group=VGLL3, color=VGLL3)) +
 geom_point(aes(DPH, TotCopies),size=3,alpha=0.7) +
 facet_wrap(~Gonads,labeller=labeller(Gonads=tissueLabels)) +
 geom_line(aes(y=fittedSimple)) +
 theme_classic() +
 scale_colour_manual(values=c("blue","red")) +
 theme(axis.title.y=element_text(size=16),
       axis.text.y=element_text(size=16, color="black"),
       axis.title.x=element_text(size=16),
       axis.text.x=element_text(hjust=1, vjust=0.5, size=16, color="black", angle=90),
       strip.text.x=element_text(size=16),
       strip.background = element_blank()) +
 ylab(expression(~italic(vgll3)~'copies / ng RNA')) +
 xlab("Age (days post-hatch)")

Comparison of vgll3 exon expression

Data from averaaged replicates (N=2).

colnames(scamp1Means)[2]="TotCopies"
colnames(vgll3Exon1Means)[2]="TotCopies"

scamp1Means$Assay = "scamp1"
vgll3Mis1Means$Assay = "vgll3Mis1"
vgll3Mis2Means$Assay = "vgll3Mis2"
vgll3Exon1Means$Assay = "vgll3Exon1"

head(scamp1Means)

##   Sample TotCopies sd sem n VGLL3  Assay
## 1 282-Gn  419.3939 NA  NA 1    EE scamp1
## 2 283-Gn  517.5627 NA  NA 1    LL scamp1
## 3 300-Gn  474.7628 NA  NA 1    LL scamp1
## 4 320-Gn  460.4520 NA  NA 1    LL scamp1
## 5 327-Gn  571.4286 NA  NA 1    EE scamp1
## 6 328-Gn  360.8355 NA  NA 1    LL scamp1

head(vgll3Mis1Means)

##   Sample TotCopies        sd       sem n     Assay VGLL3
## 1 282-Gn   86.0067 25.173906 17.800639 2 vgll3Mis1    EE
## 2 283-Gn  108.5455 10.564514  7.470240 2 vgll3Mis1    LL
## 3 300-Gn  113.9462 20.935795 14.803843 2 vgll3Mis1    LL
## 4 320-Gn  103.7675  1.914799  1.353968 2 vgll3Mis1    LL
## 5 327-Gn  101.9139 71.793047 50.765350 2 vgll3Mis1    EE
## 6 328-Gn  110.6383 27.418472 19.387787 2 vgll3Mis1    LL

head(vgll3Mis2Means)

##   Sample TotCopies        sd       sem n VGLL3     Assay
## 1 282-Gn  69.98485 21.877455 15.469697 2    EE vgll3Mis2
## 2 283-Gn  77.65591  5.200656  3.677419 2    LL vgll3Mis2
## 3 300-Gn  76.66034        NA        NA 1    LL vgll3Mis2
## 4 320-Gn  62.76412 22.789213 16.114407 2    LL vgll3Mis2
## 5 327-Gn 110.51213 29.732792 21.024259 2    EE vgll3Mis2
## 6 328-Gn  63.20627 16.571777 11.718016 2    LL vgll3Mis2

head(vgll3Exon1Means)

##   Sample TotCopies sd sem n VGLL3      Assay
## 1 282-Gn  103.9394 NA  NA 1    EE vgll3Exon1
## 2 283-Gn  147.6703 NA  NA 1    LL vgll3Exon1
## 3 300-Gn  129.7913 NA  NA 1    LL vgll3Exon1
## 4 320-Gn  122.3164 NA  NA 1    LL vgll3Exon1
## 5 327-Gn  214.5553 NA  NA 1    EE vgll3Exon1
## 6 328-Gn  126.3708 NA  NA 1    LL vgll3Exon1

combinedAssays = rbind(scamp1Means,vgll3Exon1Means,vgll3Mis1Means,vgll3Mis2Means)
head(combinedAssays)

##   Sample TotCopies sd sem n VGLL3  Assay
## 1 282-Gn  419.3939 NA  NA 1    EE scamp1
## 2 283-Gn  517.5627 NA  NA 1    LL scamp1
## 3 300-Gn  474.7628 NA  NA 1    LL scamp1
## 4 320-Gn  460.4520 NA  NA 1    LL scamp1
## 5 327-Gn  571.4286 NA  NA 1    EE scamp1
## 6 328-Gn  360.8355 NA  NA 1    LL scamp1

Plot expression levels in different exons.

assayLabels = c("vgll3Exon1"="Exon 1-2 boundary","vgll3Mis1"="Exon 2","vgll3Mis2"="Exon 3")

ggplot(combinedAssays[which(combinedAssays$Assay != "scamp1"),],aes(VGLL3,TotCopies,color=VGLL3))+
  #geom_boxplot(outlier.alpha = 0) + 
  geom_quasirandom(size=5,alpha=0.3) +
  stat_summary(fun=mean, geom="point", shape=17, size=4, color="black", fill="black",alpha=0.5) +
  scale_color_manual(values=c("blue","red")) + 
  facet_wrap(~Assay, labeller=labeller(Assay=assayLabels)) +
  theme_classic() +
  theme(axis.text.x=element_text(size=14,color="black"),
       axis.title.x=element_blank(),
       axis.ticks.x=element_blank(),
       axis.text.y=element_text(size=14,color="black"),
       axis.title.y=element_text(size=14,color="black"),
       axis.line.x = element_blank(),
       strip.background=element_blank(),
       strip.text=element_text(size=14,color="black"),
       legend.position="none") +
  xlab("Age (days post-hatch)") + 
  ylab("copies / ng RNA")

## Warning: Removed 2 rows containing non-finite values (stat_summary).

## Warning: Removed 2 rows containing missing values (position_quasirandom).

t.test(combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Exon1" & combinedAssays$VGLL3 == "EE")],
       combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Exon1" & combinedAssays$VGLL3 == "LL")])

## 
##  Welch Two Sample t-test
## 
## data:  combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Exon1" & combinedAssays$VGLL3 == "EE")] and combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Exon1" & combinedAssays$VGLL3 == "LL")]
## t = 0.58699, df = 18.486, p-value = 0.5643
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -17.37566  30.88477
## sample estimates:
## mean of x mean of y 
##  138.0463  131.2918

t.test(combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis1" & combinedAssays$VGLL3 == "EE")],
       combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis1" & combinedAssays$VGLL3 == "LL")])

## 
##  Welch Two Sample t-test
## 
## data:  combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis1" & combinedAssays$VGLL3 == "EE")] and combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis1" & combinedAssays$VGLL3 == "LL")]
## t = -2.5454, df = 31.038, p-value = 0.0161
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -27.539557  -3.039125
## sample estimates:
## mean of x mean of y 
##  91.06771 106.35705

t.test(combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis2" & combinedAssays$VGLL3 == "EE")],
       combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis2" & combinedAssays$VGLL3 == "LL")])

## 
##  Welch Two Sample t-test
## 
## data:  combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis2" & combinedAssays$VGLL3 == "EE")] and combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis2" & combinedAssays$VGLL3 == "LL")]
## t = 2.0689, df = 25.169, p-value = 0.04898
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   0.08705111 35.88134301
## sample estimates:
## mean of x mean of y 
##  97.23139  79.24719

ASE in vgll3 mis-sense SNP 1

Expression levels of the alleles in heterozygotes. The column FractionalAbundance gives Ch1 versus Ch2 percentage of counts relative to the sum of counts. This is within samples so we don’t need to take into account the starting amount of RNA. If there is a cis-regulatory difference that makes E-allele lower expressed it should be seen as ASE towards the L-allele in heterozygotes (keeping in mind the possibly higher efficiency of the L-allele probe). The analysis takes into account the possibly higher efficiency of the L (C)-probe using 1-effFactor correction.

Channel 1 corresponds to FAM dye and the C-allele (late maturation). Channel 2 corresponds to VIC dye and the T-allele (early maturation).

Below we analyze if Ch1 and Ch2 give the same number of copies in the control DNA. Any difference from 50:50 ratios is due to one probe being lower efficiency (giving lower number of counts). We are calculating a correction factor to account for differences in efficiency based on the mean of all e28 DNA controls we are including in the results table.

controlResMis1 = ddPCRres[which(ddPCRres$Sample == "e28-DNA"),]

# for poisson.test --> based on rate ratio
LnegCountsCtr = controlResMis1[which(controlResMis1$TargetType == "Ch1Unknown"),"Negatives"]
EnegCountsCtr = controlResMis1[which(controlResMis1$TargetType == "Ch2Unknown"),"Negatives"]
LcountsCtr = controlResMis1[which(controlResMis1$TargetType == "Ch1Unknown"),"Positives"]
EcountsCtr = controlResMis1[which(controlResMis1$TargetType == "Ch2Unknown"),"Positives"]
effFactor = apply(cbind(LcountsCtr,EcountsCtr,LnegCountsCtr,EnegCountsCtr), 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]))[["estimate"]])
effFactor

##  [1] 1.0273950 1.0289535 1.0477013 1.0095218 1.0035206 1.0297707 1.0005037
##  [8] 1.0246971 1.0649968 0.9955902

effFactor = mean(effFactor)
paste("Mean of all control reps:",effFactor)

## [1] "Mean of all control reps: 1.0232650904439"

Below we test the difference between L and E allele counts within each heterozygous samples using a comparison of two Poisson rates. There is no consensus in the literature which test to use. For example in RNAseq people typically use a simple binomial test, but there it’s hard to know the “negative” rate required to model the process using a Poisson distribution. Now we have this - it’s the number of negative droplets - so using Poisson modeling may be more appropriate (this is what the QuantaSoft software from Bio-Rad uses). For this we need the number of positive AND negative counts. See more about comparing Poisson rates here: https://cran.r-project.org/web/packages/rateratio.test/vignettes/rateratio.test.pdf We are using the value stored as effFactor (relative freq of channel 1 positives to channel 2 positives based on genomic DNA from a heterozygote) as a null hypothesis.

LnegCountsHet = plotData2$Ch1Neg[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn' )]
EnegCountsHet = plotData2$Ch2Neg[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn')]
LcountsHet = plotData2$Ch1Pos[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn' )]
EcountsHet = plotData2$Ch2Pos[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn' )]

# Initiate table for results of Poisson test
poissonRes = as.data.frame(matrix(data=NA,ncol=10,nrow=length(LcountsHet),dimnames=list(c(plotData2$Sample[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn' )]),c("Sample","vgll3","sex","TotCopies","Lcounts","LcountsNeg","Ecounts","EcountsNeg","poissonPvalue","Timepoint"))))
poissonRes$Sample = rownames(poissonRes)
poissonRes$vgll3 = plotData2$VGLL3[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn' )]
poissonRes$SexPheno = plotData2$SexPheno[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn' )]
poissonRes$Gonads = plotData2$Gonads[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn' )]
poissonRes$TotCopies = plotData2$TotCopies[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn')]
poissonRes$Lcounts = LcountsHet
poissonRes$Ecounts = EcountsHet
poissonRes$LcountsNeg = LnegCountsHet
poissonRes$EcountsNeg = EnegCountsHet
poissonRes$Timepoint = plotData2$Timepoint[which(plotData2$VGLL3 %in% c("LE","EL") & plotData2$TissueOrRep == 'Gn')]

# Remove na's (individuals that have been collected but that not yet been analyzed).
poissonRes = poissonRes[which(is.na(poissonRes$TotCopies) == F),]

# Test ASE using Poisson rate ratio
poissonRes$poissonPvalue = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["p.value"]])
poissonRes$poissonRateRatio = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["estimate"]])
poissonRes$rateRatioCI95low = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["conf.int"]][1])
poissonRes$rateRatioCI95high = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["conf.int"]][2])

# Transfer DPH from previous data frames
poissonRes$DPH = plotData2$DPH[match(poissonRes$Sample,plotData2$Sample)]
head(poissonRes)

##        Sample vgll3 sex TotCopies Lcounts LcountsNeg Ecounts EcountsNeg
## 269-Gn 269-Gn    LE  NA 124.72727    1010      16017     920      16107
## 272-Gn 272-Gn    EL  NA  37.98995     280      15630     227      15683
## 281-Gn 281-Gn    LE  NA  70.98765     680      13632     685      13627
## 284-Gn 284-Gn    EL  NA  87.42857     726      10716     716      10726
## 285-Gn 285-Gn    LE  NA  40.24845     813      10816     740      10889
## 289-Gn 289-Gn    LE  NA  68.40149     562      13452     512      13502
##        poissonPvalue Timepoint SexPheno Gonads poissonRateRatio
## 269-Gn     0.0965949         7       NA      m        1.1039948
## 272-Gn     0.0330781         7       NA      m        1.2376628
## 281-Gn     0.5882697         7       NA      m        0.9923366
## 284-Gn     0.8952450         8       NA      s        1.0149127
## 285-Gn     0.1278868         8       NA      m        1.1060637
## 289-Gn     0.2340215         8       NA      s        1.1017361
##        rateRatioCI95low rateRatioCI95high      DPH
## 269-Gn        1.0086534          1.208464 193.9583
## 272-Gn        1.0351918          1.481012 193.9583
## 281-Gn        0.8911257          1.105031 193.9583
## 284-Gn        0.9141042          1.126860 219.9583
## 285-Gn        0.9999919          1.223535 219.9583
## 289-Gn        0.9756580          1.244324 219.9583

# sort according to Poisson rate ratio
poissonRes$Sample = factor(poissonRes$Sample, levels = poissonRes$Sample[order(poissonRes$poissonRateRatio)])

Plot Poisson rate ratio of L allele expression versus E allele expression. Null hypothesis of 1:1 expression rate is calculated based on heterozygous DNA as template. Points are colored according to wether the 95% confidence interval of rate ratio includes zero.

plotData9 = plotData2[which(plotData2$VGLL3 %in% c("EL","LE") & plotData2$CollectionDate %in% c("19022018","20032018","20042018","17052018") == F & plotData2$TissueOrGonadType == 'IT'),]
plotData9$poissonPvalue = poissonRes$poissonPvalue[match(plotData9$Sample,poissonRes$Sample)]
plotData9$poissonRateRatio = poissonRes$poissonRateRatio[match(plotData9$Sample,poissonRes$Sample)]
plotData9$poissonRateRatio = poissonRes$poissonRateRatio[match(plotData9$Sample,poissonRes$Sample)]
plotData9$rateRatioCI95low = poissonRes$rateRatioCI95low[match(plotData9$Sample,poissonRes$Sample)]
plotData9$rateRatioCI95high = poissonRes$rateRatioCI95high[match(plotData9$Sample,poissonRes$Sample)]

# order by poisson rate ratio
plotData9$Sample = factor(plotData9$Sample, levels = plotData9$Sample[order(plotData9$poissonRateRatio)])

g = ggplot(plotData9,aes(x=Sample,log(poissonRateRatio,base=2),ymin=log(rateRatioCI95low,base=2),ymax=log(rateRatioCI95high,base=2),color=poissonPvalue<0.05)) +
 geom_pointrange(alpha=0.7) +
 geom_hline(yintercept = log(effFactor,base=2), linetype=2) +
 theme_classic() +
 theme(axis.title.x=element_blank(),
       axis.text.x=element_blank(),
       axis.ticks.x=element_blank(),
       axis.line.x = element_blank(),
       axis.text.y=element_text(size=14,color="black"),
       axis.title.y=element_text(size=14,color="black")) +
 scale_color_manual(values=c("darkgrey","black")) +
 theme(legend.position = "none") +
 ylab("Log2 L-rate/E-rate") + 
 ylim(-1.1,1.1)
rateFig = ggMarginal(g, type = "density", fill="darkgrey", margins="y")
rateFig

ggplot(plotData9) +
         geom_quasirandom(aes(1, FractionalAbundance,color=poissonPvalue),dodge.width=0.1,groupOnX=T,size=5,alpha=0.7) +
         geom_segment(y=mean(plotData9$FractionalAbundance),yend=mean(plotData9$FractionalAbundance),x=0.8,xend=1.2,size=1.5,lineend="round") +
         geom_hline(yintercept=mean(controlResMis1$FractionalAbundance,na.rm=T),color="darkgrey",linetype="dotted") +
         theme_classic() +
         ylab("L/(L+E) allele abundance %") +
         theme(axis.title.x=element_blank(),
               axis.text.x=element_blank(),
               axis.ticks.x=element_blank(),
               axis.text.y=element_text(size=14,color="black"),
               axis.title.y=element_text(size=14,color="black"),
               axis.line.x = element_blank(),
               legend.position="bottom",
               legend.title = element_text(hjust=1,vjust=0.75,size=10,color="black")) +
          #guides(colour=guide_legend(title.position = "top")) + 
          scale_color_gradient2(name = "ASE P-value", high="green", low="black", mid="green4", midpoint=0.4)

Can ASE account for EE-LL difference in exon 2 assay? Compare number of expressed molecules from each allele.

# average molecule difference between LL and EE
aveHomozDiffExon2 = mean(combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis1" & combinedAssays$VGLL3 == "EE")],na.rm=T) -
       mean(combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis1" & combinedAssays$VGLL3 == "LL")],na.rm=T)

# average molecule difference between L and E
aveAlleleDiffExon2 = mean(plotData9$Ch2copies[which(plotData9$Sample %in% c("e28-DNA","G-gBlock","C-gBlock") == F)] ,na.rm=T) - mean(plotData9$Ch1copies[which(plotData9$Sample %in% c("e28-DNA","G-gBlock","C-gBlock") == F)],na.rm=T)

# how much % does 2*average L-E difference account from LL-EE difference (in molecules)?
2*(aveAlleleDiffExon2) / (aveHomozDiffExon2) * 100

## [1] 88.87064

ASE in vgll3 mis-sense SNP 2

colnames(ddPCRresMis2) = iconv(colnames(ddPCRresMis2), to = "ASCII", sub = "u")
colnames(ddPCRresMis2) = c(colnames(ddPCRresMis2)[-2],"Empty")
colnames(ddPCRresMis2)[1] = "Well"

# assign falsely labeled well
#ddPCRres$Sample[which(ddPCRres$Well == "H04")] = "e28-DNA"
ddPCRresMis2$Sample[which(ddPCRresMis2$Well == "D03")] = "437-Gn"
ddPCRresMis2$Sample[which(ddPCRresMis2$Well == "D09")] = "437-Gn"

# remove failed wells
ddPCRresMis2 = ddPCRresMis2[which(ddPCRresMis2$AcceptedDroplets > 10000),]

# remove samples that could not be re-analyzed from non-diluted RNA samples
ddPCRresMis2 = ddPCRresMis2[which(ddPCRresMis2$Sample %in% c("289-Gn","322-Gn","326-Gn","337-Gn","338-Gn","358-Gn","370-Gn","373-Gn","385-Gn","389-Gn","390-Gn") == F),]

Additional control wells.

colnames(ddPCRresAdd) = iconv(colnames(ddPCRresAdd), to = "ASCII", sub = "u")
colnames(ddPCRresAdd) = c(colnames(ddPCRresAdd)[-2],"Empty")
colnames(ddPCRresAdd)[1] = "Well"

# remove failed wells
ddPCRresAdd = ddPCRresAdd[which(ddPCRresAdd$AcceptedDroplets > 10000),]

controlResMis2 = rbind(ddPCRresMis2[which(ddPCRresMis2$Sample == "e28-DNA"),],ddPCRresAdd[which(ddPCRresAdd$Sample == "e28-DNA"),])

# for poisson.test --> based on rate ratio
LnegCountsCtr = controlResMis2[which(controlResMis2$TargetType == "Ch1Unknown"),"Negatives"]
EnegCountsCtr = controlResMis2[which(controlResMis2$TargetType == "Ch2Unknown"),"Negatives"]
LcountsCtr = controlResMis2[which(controlResMis2$TargetType == "Ch1Unknown"),"Positives"]
EcountsCtr = controlResMis2[which(controlResMis2$TargetType == "Ch2Unknown"),"Positives"]
effFactor = apply(cbind(LcountsCtr,EcountsCtr,LnegCountsCtr,EnegCountsCtr), 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]))[["estimate"]])
effFactor

## [1] 0.9125112 0.9086053 1.0547780 1.0334503 1.0157662 1.0311079 0.8821894
## [8] 1.0048434 0.9456879

effFactor = mean(effFactor)
paste("Mean of all control reps:",effFactor)

## [1] "Mean of all control reps: 0.976548842009717"

plotData16 = ddPCRresMis2

LnegCountsHet = plotData16$Negatives[which(plotData16$TargetType == "Ch1Unknown")]
EnegCountsHet = plotData16$Negatives[which(plotData16$TargetType == "Ch2Unknown")]
LcountsHet = plotData16$Positives[which(plotData16$TargetType == "Ch1Unknown")]
EcountsHet = plotData16$Positives[which(plotData16$TargetType == "Ch2Unknown")]

# Initiate table for results of Poisson test
poissonRes = as.data.frame(matrix(data=NA,ncol=6,nrow=length(LcountsHet),
                                  dimnames=list(plotData16$Positives[which(plotData16$TargetType == "Ch1Unknown")],c("Sample","Lcounts","LcountsNeg","Ecounts","EcountsNeg","poissonPvalue"))))
poissonRes$Sample = plotData16$Sample[which(plotData16$TargetType == "Ch1Unknown")]
poissonRes$Lcounts = LcountsHet
poissonRes$Ecounts = EcountsHet
poissonRes$LcountsNeg = LnegCountsHet
poissonRes$EcountsNeg = EnegCountsHet

# Test ASE using Poisson rate ratio
poissonRes$poissonPvalue = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["p.value"]])
poissonRes$poissonRateRatio = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["estimate"]])
poissonRes$rateRatioCI95low = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["conf.int"]][1])
poissonRes$rateRatioCI95high = apply(poissonRes[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]), r=effFactor)[["conf.int"]][2])

plotData17 = plotData16[which(plotData16$TargetType == "Ch1Unknown" & plotData16$Sample %in% c("e28-DNA","G-gBlock","C-gBlock") == F & is.na(plotData16$FractionalAbundance) == F),c("Sample","FractionalAbundance")]

plotData17$poissonPvalue = poissonRes$poissonPvalue[match(plotData17$Sample,poissonRes$Sample)]
plotData17$poissonRateRatio = poissonRes$poissonRateRatio[match(plotData17$Sample,poissonRes$Sample)]
plotData17$rateRatioCI95low = poissonRes$rateRatioCI95low[match(plotData17$Sample,poissonRes$Sample)]
plotData17$rateRatioCI95high = poissonRes$rateRatioCI95high[match(plotData17$Sample,poissonRes$Sample)]

ggplot(plotData17) +
         geom_quasirandom(aes(1, FractionalAbundance,color=poissonPvalue),dodge.width=0.1,groupOnX=T,size=5,alpha=0.7) +
         geom_segment(y=mean(plotData17$FractionalAbundance),yend=mean(plotData17$FractionalAbundance),x=0.8,xend=1.2,size=1.5,lineend="round") +
         geom_hline(yintercept=mean(controlResMis2$FractionalAbundance,na.rm=T),color="darkgrey",linetype="dotted") +
         theme_classic() +
         ylab("L/(L+E) allele abundance %") +
         theme(axis.title.x=element_blank(),
               axis.text.x=element_blank(),
               axis.ticks.x=element_blank(),
               axis.text.y=element_text(size=14,color="black"),
               axis.title.y=element_text(size=14,color="black"),
               axis.line.x = element_blank(),
               legend.position="bottom",
               legend.title = element_text(hjust=1,vjust=0.75,size=10,color="black")) +
          #guides(colour=guide_legend(title.position = "top")) + 
          scale_color_gradient2(name = "ASE P-value", high="green", low="black", mid="green4", midpoint=0.4)

nrow(plotData17)

## [1] 35

nrow(plotData17[which(plotData17$poissonPvalue<0.05 & plotData17$FractionalAbundance>49.4),])

## [1] 0

nrow(plotData17[which(plotData17$poissonPvalue<0.05 & plotData17$FractionalAbundance<49.4),])

## [1] 31

mean(plotData17$FractionalAbundance)

## [1] 46.27143

Attact concentrations fo mis-sense SNP 2.

# add starting total RNA concentration
ddPCRresMis2$RNAconc = RNAconcMis2$Concentration[match(ddPCRresMis2$Sample,RNAconcMis2$Sample)]

Normalizing to input RNA amount. QuantaSoft also provides calculated number of template molecules per 20 ul well. Each 20 ul well contains 2 ul of template at concentration X ng/ul_H2O, meaning 2•X ng/ul_H2O total RNA per well. Copies per ng total input RNA is then simply \(\frac{copies per well}{2*RNAconc}\)

# Split ddPCR results into two based on channels
ddPCRch1 = ddPCRresMis2[which(ddPCRresMis2$TargetType == "Ch1Unknown"),]
ddPCRch2 = ddPCRresMis2[which(ddPCRresMis2$TargetType == "Ch2Unknown"),]

# calculation
ddPCRch1$Copies = as.numeric(ddPCRch1$CopiesPer20uLWell)/(ddPCRch1$RNAconc*2)
ddPCRch2$Copies = as.numeric(ddPCRch2$CopiesPer20uLWell)/(ddPCRch2$RNAconc*2)

# attach back to main data frame
plotData17$Ch1copies = ddPCRch1$Copies[match(plotData17$Sample,ddPCRch1$Sample)]
plotData17$Ch2copies = ddPCRch2$Copies[match(plotData17$Sample,ddPCRch2$Sample)]
plotData17$TotCopies = plotData17$Ch1copies + plotData17$Ch2copies 
head(plotData17)

##    Sample FractionalAbundance poissonPvalue poissonRateRatio rateRatioCI95low
## 1  284-Gn                46.2  4.028261e-03        0.8540371        0.7787323
## 3  301-Gn                47.3  4.199131e-03        0.8891630        0.8334008
## 6  329-Gn                47.6  3.757661e-03        0.9008410        0.8528048
## 7  330-Gn                46.9  9.195227e-03        0.8801711        0.8133884
## 8  333-Gn                48.3  1.185848e-01        0.9288378        0.8721419
## 11 593-Gn                44.5  1.480343e-18        0.7831328        0.7452175
##    rateRatioCI95high Ch1copies Ch2copies TotCopies
## 1          0.9364747  39.51220  46.09756  85.60976
## 3          0.9486070        NA        NA        NA
## 6          0.9515517  62.42236  68.63354 131.05590
## 7          0.9523497  60.50955  68.15287 128.66242
## 8          0.9891882  52.90520  56.57492 109.48012
## 11         0.8229271  42.11538  52.50000  94.61538

Can ASE account for EE-LL difference? Compare number of expressed molecules from each allele.

# average molecule difference between LL and EE
aveHomozDiff = mean(combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis2" & combinedAssays$VGLL3 == "EE")],na.rm=T) -
       mean(combinedAssays$TotCopies[which(combinedAssays$Assay == "vgll3Mis2" & combinedAssays$VGLL3 == "LL")],na.rm=T)

# average molecule difference between L and E
aveAlleleDiff = mean(plotData17$Ch2copies[which(plotData17$Sample %in% c("e28-DNA","G-gBlock","C-gBlock") == F)] ,na.rm=T) - mean(plotData17$Ch1copies[which(plotData17$Sample %in% c("e28-DNA","G-gBlock","C-gBlock") == F)],na.rm=T)

# how much % does 2*average L-E difference account from LL-EE difference (in molecules)?
2*(aveAlleleDiff) / (aveHomozDiff) * 100

## [1] 83.21761

ASE in alternative 5’ UTR SNP

colnames(ddPCRresUTR) = iconv(colnames(ddPCRresUTR), to = "ASCII", sub = "u")
colnames(ddPCRresUTR) = c(colnames(ddPCRresUTR)[-2],"Empty")
colnames(ddPCRresUTR)[1] = "Well"

# assign falsely labeled well
ddPCRresUTR$Sample[which(ddPCRresUTR$Well == "D03")] = "437-Gn"
ddPCRresUTR$Sample[which(ddPCRresUTR$Well == "D09")] = "437-Gn"

# remove failed wells
ddPCRresUTR = ddPCRresUTR[which(ddPCRresUTR$AcceptedDroplets > 10000),]

# remove samples that could not be re-analyzed from non-diluted RNA samples
ddPCRresUTR = ddPCRresUTR[which(ddPCRresUTR$Sample %in% c("289-Gn","322-Gn","326-Gn","337-Gn","338-Gn","358-Gn","370-Gn","373-Gn","385-Gn","389-Gn","390-Gn") == F),]

plotData18 = ddPCRresUTR

LnegCountsHet = plotData18$Negatives[which(plotData18$TargetType == "Ch1Unknown")]
EnegCountsHet = plotData18$Negatives[which(plotData18$TargetType == "Ch2Unknown")]
LcountsHet = plotData18$Positives[which(plotData18$TargetType == "Ch1Unknown")]
EcountsHet = plotData18$Positives[which(plotData18$TargetType == "Ch2Unknown")]

# Initiate table for results of Poisson test
poissonResUTR = as.data.frame(matrix(data=NA,ncol=6,nrow=length(LcountsHet),
                                  dimnames=list(plotData18$Positives[which(plotData18$TargetType == "Ch1Unknown")],c("Sample","Lcounts","LcountsNeg","Ecounts","EcountsNeg","poissonPvalue"))))
poissonResUTR$Sample = plotData18$Sample[which(plotData18$TargetType == "Ch1Unknown")]
poissonResUTR$Lcounts = LcountsHet
poissonResUTR$Ecounts = EcountsHet
poissonResUTR$LcountsNeg = LnegCountsHet
poissonResUTR$EcountsNeg = EnegCountsHet

# Test ASE using Poisson rate ratio
poissonResUTR$poissonPvalue = apply(poissonResUTR[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]))[["p.value"]])
poissonResUTR$poissonRateRatio = apply(poissonResUTR[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]))[["estimate"]])
poissonResUTR$rateRatioCI95low = apply(poissonResUTR[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]))[["conf.int"]][1])
poissonResUTR$rateRatioCI95high = apply(poissonResUTR[,c("Lcounts","Ecounts","LcountsNeg","EcountsNeg")], 1, function(x) poisson.test(c(x[1],x[2]),c(x[3],x[4]))[["conf.int"]][2])

plotData19 = plotData18[which(plotData18$TargetType == "Ch1Unknown",plotData18$Sample %in% c("EL-gBlock","NTC") == F & is.na(plotData18$InverseFractionalAbundance) == F),c("Sample","InverseFractionalAbundance")]

plotData19$poissonPvalue = poissonResUTR$poissonPvalue[match(plotData19$Sample,poissonResUTR$Sample)]
plotData19$poissonRateRatio = poissonResUTR$poissonRateRatio[match(plotData19$Sample,poissonResUTR$Sample)]
plotData19$rateRatioCI95low = poissonResUTR$rateRatioCI95low[match(plotData19$Sample,poissonResUTR$Sample)]
plotData19$rateRatioCI95high = poissonResUTR$rateRatioCI95high[match(plotData19$Sample,poissonResUTR$Sample)]

ggplot(plotData19) +
         geom_quasirandom(aes(1, InverseFractionalAbundance,color=poissonPvalue),dodge.width=0.1,groupOnX=T,size=5,alpha=0.7) +
         geom_segment(y=mean(plotData19$InverseFractionalAbundance),yend=mean(plotData19$InverseFractionalAbundance),x=0.8,xend=1.2,size=1.5,lineend="round") +
         geom_hline(yintercept=50,color="darkgrey",linetype="dotted") +
         theme_classic() +
         ylab("L/(L+E) allele abundance %") +
         theme(axis.title.x=element_blank(),
               axis.text.x=element_blank(),
               axis.ticks.x=element_blank(),
               axis.text.y=element_text(size=14,color="black"),
               axis.title.y=element_text(size=14,color="black"),
               axis.line.x = element_blank(),
               legend.position="bottom",
               legend.title = element_text(hjust=1,vjust=0.75,size=10,color="black")) +
          #guides(colour=guide_legend(title.position = "top")) + 
          scale_color_gradient2(name = "ASE P-value", high="green", low="black", mid="green4", midpoint=0.4)

nrow(plotData19)

## [1] 35

nrow(plotData19[which(plotData19$poissonPvalue<0.05 & plotData19$InverseFractionalAbundance>50),])

## [1] 21

nrow(plotData19[which(plotData19$poissonPvalue<0.05 & plotData19$InverseFractionalAbundance<50),])

## [1] 5

mean(plotData19$InverseFractionalAbundance)

## [1] 54.00571

Combine ddPCR ASE results from URT and mis-sense SNPs 1 and 2 to the same figure.

plotData19$Assay = "UTR"
plotData19$FractionalAbundance = plotData19$InverseFractionalAbundance # dummy fractional abundance column
plotData17$Assay = "Mis2"
plotData9$Assay = "Mis1"

plotData20 = rbind(plotData19[,c("Sample","FractionalAbundance","poissonPvalue","poissonRateRatio","rateRatioCI95low","rateRatioCI95high","Assay")], plotData17[,c("Sample","FractionalAbundance","poissonPvalue","poissonRateRatio","rateRatioCI95low","rateRatioCI95high","Assay")], plotData9[,c("Sample","FractionalAbundance","poissonPvalue","poissonRateRatio","rateRatioCI95low","rateRatioCI95high","Assay")])

plotDataControl = data.frame(Assay = c("UTR","Mis1", "Mis2"), mean = c(50,mean(controlResMis1$FractionalAbundance,na.rm=T),mean(controlResMis2$FractionalAbundance,na.rm=T)))
plotDataControl$Assay = factor(plotDataControl$Assay,levels=c("UTR","Mis1","Mis2"))

assayLabels = c("UTR"="5' UTR", "Mis1"="Exon 2","Mis2"="Exon 3")

plotData20$Assay = factor(plotData20$Assay,levels=c("UTR","Mis1","Mis2"))

plotData20$ColorPvalue=1-plotData20$poissonPvalue
plotData20$ColorPvalue[which(plotData20$Assay == "UTR" & plotData20$FractionalAbundance<50)] = -(plotData20$ColorPvalue[which(plotData20$Assay == "UTR" & plotData20$FractionalAbundance<50)])
plotData20$ColorPvalue[which(plotData20$Assay == "Mis1" & plotData20$FractionalAbundance<50.44)] = -(plotData20$ColorPvalue[which(plotData20$Assay == "Mis1" & plotData20$FractionalAbundance<50.44)])
plotData20$ColorPvalue[which(plotData20$Assay == "Mis2" & plotData20$FractionalAbundance<49.4)] = -(plotData20$ColorPvalue[which(plotData20$Assay == "Mis2" & plotData20$FractionalAbundance<49.4)])

ggplot(plotData20) +
         geom_quasirandom(aes(1, FractionalAbundance,color=ColorPvalue),dodge.width=0.1,groupOnX=T,size=5,alpha=0.7) +
         #geom_segment(y=mean(plotData4$FractionalAbundance),yend=mean(plotData4$FractionalAbundance),x=0.8,xend=1.2,size=1.5,lineend="round") +
         geom_hline(data=plotDataControl,aes(yintercept=mean),color="darkgrey",linetype="dotted") +
         facet_wrap(~Assay,labeller=labeller(Assay=assayLabels)) +
         theme_classic() +
         ylab("L/(L+E) allele abundance %") +
         theme(axis.title.x=element_blank(),
               axis.text.x=element_blank(),
               axis.ticks.x=element_blank(),
               axis.text.y=element_text(size=14,color="black"),
               axis.title.y=element_text(size=14,color="black"),
               axis.line.x = element_blank(),
               strip.background=element_blank(),
               strip.text=element_text(size=14,color="black"),
               legend.position="bottom",
               legend.title = element_text(hjust=1,vjust=0.75,size=10,color="black")) +
          #guides(colour=guide_legend(title.position = "top")) + 
          scale_color_gradient2(name = "ASE P-value", high="red", low="blue", mid="grey", midpoint=0,labels=c(0,0.5,1,0.5,0))