# Measurement error paper results and figures # Daniel Nettle, October 2018, revised March 2019 # Run 'simulation.functions.r' before using this script # External libraries library(psych) library(moments) library(fields) library(dplyr) library(normtest) ##Validating and parameterizing the simulation framework#### # Get dataset 1 d=read.csv("dataset1.csv") # Just light cycler 1 lc1=subset(d, LightCycler.Instrument.ID==1) #Telomere Cqs # Reshape the dataset first telo.cqs=select(lc1, Telo.36B4.Plate.Set, TeloCq1, TeloCq2, TeloCq3) t=reshape(telo.cqs, direction="long", varying=list(c("TeloCq1", "TeloCq2", "TeloCq3")), v.names="TeloCq") t$zTeloCq = t$TeloCq- mean(t$TeloCq) sd(t$TeloCq)/sd(lc1$MeanTeloCq) sd(t$TeloCq) describeBy(t$TeloCq, t$Telo.36B4.Plate.Set, mat=T, digits=3) # Single copy gene # Reshape the dataset first scg.cqs=select(lc1, Telo.36B4.Plate.Set, scgCq1, scgCq2, scgCq3) s=reshape(scg.cqs, direction="long", varying=list(c("scgCq1", "scgCq2", "scgCq3")), v.names="scgCq") s$zscgCq = s$scgCq- mean(s$scgCq) sd(s$scgCq)/sd(lc1$MeanscgCq) sd(s$scgCq) describeBy(s$scgCq, s$Telo.36B4.Plate.Set, mat=T, digits=3) # Plot figure 1 png("figure1.png", res=600, units="in", width=8, height=4) par(mfrow=c(1, 2)) hist(t$TeloCq, breaks=20, xlim=c(9, 10), freq=F, density=50, xlab="Cq value", main="A", ylim=c(0, 8)) curve(dnorm(x, mean(t$TeloCq), sd(t$TeloCq)), add=T, col="red") hist(s$scgCq[s$Telo.36B4.Plate.Set=="A"], breaks=20, xlim=c(22.2, 23.2), freq=F, density=90, xlab="Cq value", main="B", col=rgb(0.8, 0.8, 0.8, 1), ylim=c(0, 8)) hist(s$scgCq[s$Telo.36B4.Plate.Set=="B"], breaks=20, xlim=c(22.2, 23.2), freq=F, density=50, add=T, angle=135, col=rgb(0.3,0.3,0.3,0.5)) curve(dnorm(x, mean(s$scgCq[s$Telo.36B4.Plate.Set=="A"]), sd(s$scgCq[s$Telo.36B4.Plate.Set=="A"])), add=T, col="red") curve(dnorm(x, mean(s$scgCq[s$Telo.36B4.Plate.Set=="B"]), sd(s$scgCq[s$Telo.36B4.Plate.Set=="B"])), add=T, col="red", lty=2) dev.off() ###Consequences of error in Cq for TS ratio values#### d=generate.one.dataset(n=10000, error.telo=0.05, error.scg=0.05, var.sample.size=1, telomere.var=0.1) psych::describe(d$error.for.telo/sd(d$true.telo.cq)) psych::describe(d$error.for.scg/sd(d$true.cq.scg)) psych::describe(d$error.computed/sd(d$true.ts)) # No skewness or kurtosis in the errors of Cq agostino.test(d$error.for.telo/sd(d$true.telo.cq)) kurtosis(d$error.for.telo/sd(d$true.telo.cq), 1000) kurtosis.norm.test(d$error.for.telo/sd(d$true.telo.cq), 1000) agostino.test(d$error.for.scg/sd(d$true.cq.scg)) kurtosis.norm.test(d$error.for.scg/sd(d$true.cq.scg), 1000) # But skewness and kurtosis in the errors of TS ratio agostino.test(d$error.computed/sd(d$true.ts)) kurtosis.norm.test(d$error.computed/sd(d$true.ts), 1000) # Figure 2 d=generate.one.dataset(n=1000, error.telo=0.05, error.scg=0.05, var.sample.size=1, telomere.var=0.1) png("figure2.png", res=300, units="in", width=8, height=3) par(mfrow=c(1, 3)) hist(d$error.for.telo/sd(d$true.telo.cq),main="A", xlim=c(-2, 2), xlab="Error measured telomere Cq", ylim=c(0, 50), breaks=seq(-2, 2, by=0.02), las=1) abline(v=mean(d$error.for.telo), col="red", lty=2) hist(d$error.for.scg/sd(d$true.cq.scg), xlim=c(-2, 2), main="B", xlab="Error measured single copy Cq", ylim=c(0, 50), breaks=seq(-2, 2, by=0.02), las=1) abline(v=mean(d$error.for.scg), col="red", lty=2) hist(d$error.computed/sd(d$true.ts), main="C", xlim=c(-2, 2), breaks=seq(-2, 2, by=0.02), xlab="Error measured TS", las=1, ylim=c(0, 50)) abline(v=mean(d$error.computed), col="red", lty=2) dev.off() ###Relationship between measured T/S ratio and relative telomere length#### d1=generate.one.dataset(n=1000, error.telo=0.0, error.scg=0.0, var.sample.size=1, telomere.var=0.1) d2=generate.one.dataset(n=1000, error.telo=0.05, error.scg=0.05, var.sample.size=1, telomere.var=0.1) d3=generate.one.dataset(n=1000, error.telo=0.15, error.scg=0.15, var.sample.size=1, telomere.var=0.1) png("figure3.png", res=300, units="in", width=9, height=6) par(mfrow=c(2, 3)) par(oma=c(2, 1, 1, 2)) plot(d1$measured.ts~d1$true.telo.var, main="A", xlab="True telomere length", ylab="Measured TS", ylim=c(0, 2.5), las=1) abline(lm(d1$true.ts~d1$true.telo.var), col="red", lty=2) text(1.15, 2.25, expression(paste("Error ", sigma, " = 0"))) plot(d2$measured.ts~d2$true.telo.var, main="B", xlab="True telomere length", ylab="Measured TS", ylim=c(0, 2.5), las=1) abline(lm(d2$true.ts~d2$true.telo.var), col="red", lty=2) text(1.15, 2.25, expression(paste("Error ", sigma, " = 0.05"))) plot(d3$measured.ts~d3$true.telo.var, main="C", xlab="True telomere length", ylab="Measured TS", ylim=c(0, 2.5), las=1) abline(lm(d3$true.ts~d3$true.telo.var), col="red", lty=2) text(1.15, 2.25, expression(paste("Error ", sigma, " = 0.15"))) # Second row - how does error relate to true relative telomere length? plot(abs(d1$error.computed)~d1$true.telo.var, main="D", las=1, xlab="True telomere length", ylab="Absolute error measured TS", ylim=c(0, 1)) abline(lm(abs(d1$error.computed)~d1$true.telo.var), col="red", lty=2) text(1.15, 0.9, expression(paste("Error ", sigma, " = 0"))) plot(abs(d2$error.computed)~d2$true.telo.var, main="E", las=1, xlab="True telomere length", ylab="Absolute error measured TS", ylim=c(0, 1)) abline(lm(abs(d2$error.computed)~d2$true.telo.var), col="red", lty=2) text(1.15, 0.9, expression(paste("Error ", sigma, " = 0.05"))) plot(abs(d3$error.computed)~d3$true.telo.var, main="F", las=1, xlab="True telomere length", ylab="Absolute error measured TS", ylim=c(0, 1)) abline(lm(abs(d3$error.computed)~d3$true.telo.var), col="red", lty=2) text(1.15, 0.9, expression(paste("Error ", sigma, " = 0.15"))) dev.off() ###Repeatability of the measured T/S ratio#### # First generate the datasets ss=10000 step=0.005 telo.errors=NULL scg.errors=NULL reliabilities=NULL i.seq=seq(0, 0.3, by=step) j.seq=seq(0, 0.3, by=step) counter=0 icounter=0 jcounter=0 for (i in i.seq) { icounter=icounter+1 print(icounter) for (j in j.seq){ jcounter=jcounter+1 counter=counter+1 telo.errors[counter] = i scg.errors[counter] = j reliabilities[counter] = calculate.repeatability(n=ss, error.telo = i, error.scg = j, error.cor=0) } } output1=data.frame(telo.errors, scg.errors, reliabilities) output.matrix1=array(reliabilities, dim=c(length(i.seq), length(j.seq))) # Now make the figure par(mfrow=c(1, 1)) par(oma=c(2, 1, 1, 2)) par(mar=c(6.1, 4.1, 4.1, 2.1)) image.plot(output.matrix1, x=i.seq, y=i.seq, las=1, xlab=expression(paste("Error ", sigma, " Cq telomere")), ylab=expression(paste("Error ", sigma, " Cq single copy")), main="A", zlim=c(0,1)) contour(x=seq(0, 0.3, by=step), y=seq(0, 0.3, by=step), z=output.matrix1, levels=c(0.9, 0.8, 0.7, 0.6, 0.5, 0.4), add=TRUE, col="white", labcex=1.1, lwd=2) #####Comparative repeatabilities of Cq and TS ##### sigmas.scg=seq(0, 0.3, by=0.01) sigmas.telo=rep(0.05, times=length(sigmas.scg)) answer=array(dim=c(length(sigmas.telo), 3)) for (i in 1:length(sigmas.telo)) { answer[i , ]=compare.repeatability(error.telo = sigmas.telo[i], error.scg=sigmas.scg[i]) } data=as.data.frame(answer) colnames(data)=c("telo.rel", "scg.rel", "ts.rel") plot(data$ts.rel~seq(0, 0.3, by=0.01), las=1, type="o", xlab=expression(paste("Error ", sigma, " single-copy gene")), ylab="Repeatability (ICC)", pch=19) lines(data$telo.rel~seq(0, 0.3, by=0.01), col="red", lty=2) points(data$telo.rel~seq(0, 0.3, by=0.01),col="red", pch=21) # Make the combined figure 4 png("figure4.png", res=300, units="in", width=12, height=6) par(mfrow=c(1, 2)) par(oma=c(4, 2, 2, 2)) par(mar=c(4.1, 7.1, 4.1, 2.1)) image.plot(output.matrix1, x=i.seq, y=i.seq, las=1, xlab=expression(paste("Error ", sigma, " Cq telomere")), ylab=expression(paste("Error ", sigma, " Cq single copy")), main="A", zlim=c(0,1)) contour(x=seq(0, 0.3, by=step), y=seq(0, 0.3, by=step), z=output.matrix1, levels=c(0.9, 0.8, 0.7, 0.6, 0.5, 0.4), add=TRUE, col="white", labcex=1.1, lwd=2) plot(data$ts.rel~seq(0, 0.3, by=0.01), las=1, type="o", xlab=expression(paste("Error ", sigma, " single-copy gene")), ylab="Repeatability (ICC)", pch=19, main="B") lines(data$telo.rel~seq(0, 0.3, by=0.01), col="red", lty=2) points(data$telo.rel~seq(0, 0.3, by=0.01),col="red", pch=21) dev.off() ###Regression to the mean and correlation between time points in longitudinal data##### png("figure5.png", res=300, units="in", width=9, height=6) # Regression to the mean plots par(mfrow=c(2, 3)) par(oma=c(2, 1, 1, 2)) d1 = generate.repeated.measure(n=1000, error.telo=0, error.scg=0) lims=c(0, 2) plot(d1$measured.ts.2 ~ d1$measured.ts.1, xlim=lims, ylim=lims, las=1, xlab="TS time 1", ylab="TS time 2", main="A") abline(a=0, b=1) abline(lm(d1$measured.ts.2 ~ d1$measured.ts.1), col="red", lty=2) text(1.5, 0.25, expression(paste("Error ", sigma, " = 0"))) d2 = generate.repeated.measure(n=1000, error.telo=0.05, error.scg=0.05) plot(d2$measured.ts.2 ~ d2$measured.ts.1, xlim=lims, ylim=lims, las=1, xlab="TS time 1", ylab="TS time 2", main="B") abline(a=0, b=1) text(1.5, 0.25, expression(paste("Error ", sigma, " = 0.05"))) abline(lm(d2$measured.ts.2 ~ d2$measured.ts.1), col="red", lty=2) d3 = generate.repeated.measure(n=1000, error.telo=0.15, error.scg=0.15) plot(d3$measured.ts.2 ~ d3$measured.ts.1, xlim=lims, ylim=lims, las=1, xlab="TS time 1", ylab="TS time 2", main="C") abline(a=0, b=1) abline(lm(d3$measured.ts.2 ~ d3$measured.ts.1), col="red", lty=2) text(1.5, 0.25, expression(paste("Error ", sigma, " = 0.15"))) # How does beta of time 1 measure as a predictor of time 2 measure change with increasing measurement error? d=generate.repeated.measure(n=10000, error.telo = 0, error.scg = 0, error.cor=0) # Function to return the values needed make.my.beta=function(current.cor) { telo.errors=NULL scg.errors=NULL betas=NULL i.seq=seq(0.01, 0.3, by=0.01) counter=0 icounter=0 for (i in i.seq) { icounter=icounter+1 counter=counter+1 telo.errors[counter] = i scg.errors[counter] = i d=generate.repeated.measure(n=10000, error.telo = i, error.scg = i, error.cor=current.cor) betas[counter] = cor(d$measured.ts.2, d$measured.ts.1) } output=data.frame(telo.errors, scg.errors, betas) return(output) } # Generate the data output1=make.my.beta(current.cor=0) # Make the plot plot(output1$betas~output1$telo.errors, type="o", pch=20, xlab=expression(paste("Error ", sigma)), ylab=expression(paste("Correlation between t1 and t2")), las=1, main="D") # How strongly does length at t1 predict attrition? # Function to return the values needed make.my.attrition.beta=function(current.cor) { telo.errors=NULL scg.errors=NULL betas=NULL i.seq=seq(0.01, 0.3, by=0.01) counter=0 icounter=0 for (i in i.seq) { icounter=icounter+1 counter=counter+1 telo.errors[counter] = i scg.errors[counter] = i d=generate.repeated.measure(n=10000, error.telo = i, error.scg = i, error.cor=current.cor) d$attrition=d$measured.ts.2-d$measured.ts.1 betas[counter] = cor(d$measured.ts.1, d$attrition) } output=data.frame(telo.errors, scg.errors, betas) return(output) } # Generate the data output1=make.my.attrition.beta(current.cor=0) # Make the plot plot(output1$betas~output1$telo.errors, type="o", pch=20, xlab=expression(paste("Error ", sigma)), ylab=expression("Correlation between t1 and change"), las=1, ylim=c(0, -1), main="E") # Get dataset 2 h=read.csv("dataset2.csv") # Sixth panel output1a=make.my.beta(current.cor=0) output1b=make.my.attrition.beta(current.cor=0) plot(output1a$betas, output1b$betas, type="o", las=1, xlab="Correlation between t1 and t2", ylab="Correlation between t1 and change", xlim=c(-0.1,1), ylim=c(0, -1), main="F", pch=20) points(h$correlation.bl.fu[h$method=="qPCR"], -h$correlation.TA[h$method=="qPCR"], col="red", pch=19) points(h$correlation.bl.fu[h$method=="TRF"], -h$correlation.TA[h$method=="TRF"], col="blue", pch=17) # End figure 5 dev.off() ###Supplementary analyses: Correlation between the errors###### ss=10000 step=0.005 # No correlation between the errors telo.errors=NULL scg.errors=NULL repeatabilities=NULL i.seq=seq(0, 0.3, by=step) j.seq=seq(0, 0.3, by=step) counter=0 icounter=0 jcounter=0 for (i in i.seq) { icounter=icounter+1 print(icounter) for (j in j.seq){ jcounter=jcounter+1 counter=counter+1 telo.errors[counter] = i scg.errors[counter] = j repeatabilities[counter] = calculate.repeatability(n=ss, error.telo = i, error.scg = j, error.cor=0) } } output1=data.frame(telo.errors, scg.errors, repeatabilities) output.matrix1=array(repeatabilities, dim=c(length(i.seq), length(j.seq))) # Weak correlation between the errors telo.errors=NULL scg.errors=NULL repeatabilities=NULL i.seq=seq(0, 0.3, by=step) j.seq=seq(0, 0.3, by=step) counter=0 icounter=0 jcounter=0 for (i in i.seq) { icounter=icounter+1 print(icounter) for (j in j.seq){ jcounter=jcounter+1 counter=counter+1 telo.errors[counter] = i scg.errors[counter] = j repeatabilities[counter] = calculate.repeatability(n=ss, error.telo = i, error.scg = j, error.cor=0.3) } } output2=data.frame(telo.errors, scg.errors, repeatabilities) output.matrix2=array(repeatabilities, dim=c(length(i.seq), length(j.seq))) # Strong positive correlation between the errors telo.errors=NULL scg.errors=NULL repeatabilities=NULL i.seq=seq(0, 0.3, by=step) j.seq=seq(0, 0.3, by=step) counter=0 icounter=0 jcounter=0 for (i in i.seq) { icounter=icounter+1 print(icounter) for (j in j.seq){ jcounter=jcounter+1 counter=counter+1 telo.errors[counter] = i scg.errors[counter] = j repeatabilities[counter] = calculate.repeatability(n=ss, error.telo = i, error.scg = j, error.cor=0.7) } } output3=data.frame(telo.errors, scg.errors, repeatabilities) output.matrix3=array(repeatabilities, dim=c(length(i.seq), length(j.seq))) # Now make the figure png("figureS1.png", res=300, units="in", width=12, height=4) par(mfrow=c(1, 3)) par(oma=c(2, 1, 1, 2)) par(mar=c(6.1, 4.1, 4.1, 2.1)) # First panel image.plot(output.matrix1, x=i.seq, y=i.seq, las=1, xlab=expression(paste("Error ", sigma, " Cq telomere")), ylab=expression(paste("Error ", sigma, " Cq single copy")), main="A", zlim=c(0,1)) contour(x=seq(0, 0.3, by=step), y=seq(0, 0.3, by=step), z=output.matrix1, levels=c(0.9, 0.8, 0.7, 0.6, 0.5, 0.4), add=TRUE, col="white", labcex=1.1, lwd=2) # Second panel image.plot(output.matrix2, x=i.seq, y=i.seq, las=1, xlab=expression(paste("Error ", sigma, " Cq telomere")), ylab=" ", main="B", zlim=c(0,1)) contour(x=seq(0, 0.3, by=step), y=seq(0, 0.3, by=step), z=output.matrix2, levels=c(0.9, 0.8, 0.7, 0.6, 0.5, 0.4), add=TRUE, col="white", labcex=1.1, lwd=2) # Third panel image.plot(output.matrix3, x=i.seq, y=i.seq, las=1, xlab=expression(paste("Error ", sigma, " Cq telomere")), ylab=" ", main="C", zlim=c(0,1)) contour(x=seq(0, 0.3, by=step), y=seq(0, 0.3, by=step), z=output.matrix3, levels=c(0.9, 0.8, 0.7, 0.6, 0.5, 0.4), add=TRUE, col="white", labcex=1.1, lwd=2) dev.off()