# R commands for replicating the results of Lin et al (2014), as well as additional plots using the same data # The commands should be run from the directory in which the data files reside #load input datasets = as.data.frame(scan("Stanford_datasets.txt",list(setname="",seqBatch="",species="",tissue=""),sep="\t")) fpkmMat <- as.matrix(read.table('Stanford_datasets_fpkmMat.txt',header=FALSE,sep='\t')) # perform log transformation logTransformed_fpkmMat = log2(fpkmMat+1) colnames(logTransformed_fpkmMat) <- datasets$setname #plot correlation heatmaps library(pheatmap) pheatmap(cor(logTransformed_fpkmMat)) #pearson correlation; complete linkage pheatmap(cor(logTransformed_fpkmMat),clustering_method="average") #pearson correlation; average linkage pheatmap(cor(logTransformed_fpkmMat),clustering_method="single") #pearson correlation; single linkage pheatmap(cor(logTransformed_fpkmMat),clustering_distance_rows="manhattan",clustering_distance_cols="manhattan") #pearson correlation; manhattan distance, complete linkage pheatmap(cor(logTransformed_fpkmMat),clustering_distance_rows="canberra",clustering_distance_cols="canberra") #pearson correlation; canberra distance, complete linkage pheatmap(cor(logTransformed_fpkmMat,method="spearman")) #spearman correlation; complete linkage pheatmap(logTransformed_fpkmMat, clustering_distance_cols="correlation", show_rownames = FALSE) # clustering of log-transformed FPKM, Pearson correlation distance, complete linkage #pca transposeLogTransformed_fpkmMat = t(logTransformed_fpkmMat) # perform prcomp on the transposed matrix from which columns (genes) of zero variance have been removed pca_proc <- prcomp(transposeLogTransformed_fpkmMat[,apply(transposeLogTransformed_fpkmMat, 2, var, na.rm=TRUE) != 0],scale=TRUE,center=TRUE,retX=TRUE) summary(pca_proc) #Importance of components: # PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15 #Standard deviation 54.2999 43.5061 39.3992 35.81511 31.77971 25.86337 24.77243 22.33937 21.69784 21.1730 19.82838 18.28041 17.41969 17.00162 16.05240 #Proportion of Variance 0.2012 0.1292 0.1059 0.08755 0.06893 0.04565 0.04188 0.03406 0.03213 0.0306 0.02683 0.02281 0.02071 0.01973 0.01759 #Cumulative Proportion 0.2012 0.3304 0.4364 0.52391 0.59284 0.63849 0.68037 0.71443 0.74656 0.7772 0.80399 0.82680 0.84751 0.86724 0.88483 # PC16 PC17 PC18 PC19 PC20 PC21 PC22 PC23 PC24 PC25 PC26 #Standard deviation 15.65210 15.16221 14.56541 13.08324 12.96681 12.29599 12.07214 11.38150 11.18347 10.47129 5.635e-14 #Proportion of Variance 0.01672 0.01569 0.01448 0.01168 0.01148 0.01032 0.00995 0.00884 0.00854 0.00748 0.000e+00 #Cumulative Proportion 0.90155 0.91724 0.93172 0.94340 0.95487 0.96519 0.97514 0.98398 0.99252 1.00000 1.000e+00 plotData = datasets[,c("setname","species","tissue")] plotData$PC1 <- pca_proc$x[,1] plotData$PC2 <- pca_proc$x[,2] plotData$PC3 <- pca_proc$x[,3] plotData$PC4 <- pca_proc$x[,4] plotData$PC5 <- pca_proc$x[,5] plotData$PC6 <- pca_proc$x[,6] library(ggplot2) # 2D plots of pairs of principal components qplot(PC1,PC2,data=plotData,color=species,shape=tissue,xlab="PC1 (20% variability)",ylab="PC2 (13% variability)")+scale_shape_manual(values=c("adipose"=0,"adrenal"=1,"brain"=2,"sigmoid"=3,"heart"=4,"kidney"=5,"liver"=6,"lung"=7,"ovary"=8,"pancreas"=9,"small_bowel"=10,"spleen"=11,"testis"=12)) qplot(PC1,PC3,data=plotData,color=species,shape=tissue,xlab="PC1 (20% variability)",ylab="PC3 (11% variability)")+scale_shape_manual(values=c("adipose"=0,"adrenal"=1,"brain"=2,"sigmoid"=3,"heart"=4,"kidney"=5,"liver"=6,"lung"=7,"ovary"=8,"pancreas"=9,"small_bowel"=10,"spleen"=11,"testis"=12)) qplot(PC2,PC3,data=plotData,color=species,shape=tissue,xlab="PC2 (13% variability)",ylab="PC3 (11% variability)")+scale_shape_manual(values=c("adipose"=0,"adrenal"=1,"brain"=2,"sigmoid"=3,"heart"=4,"kidney"=5,"liver"=6,"lung"=7,"ovary"=8,"pancreas"=9,"small_bowel"=10,"spleen"=11,"testis"=12)) qplot(PC4,PC5,data=plotData,color=species,shape=tissue,xlab="PC4 (9% variability)",ylab="PC5 (7% variability)")+scale_shape_manual(values=c("adipose"=0,"adrenal"=1,"brain"=2,"sigmoid"=3,"heart"=4,"kidney"=5,"liver"=6,"lung"=7,"ovary"=8,"pancreas"=9,"small_bowel"=10,"spleen"=11,"testis"=12)) qplot(PC4,PC6,data=plotData,color=species,shape=tissue,xlab="PC4 (9% variability)",ylab="PC6 (5% variability)")+scale_shape_manual(values=c("adipose"=0,"adrenal"=1,"brain"=2,"sigmoid"=3,"heart"=4,"kidney"=5,"liver"=6,"lung"=7,"ovary"=8,"pancreas"=9,"small_bowel"=10,"spleen"=11,"testis"=12)) qplot(PC5,PC6,data=plotData,color=species,shape=tissue,xlab="PC5 (7% variability)",ylab="PC6 (5% variability)")+scale_shape_manual(values=c("adipose"=0,"adrenal"=1,"brain"=2,"sigmoid"=3,"heart"=4,"kidney"=5,"liver"=6,"lung"=7,"ovary"=8,"pancreas"=9,"small_bowel"=10,"spleen"=11,"testis"=12)) # 2D plots of individual principle components against tissue qplot(PC1,tissue,data=plotData,color=species,shape=tissue,xlab="PC1 (20% variability)")+scale_shape_manual(values=c("adipose"=0,"adrenal"=1,"brain"=2,"sigmoid"=3,"heart"=4,"kidney"=5,"liver"=6,"lung"=7,"ovary"=8,"pancreas"=9,"small_bowel"=10,"spleen"=11,"testis"=12)) qplot(PC2,tissue,data=plotData,color=species,shape=tissue,xlab="PC2 (13% variability)")+scale_shape_manual(values=c("adipose"=0,"adrenal"=1,"brain"=2,"sigmoid"=3,"heart"=4,"kidney"=5,"liver"=6,"lung"=7,"ovary"=8,"pancreas"=9,"small_bowel"=10,"spleen"=11,"testis"=12)) qplot(PC3,tissue,data=plotData,color=species,shape=tissue,xlab="PC3 (11% variability)")+scale_shape_manual(values=c("adipose"=0,"adrenal"=1,"brain"=2,"sigmoid"=3,"heart"=4,"kidney"=5,"liver"=6,"lung"=7,"ovary"=8,"pancreas"=9,"small_bowel"=10,"spleen"=11,"testis"=12)) qplot(PC4,tissue,data=plotData,color=species,shape=tissue,xlab="PC4 (9% variability)")+scale_shape_manual(values=c("adipose"=0,"adrenal"=1,"brain"=2,"sigmoid"=3,"heart"=4,"kidney"=5,"liver"=6,"lung"=7,"ovary"=8,"pancreas"=9,"small_bowel"=10,"spleen"=11,"testis"=12)) qplot(PC5,tissue,data=plotData,color=species,shape=tissue,xlab="PC5 (7% variability)")+scale_shape_manual(values=c("adipose"=0,"adrenal"=1,"brain"=2,"sigmoid"=3,"heart"=4,"kidney"=5,"liver"=6,"lung"=7,"ovary"=8,"pancreas"=9,"small_bowel"=10,"spleen"=11,"testis"=12)) qplot(PC6,tissue,data=plotData,color=species,shape=tissue,xlab="PC6 (5% variability)")+scale_shape_manual(values=c("adipose"=0,"adrenal"=1,"brain"=2,"sigmoid"=3,"heart"=4,"kidney"=5,"liver"=6,"lung"=7,"ovary"=8,"pancreas"=9,"small_bowel"=10,"spleen"=11,"testis"=12)) # testing for significance of correlations between the matched tissues PC values of human and mouse cor.test(plotData$PC1[1:13],plotData$PC1[14:26],method="spearman") # rho=0.6318681 ; p-value = 0.02374 cor.test(plotData$PC2[1:13],plotData$PC2[14:26],method="spearman") # rho=0.7362637 ; p-value = 0.005789 cor.test(plotData$PC3[1:13],plotData$PC3[14:26],method="spearman") # rho=0.543956 ; p-value= 0.05813 cor.test(plotData$PC4[1:13],plotData$PC4[14:26],method="spearman") # rho=0.9340659 ; p-value < 2.2e-16 cor.test(plotData$PC5[1:13],plotData$PC5[14:26],method="spearman") # rho=0.8296703 ; p-value = 0.0007779 cor.test(plotData$PC6[1:13],plotData$PC6[14:26],method="spearman") # rho=0.6923077 ; p-value = 0.01115 cor.test(plotData$PC1[1:13],plotData$PC1[14:26],method="pearson") # cor=0.3963364 ; p-value = 0.18 cor.test(plotData$PC2[1:13],plotData$PC2[14:26],method="pearson") # cor=0.8166208 ; p-value = 0.000658 cor.test(plotData$PC3[1:13],plotData$PC3[14:26],method="pearson") # cor=0.2450217 ; p-value = 0.4198 cor.test(plotData$PC4[1:13],plotData$PC4[14:26],method="pearson") # cor = 0.9097347 ; p-value=1.609e-05 cor.test(plotData$PC5[1:13],plotData$PC5[14:26],method="pearson") # cor = 0.8646415; p-value= 0.0001365 cor.test(plotData$PC6[1:13],plotData$PC6[14:26],method="pearson") #cor=0.7933395 ; p-value = 0.00121