###Trim (Reads were trimmed with trimmomatic): Trimmomatic -threads 8 -phred33 $fastqdir/${name}_R1_001.fastq.gz $fastqdir/${name}_R2_001.fastq.gz $OutFolder/fastq/${name2}_R1_001.fastq.gz $OutFolder/fastq/${name2}_singleton_R1.fastq.gz $OutFolder/fastq/${name2}_R2_001.fastq.gz $OutFolder/fastq/${name2}_singleton_R2.fastq.gz ILLUMINACLIP:$seq:2:30:10:2:keepBothReads LEADING:3 TRAILING:3 MINLEN:36 > logs/${name2}.trim.log ###STAR alignment (mapping step): STAR --limitSjdbInsertNsj 1800000 --genomeDir ${STARGenomeFolder} --runThreadN 8 --readFilesCommand zcat --readFilesIn $OutFolder/fastq/${name2}_R1_001.fastq.gz $OutFolder/fastq/${name2}_R2_001.fastq.gz --sjdbGTFfile ${GTF} --outFileNamePrefix ${name}_ --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --outReadsUnmapped Fastx --outSAMunmapped Within samtools index ${name}_Aligned.sortedByCoord.out.bam ###HT-Seq (to calculate the number of reads uniquely mapped to each gene) HT-seq: htseq-count -s no -f bam ${name}_Aligned.sortedByCoord.out.bam ${GTF} > ${name}_HTseq-no.txt ###Correlation of gene expression levels among samples and the Euclidean distances among samples in DESeq 2 ###For the distance etc. are used these scripts: pdf(file=paste0(outputPrefix,"-heatmap.pdf")) topVarGenes <- head(order(rowVars(assay(vsd)), decreasing = TRUE), 2000) vsdtop <- assay(vsd)[ topVarGenes,] topvsd.mat<-cor(vsdtop) write.csv(topvsd.mat,file=paste0(outputPrefix,"-sample-correlation.csv"),quote=FALSE) dist.mat <- sqrt(1-topvsd.mat^2) #print(rownames(dist.mat)) colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) # set colors #sampleanno <- data.frame(cellType=Metadata[,3],condition=Metadata[,2]) sampleanno<-subset(Metadata,select=-1) row.names(sampleanno)<-rownames(dist.mat) disTopvsdcor<-dist(as.matrix(dist.mat)) if (length(sampleanno[[opt$condition]])>36){ pheatmap(as.matrix(dist.mat), clustering_distance_rows=disTopvsdcor,clustering_distance_cols=disTopvsdcor, col=colors,annotation_row=sampleanno, annotation_colors=mycolor, annotation_col=sampleanno,show_rownames=F,show_colnames=F) }else{ pheatmap(as.matrix(dist.mat), clustering_distance_rows=disTopvsdcor,clustering_distance_cols=disTopvsdcor, col=colors,annotation_row=sampleanno, annotation_colors=mycolor, annotation_col=sampleanno,show_rownames=T,show_colnames=T) } dev.off() ###DEseq2 to calculate the DE tables: #Read in the count files: ddspre <- DESeqDataSetFromMatrix(countData = inputCount,colData=Metadata,design=~condition) ###use DEseq to calculate the DE table dds<-DESeq(ddspre) resultsDDS<-results(dds)