
###Trim (Reads were trimmed with trimmomatic):

Trimmomatic  -threads 8 -phred33 $fastqdir/${name}_R1_001.fastq.gz  $fastqdir/${name}_R2_001.fastq.gz  $OutFolder/fastq/${name2}_R1_001.fastq.gz  $OutFolder/fastq/${name2}_singleton_R1.fastq.gz  $OutFolder/fastq/${name2}_R2_001.fastq.gz  $OutFolder/fastq/${name2}_singleton_R2.fastq.gz ILLUMINACLIP:$seq:2:30:10:2:keepBothReads LEADING:3 TRAILING:3 MINLEN:36 > logs/${name2}.trim.log

###STAR alignment (mapping step):

STAR --limitSjdbInsertNsj 1800000 --genomeDir ${STARGenomeFolder} --runThreadN 8 --readFilesCommand zcat --readFilesIn $OutFolder/fastq/${name2}_R1_001.fastq.gz $OutFolder/fastq/${name2}_R2_001.fastq.gz --sjdbGTFfile ${GTF} --outFileNamePrefix ${name}_ --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --outReadsUnmapped Fastx --outSAMunmapped Within

samtools index ${name}_Aligned.sortedByCoord.out.bam

###HT-Seq (to calculate the number of reads uniquely mapped to each gene)
HT-seq: htseq-count -s no -f bam  ${name}_Aligned.sortedByCoord.out.bam ${GTF} > ${name}_HTseq-no.txt

###Correlation of gene expression levels among samples and the Euclidean distances among samples in DESeq 2
###For the distance etc. are used these scripts:
pdf(file=paste0(outputPrefix,"-heatmap.pdf"))

topVarGenes <- head(order(rowVars(assay(vsd)), decreasing = TRUE), 2000)

vsdtop  <- assay(vsd)[ topVarGenes,]

topvsd.mat<-cor(vsdtop)

write.csv(topvsd.mat,file=paste0(outputPrefix,"-sample-correlation.csv"),quote=FALSE)

dist.mat <- sqrt(1-topvsd.mat^2)

#print(rownames(dist.mat))

colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) # set colors

#sampleanno <- data.frame(cellType=Metadata[,3],condition=Metadata[,2])

sampleanno<-subset(Metadata,select=-1)

row.names(sampleanno)<-rownames(dist.mat)

disTopvsdcor<-dist(as.matrix(dist.mat))

if (length(sampleanno[[opt$condition]])>36){

pheatmap(as.matrix(dist.mat),

clustering_distance_rows=disTopvsdcor,clustering_distance_cols=disTopvsdcor,

col=colors,annotation_row=sampleanno,

annotation_colors=mycolor,

annotation_col=sampleanno,show_rownames=F,show_colnames=F)

}else{

pheatmap(as.matrix(dist.mat),

clustering_distance_rows=disTopvsdcor,clustering_distance_cols=disTopvsdcor,

col=colors,annotation_row=sampleanno,

annotation_colors=mycolor,

annotation_col=sampleanno,show_rownames=T,show_colnames=T)

}

dev.off()

###DEseq2 to calculate the DE tables:

#Read in the count files:

ddspre <- DESeqDataSetFromMatrix(countData = inputCount,colData=Metadata,design=~condition)

###use DEseq to calculate the DE table

dds<-DESeq(ddspre)

resultsDDS<-results(dds)