#Analysis of MinION RNA-seq performance

#Loading of annotation file from Biomart Ensembl 97 database (GRCh38.p12)
Annotation <- read.table("ANNOTATIONFILE.csv", sep = ",", header = TRUE)

#Total dataset DS100
#Loading of raw counts (first col: gene ID)
rawcounts_DS100 <- read.table("FILENAME.csv", sep = ";")

#Gene name matching
myindex <- as.data.frame(match(rawcounts_DS100[,1], Annotation[,1]))
NAind <- which(is.na(myindex))
if (isEmpty(NAind)){
  myindex<-myindex
  my <- my
} else {
  myindex <- myindex[-NAind,]
  my <- my[-NAind,]
}
myindex <- match(my[,1], Annotation[,1])
mygene <- as.data.frame(Annotation[myindex, c(2,3)])
rawdata <- cbind(mygene, my)
colnames(rawdata) <- c("Gene.name", "Gene.type", "ID", "MeanExpr")

#Computing of log2(CPM)
rawdata$MeanExpr<- rawdata$MeanExpr+1
#Filtering genes with raw count lower than 3
ind_inf3 <- which(rawdata$MeanExpr > 3)
filt3_restot <- rawdata[ind_inf3,]
denCPM <- sum(as.numeric(filt3_restot$MeanExpr))
CPMtot <- cbind(filt3_restot, log2((as.numeric(filt3_restot$MeanExpr)/denCPM)*1000000))
colnames(CPMtot) <- c("Gene.name", "Gene.type", "ID", "MeanExpr", "log2(CPM)")
