#installing Rsamtools
#install.packages("BiocManager")
#BiocManager::install("Rsamtools")

#load library
library(Rsamtools)

#working folder
currentFolder <- ""

#BAM file name without the ".bam" extension (it is appended below)
currentFile <- ""

#reference sequence name; compared against the rname field of the reads
referenceGenome <- ""

#folder and file name are concatenated without a separator, so
#currentFolder has to carry its own trailing slash
bamPath <- paste0(currentFolder, currentFile, ".bam")
bam <- scanBam(bamPath)

#function for collapsing the list of lists into a single list
#as per the Rsamtools vignette
.unlist <- function (x){
   #x: list whose elements all hold the same kind of vector
   #returns the elements of x concatenated into one vector
   first <- x[[1L]]
   if (!is.factor(first)){
      #ordinary vectors: plain concatenation
      return(do.call(c, x))
   }
   ## factors are not passed through do.call(c, ...), which was noted to
   ## coerce them to integer; the factor is rebuilt explicitly instead,
   ## using the levels of the first element
   structure(unlist(x), class = "factor", levels = levels(first))
}

#store names of BAM fields
bam_field <- names(bam[[1]])

#go through each BAM field and unlist it across all elements of bam
#(the result is deliberately not stored in a variable called "list",
#which would shadow the name of base::list for the rest of the session)
bam_columns <- lapply(bam_field, function(y) .unlist(lapply(bam, "[[", y)))

#store as data frame, one column per BAM field
bam_df <- do.call("DataFrame", bam_columns)
names(bam_df) <- bam_field

#function for checking negative strand
check_neg <- function(x){
  #x: SAM/BAM flag value(s); a vector is checked element by element
  #returns a logical vector, TRUE where bit 5 (value 16, the reverse-strand
  #bit of the SAM flag) is set and FALSE otherwise, including for NA flags
  #note: the unmapped bit (value 4) is not consulted here
  flag <- as.integer(x)
  #bitwAnd() returns NA for an NA flag; the is.na() guard turns that into FALSE
  (!is.na(flag)) & (bitwAnd(flag, 16L) != 0L)
}

#test neg function with subset
test <- subset(bam_df, rname == referenceGenome)

#tabulate the check_neg result for every flag of the subset
#(one call per flag value, each returning a single logical)
table(vapply(test$flag, check_neg, logical(1)))

#function for checking positive strand
check_pos <- function(x){
  #x: SAM/BAM flag value(s); a vector is checked element by element
  #returns a logical vector, TRUE where the read is neither flagged as
  #unmapped (bit 3, value 4) nor as reverse strand (bit 5, value 16)
  #an NA flag gives FALSE: a missing flag is never reported as positive strand
  flag <- as.integer(x)
  unmapped <- bitwAnd(flag, 4L) != 0L
  reverse <- bitwAnd(flag, 16L) != 0L
  #FALSE & NA is FALSE, so the is.na() guard also absorbs the NA results
  #bitwAnd() produces for missing flags
  (!is.na(flag)) & !unmapped & !reverse
}

#tabulate the check_pos result for every flag of the test subset
table(vapply(test$flag, check_pos, logical(1)))

#store the mapped positions on the plus and minus strands
#reads aligned to the reference genome; %in% gives FALSE (not NA) for
#records whose rname is NA, so the row masks below never contain NA
on_reference <- bam_df$rname %in% referenceGenome

#positions of reference reads that check_neg reports on the negative strand
ref_neg <- bam_df[on_reference &
                    vapply(bam_df$flag, check_neg, logical(1)),
                    'pos'
                   ]

#positions of reference reads that check_pos reports on the positive strand
ref_pos <- bam_df[on_reference &
                    vapply(bam_df$flag, check_pos, logical(1)),
                    'pos'
                   ]

#calculate densities
#both strands are evaluated on one shared grid: by default density() derives
#the grid from the range of its own input, so the i-th y values of the two
#curves would belong to different positions and could not be combined
#point by point further down
#the limits reproduce the density() default (3 bandwidths beyond the data),
#applied to the joint range of both strands
bandwidth <- 1
grid_range <- range(ref_neg, ref_pos) + c(-3, 3) * bandwidth
ref_neg_density <- density(ref_neg, bw=bandwidth, from=grid_range[1], to=grid_range[2])
ref_pos_density <- density(ref_pos, bw=bandwidth, from=grid_range[1], to=grid_range[2])

#display negative strand with negative values
ref_neg_density$y <- ref_neg_density$y * -1

#cumulated density: subtracting the negated negative-strand curve adds the
#two strands together
ref_pos_density$y <- ref_pos_density$y - ref_neg_density$y
#rescale for plotting: x by 10^-6 (axis is labelled in MB),
#y by 10^3 (axis is labelled in units of 10^-3)
ref_pos_density$x <- ref_pos_density$x*0.000001
ref_pos_density$y <- ref_pos_density$y*1000

#reorder replicores and apply shift to centre the replication origin
#grid indices of the two parts; the first and the last grid point are left
#out and the split sits 9 points before the middle of the grid
n_points <- length(ref_pos_density$x)
right_idx <- 2:(n_points/2-9)
left_idx <- (n_points/2-8):(n_points-1)

#first part of the grid: coordinates are kept as they are
replicoreX <- ref_pos_density$x[right_idx]
replicoreY <- ref_pos_density$y[right_idx]
rightReplicore <- data.frame(replicoreX, replicoreY)

#second part of the grid: shifted left by its last x value, so that its
#coordinates end at 0
replicoreX <- ref_pos_density$x[left_idx]
genomeLength <- replicoreX[length(replicoreX)]
replicoreX <- replicoreX-genomeLength
replicoreY <- ref_pos_density$y[left_idx]
leftReplicore <- data.frame(replicoreX, replicoreY)

#shifted part first, then the unshifted part
finalData <- rbind(leftReplicore, rightReplicore)

#how many good reads?
#tabulates, over all records, whether the flag is exactly 16 and the read
#sits on the reference genome
#NOTE(review): flag == 16 matches only reads whose sole flag bit is the
#reverse-strand bit; reads with flag 0 are counted as FALSE - confirm that
#this is the intended definition of a "good" read
numberGoodReads <- table(bam_df$flag == 16 & bam_df$rname == referenceGenome)

#plot results
#open a 600 x 600 px zip-compressed TIFF next to the BAM file
#("units" is spelled out instead of relying on partial matching of "unit")
tiff(paste0(currentFolder, currentFile, "_density.tiff"), compression="zip", width=600, height=600, units="px")
#the device is closed in "finally" so that a failing plot call does not
#leave the TIFF device open; the error itself is still raised
tryCatch({
  #coverage curve on a logarithmic y axis; the default x axis is suppressed
  plot(finalData, xlab="Chromosome position (MB)", ylab=expression("Read coverage density (10"^-3*")"), col="grey", lwd=2.5, type="l", las=1, ylim=c(0.65,3), xaxt="n", log="y")
  #unlabelled tick marks at fixed positions on both axes
  xtick<-seq(-2, 2, by=1)
  axis(side=1, at=xtick, labels = FALSE)
  ytick<-seq(1, 3, by=1)
  axis(side=2, at=ytick, labels = FALSE)
  #labelled x axis with enlarged annotation
  axis(1, cex.axis=1.5)
}, finally = dev.off())

#store density data
#written next to the BAM file as <currentFolder><currentFile>_data.csv
csvPath <- paste0(currentFolder, currentFile, "_data.csv")
write.csv(finalData, file = csvPath)


