#!/usr/bin/perl
use warnings;
use strict;
use Getopt::Long;

=pod

=head1 Usage
	perl $0 [option] <bam> <outdir>
		-q	base quality
		-Q	mapping quality
		-l	genome size
		-h	help

=cut
		

my ($basethres,$mapQthres,$total_chr,$help);
GetOptions("q:i"=>\$basethres,"Q:i"=>\$mapQthres,"l:i"=>\$total_chr,"h"=>\$help);

$basethres ||= 0;
$mapQthres ||= 0;
die `pod2text $0` if(!$total_chr || $help);

my $bam=shift;
my $outdir=shift;
my $samApp = "samtools";
my $RApp = "R";
mkdir $outdir unless -d $outdir;

open DEPTH,"$samApp depth -q $basethres -Q $mapQthres $bam | " or die;

my %depth=();
my $maxCov=0;
my $Average_sequencing_depth=0;
my $Average_sequencing_depth4=0;
my $Average_sequencing_depth10=0;
my $Average_sequencing_depth20=0;
my $Coverage=0;
my $Coverage4=0;
my $Coverage10=0;
my $Coverage20=0;

my $Coverage_bases=0;
my $Coverage_bases_4=0;
my $Coverage_bases_10=0;
my $Coverage_bases_20=0;

my $total_Coverage_bases=0;
my $total_Coverage_bases_4=0;
my $total_Coverage_bases_10=0;
my $total_Coverage_bases_20=0;

my$total_aa=0;
while(<DEPTH>)
{
	chomp;
	my @arr = split;
        if($arr[2]>=1){
            $depth{$arr[2]}+=1;
            $total_aa++;
        }
}
close DEPTH;

my @depth=sort {$a<=>$b} keys %depth;

open HIS,">$outdir/depth_frequency.txt" or die;
open CUM,">$outdir/cumu.txt" or die;

foreach my $depth1 (sort {$a<=>$b} keys %depth)
{
	next if($depth1==0);
	my $per=$depth{$depth1}/$total_chr;
	$total_Coverage_bases += $depth1*$depth{$depth1};
	$Coverage_bases += $depth{$depth1};

	if($depth1>=4)	
	{
		$total_Coverage_bases_4 += $depth1 * $depth{$depth1};
		$Coverage_bases_4 += $depth{$depth1};
	}
	if($depth1>=10)
	{
		$total_Coverage_bases_10 += $depth1 * $depth{$depth1};
		$Coverage_bases_10 += $depth{$depth1};
	}
	if($depth1>=20)
	{
		$total_Coverage_bases_20 += $depth1 * $depth{$depth1};
 		$Coverage_bases_20 += $depth{$depth1};
	}



	$maxCov=$per if($maxCov<$per);
	my $tmp=0;
	print HIS "$depth1\t$per\n";
	foreach my $depth2(@depth)
	{
		$tmp+=$depth{$depth2} if($depth2 >= $depth1); 
	}
	$tmp=$tmp/$total_chr;
	print CUM "$depth1\t$tmp\n";
}

$Average_sequencing_depth=$total_Coverage_bases/$total_aa;
$Coverage=$Coverage_bases/$total_chr;
$Average_sequencing_depth4=$total_Coverage_bases_4/$total_aa;
$Coverage4=$Coverage_bases_4/$total_chr;
$Average_sequencing_depth10=$total_Coverage_bases_10/$total_aa;
$Coverage10=$Coverage_bases_10/$total_chr;
$Average_sequencing_depth20=$total_Coverage_bases_20/$total_aa;
$Coverage20=$Coverage_bases_20/$total_chr;

open SUM,">$outdir/summary.txt" or die $!;
print SUM "Average_sequencing_depth:\t",sprintf("%.2f",$Average_sequencing_depth),"\n";
print SUM "Coverage:\t",sprintf("%.2f%%",100*$Coverage),"\n";
print SUM "Coverage_at_least_4X:\t",sprintf("%.2f%%",100*$Coverage4),"\n";
print SUM "Coverage_at_least_10X:\t",sprintf("%.2f%%",100*$Coverage10),"\n";
print SUM "Coverage_at_least_20X:\t",sprintf("%.2f%%",100*$Coverage20),"\n";
close SUM;
close HIS;
close CUM;

if(1)
{
	my $ylim = 100*$maxCov;
	my ($xbin,$ybin);
	$ylim= int($ylim) + 1;
	if($ylim <= 3)
	{
		$ybin = 0.5;
	}else{
		$ybin=1;
	}
	my $xlim=0;
	if($Average_sequencing_depth<30)
	{
		$xlim=100;
		$xbin=20;
	}elsif($Average_sequencing_depth < 50)
	{
		$xlim=160;
		$xbin=20;
	}elsif($Average_sequencing_depth  < 120)
	{
		$xlim=250;
		$xbin=50;
	}else{
		$xlim=600;
		$xbin=100;
	}
	&histPlot($outdir,"$outdir/depth_frequency.txt",$ylim,$ybin,$xlim,$xbin);
	&cumuPlot($outdir,"$outdir/cumu.txt",$xlim,$xbin);
	&Seqdepth($outdir,"$outdir/depth_frequency.txt","$outdir/cumu.txt");
}

sub cumuPlot {
	my ($outdir, $dataFile, $xlim, $xbin) = @_;
	my $figFile = "$outdir/cumuPlot.pdf";
	my $Rline=<<Rline;
	pdf(file="$figFile",w=8,h=6)
	rt <- read.table("$dataFile")
	opar <- par()
	x <- rt\$V1[1:($xlim+1)]
	y <- 100*rt\$V2[1:($xlim+1)]
	par(mar=c(4.5, 4.5, 2.5, 2.5))
	plot(x,y,col="red",type='l', lwd=2, bty="l",xaxt="n",yaxt="n", xlab="", ylab="", ylim=c(0, 100))
	xpos <- seq(0,$xlim,by=$xbin)
	ypos <- seq(0,100,by=20)
	axis(side=1, xpos, tcl=0.2, labels=FALSE)
	axis(side=2, ypos, tcl=0.2, labels=FALSE)
	mtext("Cumulative sequencing depth",side=1, line=2, at=median(xpos), cex=1.5 )
	mtext("Fraction of bases (%)",side=2, line=3, at=median(ypos), cex=1.5 )
	mtext(xpos, side=1, las=1, at=xpos, line=0.3, cex=1.4)
	mtext(ypos, side=2, las=1, at=ypos, line=0.3, cex=1.4)
	par(opar)
	dev.off()
	png(filename="$outdir/cumuPlot.png",width = 480, height = 360,type='cairo')
	par(mar=c(4.5, 4.5, 2.5, 2.5))
	plot(x,y,col="red",type='l', lwd=3, bty="l",xaxt="n",yaxt="n", xlab="", ylab="", ylim=c(0, 100))
	xpos <- seq(0,$xlim,by=$xbin)
	ypos <- seq(0,100,by=20)
	axis(side=1, xpos, tcl=0.2, labels=FALSE)
	axis(side=2, ypos, tcl=0.2, labels=FALSE)
	mtext("Cumulative sequencing depth",side=1, line=2, at=median(xpos), cex=1.5 )
	mtext("Fraction of bases (%)",side=2, line=3, at=median(ypos), cex=1.5 )
	mtext(xpos, side=1, las=1, at=xpos, line=0.3, cex=1.5)
	mtext(ypos, side=2, las=1, at=ypos, line=0.3, cex=1.5)
	par(opar)
	dev.off()
	
Rline
	open (ROUT,">$figFile.R");
	print ROUT $Rline;
	close(ROUT);

	system("$RApp CMD BATCH  $figFile.R");
}


sub histPlot {
	my ($outdir, $dataFile, $ylim, $ybin, $xlim, $xbin) = @_;
	my $figFile = "$outdir/histPlot.pdf";
	my $Rline=<<Rline;
	pdf(file="$figFile",w=8,h=6)
	rt <- read.table("$dataFile")
	opar <- par()
	t=sum(rt\$V2[($xlim+1):length(rt\$V2)])
	y=c(rt\$V2[1:$xlim],t)
	y <- y*100
	x <- rt\$V1[1:($xlim+1)]
	par(mar=c(4.5, 4.5, 2.5, 2.5))
	plot(x,y,col="blue",type='h', lwd=1.5, xaxt="n",yaxt="n", xlab="", ylab="", bty="l",ylim=c(0,$ylim),xlim=c(0,$xlim))
	xpos <- seq(0,$xlim,by=$xbin)
	ypos <- seq(0,$ylim,by=$ybin)
	axis(side=1, xpos, tcl=0.2, labels=FALSE)
	axis(side=2, ypos, tcl=0.2, labels=FALSE)
	mtext("Sequencing depth",side=1, line=2, at=median(xpos), cex=1.5 )
	mtext("Fraction of bases (%)",side=2, line=3, at=median(ypos), cex=1.5 )
	end <- length(xpos)-1
	mtext(c(xpos[1:end],"$xlim+"), side=1, las=1, at=xpos, line=0.3, cex=1.4)
	mtext(ypos, side=2, las=1, at=ypos, line=0.3, cex=1.4)
	par(opar)
	dev.off()
	png(filename="$outdir/histPlot.png",width = 480, height = 360, type='cairo')
	par(mar=c(4.5, 4.5, 2.5, 2.5))
	plot(x,y,col="blue",type='h', lwd=1.5, xaxt="n",yaxt="n", xlab="", ylab="", bty="l",ylim=c(0,$ylim),xlim=c(0,$xlim))
	xpos <- seq(0,$xlim,by=$xbin)
	ypos <- seq(0,$ylim,by=$ybin)
	axis(side=1, xpos, tcl=0.2, labels=FALSE)
	axis(side=2, ypos, tcl=0.2, labels=FALSE)
	mtext("Sequencing depth",side=1, line=2, at=median(xpos), cex=1.5 )
	mtext("Fraction of bases (%)",side=2, line=3, at=median(ypos), cex=1.5 )
	end <- length(xpos)-1
	mtext(c(xpos[1:end],"$xlim+"), side=1, las=1, at=xpos, line=0.3, cex=1.5)
	mtext(ypos, side=2, las=1, at=ypos, line=0.3, cex=1.5)
	par(opar)
	dev.off()
Rline
	open (ROUT,">$figFile.R");
	print ROUT $Rline;
	close(ROUT);

	system("$RApp CMD BATCH  $figFile.R");
#	system("rm  $figFile.R  $figFile.Rout");
}

sub Seqdepth {
	my($outDir, $depth_frequency, $cumu) = @_;
	open TEMP,">$outDir/Sequencing-depth.R"||die"Can not open the file:$!";
	print TEMP "
#==========================================================================================
#for pdf
pdf(file='$outDir/Sequencing-depth.pdf',w = 8, h = 6)
rt <- read.table('$depth_frequency')
par()
select_list=seq(from=40,to=100,by=10)
for (j in select_list){
        select_sum=sum(rt\$V2[(j+1):length(rt\$V2)])
        if(select_sum<=0.005){
                break
        }
}
t=sum(rt\$V2[(j+1):length(rt\$V2)])
y=c(rt\$V2[1:j],t)
y <- y*100
x <- rt\$V1[1:(j+1)]
par(mar=c(4.5, 4.5, 2.5, 4.5))
plot(x,y,col='blue',type='l', lwd=3, xaxt='n',yaxt='n', xlab='', ylab='', bty='l',ylim=c(0,max(y)),xlim=c(0,j))
box()
xpos <- seq(0,j,by=10)
ypos <- seq(0,max(y),by=1)
axis(side=1, xpos, labels=FALSE)
axis(side=2, ypos, labels=FALSE)
mtext('Sequencing depth',side=1, line=2, at=median(xpos), cex=1.5 )
mtext('Fraction of bases (%)',side=2, line=3, at=median(ypos), cex=1.5 )
end <- length(xpos)-1
mtext(c(xpos[1:end],paste(j,\"+\",sep=\"\")), side=1, las=1, at=xpos, line=0.8, cex=1.5)
mtext(ypos, side=2, las=1, at=ypos, line=0.8, cex=1.5)
########
par(new=T,ann=F)
#rt1 <- read.table('$cumu')
rt1<-rt
row_index<-nrow(rt)
for(i in 2:row_index){
        rt1[i,2]<-rt1[i-1,2]+rt[i,2]
        i=i+1
}
x1 <- rt1\$V1[1:(j+1)]
y1 <- 100*rt1\$V2[1:(j+1)]
plot(x1,y1,col='red',type='l', lwd=3, bty='n',xaxt='n',yaxt='n', xlab='', ylab='', ylim=c(0, 100))
y1pos <- seq(0,100,by=20)
axis(side=4, y1pos,labels=FALSE)
mtext('Cumulative fraction of bases (%)',side=4, line=3, at=median(y1pos), cex=1.5 )
mtext(y1pos, side=4, las=1, at=y1pos, line=0.8, cex=1.5)
legend('right',legend=c('Sequencing depth','Cumulative sequencing depth'),pch=c(-1),lty=1,lwd=3,col=c('blue','red'),cex=1)
dev.off()
#================================================================================================
#for png
png(filename='$outDir/Sequencing-depth.png',width = 480, height = 360,type='cairo')
rt <- read.table('$depth_frequency')
par()
select_list=seq(from=40,to=100,by=10)
for (j in select_list){
        select_sum=sum(rt\$V2[(j+1):length(rt\$V2)])
        if(select_sum<=0.005){
                break
        }
}
t=sum(rt\$V2[(j+1):length(rt\$V2)])
y=c(rt\$V2[1:j],t)
y <- y*100
x <- rt\$V1[1:(j+1)]
par(mar=c(4.5, 4.5, 2.5, 4.5))
plot(x,y,col='blue',type='l', lwd=3, xaxt='n',yaxt='n', xlab='', ylab='', bty='l',ylim=c(0,max(y)),xlim=c(0,j))
box()
xpos <- seq(0,j,by=10)
ypos <- seq(0,max(y),by=1)
axis(side=1, xpos, labels=FALSE)
axis(side=2, ypos, labels=FALSE)
mtext('Sequencing depth',side=1, line=2, at=median(xpos), cex=1.5 )
mtext('Fraction of bases (%)',side=2, line=3, at=median(ypos), cex=1.5 )
end <- length(xpos)-1
mtext(c(xpos[1:end],paste(j,\"+\",sep=\"\")), side=1, las=1, at=xpos, line=0.8, cex=1.5)
mtext(ypos, side=2, las=1, at=ypos, line=0.8, cex=1.5)
########
par(new=T,ann=F)
#rt1 <- read.table('$cumu')
rt1<-rt
row_index<-nrow(rt)
for(i in 2:row_index){
        rt1[i,2]<-rt1[i-1,2]+rt[i,2]
        i=i+1
}
x1 <- rt1\$V1[1:(j+1)]
y1 <- 100*rt1\$V2[1:(j+1)]
plot(x1,y1,col='red',type='l', lwd=3, bty='n',xaxt='n',yaxt='n', xlab='', ylab='', ylim=c(0, 100))
y1pos <- seq(0,100,by=20)
axis(side=4, y1pos, labels=FALSE)
mtext('Fraction of bases (%)',side=4, line=3, at=median(y1pos), cex=1.5 )
mtext(y1pos, side=4, las=1, at=y1pos, line=0.8, cex=1.5)
legend('right',legend=c('Sequencing depth','Cumulative sequencing depth'),pch=c(-1),lty=1,lwd=3,col=c('blue','red'),cex=1)
dev.off()
#================================================================================================
";
	close TEMP;
	`$RApp -f $outDir/Sequencing-depth.R`;
}