## ---------------------------
##
## Purpose of script: Analyses of genomic datasets from manuscript "AGO1x prevents dsRNA-induced interferon signaling to promote proliferation of breast cancer cells" (doi:YYY)
##
## Author: Dr. Joao C Guimaraes
##
## Date Created: 2020-02-27
##
## Email: joaoguima@gmail.com
##
## Tested with:
##	- R 3.3.3 [macOS 10.15.2 : x86_64-apple-darwin13.4.0 (64-bit)]
##
## ---------------------------


##### Load/Install dependencies #####
## DESeq provides newCountDataSet(), estimateSizeFactors(),
## estimateDispersions() and nbinomTest(), which are called further down.
## requireNamespace() only checks availability; library() then attaches the
## package once and stops with an error if it still cannot be loaded.
## NOTE(review): DESeq appears to be distributed through Bioconductor rather
## than CRAN, so install.packages() may not be able to fetch it -- confirm,
## and install it manually beforehand if needed.
if (!requireNamespace("DESeq", quietly = TRUE)) {
  install.packages("DESeq")
}
library("DESeq")

##### Create folders to save output #####
## Create the two output folders next to the script's working directory.
## showWarnings = TRUE is kept, so dir.create() reports when a folder could
## not be created (for example because it already exists).
for (out_dir in c("figures", "results")) {
  dir.create(file.path(".", out_dir), showWarnings = TRUE)
}

#########
##### Analysis of RT conservation genome-wide
#########

## Read the comma-separated conservation table; the columns used by this
## script are `gene` and `ext_cons`.
data <- read.csv(
  "./data/3utr.conservation/human.mrnas/ext-cons.csv",
  sep = ",",
  header = TRUE
)

## Histogram of `ext_cons` over every row of the table, written to an
## 8 x 8 inch PDF.
pdf(
  "./figures/human.rt.conservation.pdf",
  width = 8,
  height = 8,
  paper = "special"
)
hist(
  data$ext_cons,
  breaks = 100,
  col = "grey",
  main = "",
  cex.axis = 1.5,
  cex.lab = 1.5,
  xlab = "Conservation of putative RT region"
)
dev.off()

pdf("./figures/human.agos.rt.conservation.pdf",width=8,height=8,paper='special') 
b=barplot(c(data[data$gene=="NM_012199","ext_cons"],
  		    data[data$gene=="NM_012154","ext_cons"],
		    data[data$gene=="NM_024852","ext_cons"],
		    data[data$gene=="NM_017629","ext_cons"]),  
		    ylim=c(0,1), ylab="Avg. conservation in extended region", cex.lab=1.5, cex.axis=1.5, width=0.1, col=grey.colors(4))
axis(1,b,labels=c("AGO1","AGO2","AGO3","AGO4"), cex.axis=1.3)
dev.off()


#########
##### Analysis of binding partners for AGO1 and AGO1x (overexpression cell lines)
#########

## Read the tab-separated MS IP count table (the file is tab-delimited despite
## its .csv extension). read.delim() has the same defaults as read.csv() apart
## from using "\t" as the separator.
data <- read.delim(
  "./data/ms.ip.over.exp.ago1x.vs.ago1/ms.ip.counts.oe.ago1x.vs.ago1.csv",
  header = TRUE
)

## Column 1 holds the row identifiers: keep only the first row seen for each
## identifier, move the identifiers into the row names and drop the column.
ids <- data[, 1]
keep <- !duplicated(ids)
data <- data[keep, -1]
rownames(data) <- ids[keep]

## Estimate ago1x/ago1 fold-changes using DESeq.
## The count table must have eight sample columns: columns 1, 2, 5 and 6 are
## labelled "ago1" and columns 3, 4, 7 and 8 are labelled "ago1x".
countTable <- data
colData <- data.frame(
  condition = rep(rep(c("ago1", "ago1x"), each = 2), times = 2),
  row.names = colnames(countTable)
)

cds <- newCountDataSet(countTable, colData$condition)
cds <- estimateSizeFactors(cds)

## Bare call: the size factors are shown when the script runs at top level.
sizeFactors(cds)

cds <- estimateDispersions(
  cds,
  method = "blind",
  sharingMode = "fit-only",
  fitType = "local"
)

## Size-factor-normalised counts.
cds_norm <- counts(cds, normalized = TRUE)

## Test "ago1" against "ago1x" and discard rows without a log2 fold-change.
res <- nbinomTest(cds, "ago1", "ago1x")
res <- res[!is.na(res$log2FoldChange), ]

## Replace infinite log2 fold-changes: +Inf first becomes the largest remaining
## value, then -Inf becomes the smallest remaining value (same order as before).
res$log2FoldChange <- local({
  lfc <- res$log2FoldChange
  lfc[lfc == Inf] <- max(lfc[lfc != Inf])
  lfc[lfc == -Inf] <- min(lfc[lfc != -Inf])
  lfc
})

## Index the result table by identifier.
rownames(res) <- res$id

## Per-identifier mean counts for the two conditions, each shifted by +1,
## with the identifiers as row names.
data_ct <- with(
  res,
  data.frame(
    ago1 = baseMeanA + 1,
    ago1x = baseMeanB + 1,
    row.names = id
  )
)

## Split the significant hits (padj < 0.01) by the sign of the log2 fold-change:
## `up` has log2FoldChange > 0 and `dw` has log2FoldChange < 0.
## which() turns the logical mask into row positions and drops any NA, so an
## entry with a missing padj is excluded instead of producing an all-NA row
## (a plain logical index containing NA would return such rows).
is_significant <- res$padj < 0.01
up <- res[which(is_significant & res$log2FoldChange > 0), ]
dw <- res[which(is_significant & res$log2FoldChange < 0), ]

## Export table: keep five columns of the test result, give them descriptive
## names and reset the row names to the default numbering.
res_ms_oe_ip <- setNames(
  res[, c("id", "baseMeanA", "baseMeanB", "log2FoldChange", "padj")],
  c("uniprotID", "msCountsAGO1", "msCountsAGO1x", "log2FoldChange", "padj")
)
rownames(res_ms_oe_ip) <- NULL

## Save to file (write.csv() defaults are used, so row names are written too).
write.csv(
  res_ms_oe_ip,
  file = "./results/oe.cell.lines.ms.ip.ago1.vs.ago1x.csv"
)

## Log-log scatter of the +1-shifted counts (AGO1 on x, AGO1x on y) with the
## `dw` hits highlighted and labelled by identifier.
highlight_cols <- colors()[c(33, 125)]  # [1] up-regulated, [2] down-regulated
dw_ago1 <- data_ct[dw$id, "ago1"]
dw_ago1x <- data_ct[dw$id, "ago1x"]

pdf(
  "./figures/binding.partners.over.exp.ago1x.vs.ago1.pdf",
  width = 8,
  height = 8,
  paper = "special"
)
plot(
  data_ct$ago1,
  data_ct$ago1x,
  pch = 20,
  xlab = "Spectral counts (AGO1)",
  ylab = "Spectral counts (AGO1x)",
  cex.lab = 1.3,
  col = "grey",
  log = "xy"
)
## Dashed reference line with intercept 0 and slope 1.
abline(0, 1, lty = 2, col = "orange")
## Highlighting of the `up` hits is disabled:
#points(data_ct[up$id,"ago1"],data_ct[up$id,"ago1x"],pch=20,col=colors()[c(33)])
#text(data_ct[up$id,"ago1"],data_ct[up$id,"ago1x"],up$id,pos=4,cex=0.6)
points(dw_ago1, dw_ago1x, pch = 20, col = highlight_cols[2])
text(dw_ago1, dw_ago1x, dw$id, pos = 4, cex = 0.6)
## NOTE(review): the legend still lists "Up-regulated" although the calls that
## would draw those points are commented out above -- confirm this is intended.
legend(
  "bottomright",
  legend = c("Up-regulated", "Down-regulated"),
  col = highlight_cols,
  pch = 20,
  inset = 0.01
)
dev.off()


