# Script to analyze NIJK03 dual SFFV CiBER-seq data

library(Biostrings)
library(DESeq2)
library(dplyr)
library(ggplot2)

# Download counts tables into current directory from GEO.
# The requested files are gzip archives (".txt.gz" in the URL); read.delim() reads
# gzip-compressed files transparently, so the ".txt" destination names still work.
# mode = "wb" forces a binary transfer: on Windows download.file() only switches to
# binary mode by itself when the URL ends in ".gz", and these URLs end in the
# percent-encoded "%2Egz", so a text-mode transfer would corrupt the archives.
download.file(url = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE276055&format=file&file=GSE276055%5FNIJK03%5FiRFP%5Fbccounts%5Ffinal%2Etxt%2Egz",
              destfile = "./NIJK03_iRFP_bccounts_final.txt", mode = "wb")
download.file(url = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE276055&format=file&file=GSE276055%5FNIJK03%5FmCherry%5Fbccounts%5Ffinal%2Etxt%2Egz",
              destfile = "./NIJK03_mCherry_bccounts_final.txt", mode = "wb")

# Read iRFP counts table: one row per barcode followed by six count columns
# (post/pre for replicates 1-3, in the order given by the column names below)
iRfpCounts <- read.delim("./NIJK03_iRFP_bccounts_final.txt", header = TRUE, stringsAsFactors = FALSE)
colnames(iRfpCounts) <- c("iRFP_barcode", "post_iRFP1", "pre_iRFP1", "post_iRFP2",
                          "pre_iRFP2", "post_iRFP3", "pre_iRFP3")

# Total count per barcode over the six samples (missing counts are ignored)
iRfpCounts$sum <- rowSums(iRfpCounts[, 2:7], na.rm = TRUE)

# Keep only barcodes with a total of at least 100 counts
iRfpCountsFiltered <- iRfpCounts[iRfpCounts$sum >= 100, ]

# View() needs a graphical data viewer and stops a non-interactive / headless run,
# so the tables are only opened when the script is run interactively
if (interactive()) {
  View(iRfpCounts)
  View(iRfpCountsFiltered)
}

# Read mCherry counts table: one row per barcode followed by six count columns
# Note: mCherry barcodes need to be converted to the reverse complement.
mCherryCounts <- read.delim("./NIJK03_mCherry_bccounts_final.txt", header = TRUE, stringsAsFactors = FALSE)
colnames(mCherryCounts) <- c("mCherry_barcode", "post_mCherry1", "pre_mCherry1", "post_mCherry2",
                             "pre_mCherry2", "post_mCherry3", "pre_mCherry3")

# Total count per barcode over the six samples (missing counts are ignored)
mCherryCounts$sum <- rowSums(mCherryCounts[, 2:7], na.rm = TRUE)

# Keep only barcodes with a total of at least 100 counts
mCherryCountsFiltered <- mCherryCounts[mCherryCounts$sum >= 100, ]

# View() needs a graphical data viewer and stops a non-interactive / headless run,
# so the tables are only opened when the script is run interactively
if (interactive()) {
  View(mCherryCounts)
  View(mCherryCountsFiltered)
}

# Write reverse comp of barcode to match to look up table.
# DNAStringSet handles the whole column in one vectorised call (instead of building
# one DNAString per barcode) and always yields a character vector, also when the
# filtered table is empty.
mCherryCountsFiltered$RevComp <- as.character(
  reverseComplement(DNAStringSet(mCherryCountsFiltered$mCherry_barcode))
)

# Read in final lookup table made from NIJL009 PacBio sequencing
final_lookup <- read.csv("../../lookup-table/pLJK06_v4_NIJL009_final_lookup_v2.csv",
                         stringsAsFactors = FALSE)

# Merge iRfpCountsFiltered with final lookup.
# Full outer join on the iRFP barcode (all = TRUE): lookup rows without counts and
# counted barcodes missing from the lookup are both kept, with NA in the columns
# that come from the other table.
lookup_iRFP <- merge(x = final_lookup,
                     y = iRfpCountsFiltered[, c("iRFP_barcode", "post_iRFP1", "pre_iRFP1", "post_iRFP2",
                                                "pre_iRFP2", "post_iRFP3", "pre_iRFP3")],
                     by.x = "bc1_neighborhood", by.y = "iRFP_barcode", all = TRUE)

# View() needs a graphical data viewer and stops a non-interactive / headless run,
# so the tables are only opened when the script is run interactively
if (interactive()) {
  View(final_lookup)
  View(lookup_iRFP)
}

# Merge lookup_iRFP with mCherryCountsFiltered.
# Full outer join on the reverse-complemented mCherry barcode (all = TRUE).
lookup_iRFP_mCherry <- merge(x = lookup_iRFP,
                             y = mCherryCountsFiltered[, c("RevComp", "post_mCherry1", "pre_mCherry1", "post_mCherry2",
                                                           "pre_mCherry2", "post_mCherry3", "pre_mCherry3")],
                             by.x = "bc2_neighborhood", by.y = "RevComp", all = TRUE)

# View() needs a graphical data viewer and stops a non-interactive / headless run,
# so the table is only opened when the script is run interactively
if (interactive()) {
  View(lookup_iRFP_mCherry)
}

# Remove rows where bc1 or bc2 is NA, meaning it wasn't seen in the PacBio sequencing run and we don't know
# which guides bc1 and bc2 link to
counts_final <- filter(lookup_iRFP_mCherry,
                       !is.na(bc1_neighborhood), !is.na(bc2_neighborhood))

# Discard barcode pairs with very low coverage.
# All NA entries are set to 0 first; note that this is applied to the whole table,
# not only to the twelve count columns.
counts_final_fc <- replace(counts_final, is.na(counts_final), 0)

# Mean count over the 12 samples (6 iRFP + 6 mCherry)
counts_final_fc$avg_count_experiment <- rowSums(
  counts_final_fc[, c("pre_iRFP1", "post_iRFP1", "pre_iRFP2", "post_iRFP2",
                      "pre_iRFP3", "post_iRFP3", "pre_mCherry1", "pre_mCherry2",
                      "pre_mCherry3", "post_mCherry1", "post_mCherry2", "post_mCherry3")]
) / 12

# Keep rows whose mean count is at least 32
counts_final_fc <- counts_final_fc[counts_final_fc$avg_count_experiment >= 32, ]


# Compare barcode counts

# Draw a scatter plot of two log2-transformed count columns and save it as a
# 300 dpi TIFF.
#   data     data frame that holds the count columns
#   mapping  aes() mapping giving the (log2-transformed) x and y columns
#   x_label  x-axis title
#   y_label  y-axis title
#   file     path of the TIFF file to write
#   height   figure height in inches (the width is always 5 in)
# The plot is printed explicitly: ggplot objects are only rendered when printed, and
# auto-printing does not happen when the script is run through source(), which
# would leave the TIFF empty.
save_count_scatter <- function(data, mapping, x_label, y_label, file, height = 4) {
  scatter <- ggplot(data, mapping) + geom_point(alpha = 0.1) +
    xlim(0, 14) + ylim(0, 14) + xlab(x_label) + ylab(y_label) +
    theme(text = element_text(size = 15), panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(), panel.background = element_blank(),
          axis.line = element_line(colour = "black"))
  tiff(file, units = "in", width = 5, height = height, res = 300)
  on.exit(dev.off(), add = TRUE)   # close the device even if drawing fails
  print(scatter)
  invisible(file)
}

# Each comparison is followed by the Spearman correlation of the raw counts;
# print() makes the test result show up under source() as well.

# Rep 1 CRISPRi uninduced vs induced, iRFP
save_count_scatter(counts_final_fc, aes(x = log2(pre_iRFP1), y = log2(post_iRFP1)),
                   x_label = "log2(Rep 1 uninduced iRFP counts)",
                   y_label = "log2(Rep 1 induced iRFP1 counts)",
                   file = "./NIJK03_comparison1.tiff", height = 4)
print(cor.test(counts_final_fc$pre_iRFP1, counts_final_fc$post_iRFP1, method = "spearman"))

# Rep 1 iRFP vs mCherry, CRISPRi uninduced
save_count_scatter(counts_final_fc, aes(x = log2(pre_iRFP1), y = log2(pre_mCherry1)),
                   x_label = "log2(Rep 1 uninduced iRFP counts)",
                   y_label = "log2(Rep 1 uninduced mCherry counts)",
                   file = "./NIJK03_comparison2.tiff", height = 4.1)
print(cor.test(counts_final_fc$pre_iRFP1, counts_final_fc$pre_mCherry1, method = "spearman"))

# Rep 1 CRISPRi uninduced vs induced, mCherry
save_count_scatter(counts_final_fc, aes(x = log2(pre_mCherry1), y = log2(post_mCherry1)),
                   x_label = "log2(Rep 1 uninduced mCherry counts)",
                   y_label = "log2(Rep 1 induced mCherry counts)",
                   file = "./NIJK03_comparison3.tiff", height = 4)
print(cor.test(counts_final_fc$pre_mCherry1, counts_final_fc$post_mCherry1, method = "spearman"))

# Rep 2 iRFP vs mCherry, CRISPRi uninduced
save_count_scatter(counts_final_fc, aes(x = log2(pre_iRFP2), y = log2(pre_mCherry2)),
                   x_label = "log2(Rep 2 uninduced iRFP counts)",
                   y_label = "log2(Rep 2 uninduced mCherry counts)",
                   file = "./NIJK03_comparison4.tiff", height = 4.1)
print(cor.test(counts_final_fc$pre_iRFP2, counts_final_fc$pre_mCherry2, method = "spearman"))

# Rep 3 iRFP vs mCherry, CRISPRi uninduced
save_count_scatter(counts_final_fc, aes(x = log2(pre_iRFP3), y = log2(pre_mCherry3)),
                   x_label = "log2(Rep 3 uninduced iRFP counts)",
                   y_label = "log2(Rep 3 uninduced mCherry counts)",
                   file = "./NIJK03_comparison5.tiff", height = 4.1)
print(cor.test(counts_final_fc$pre_iRFP3, counts_final_fc$pre_mCherry3, method = "spearman"))

# Combine barcodes by gRNA
# Rows are grouped by sgRNA1 and each of the 12 count columns is replaced by the
# group total. sgRNA2 and bcs_perguide are passed through without aggregation, so
# summarize() returns one row per input row, each carrying the totals of its group.
# NOTE(review): recent dplyr releases reportedly warn when summarize() returns more
# than one row per group (reframe() is the suggested replacement) -- confirm against
# the dplyr version in use.
countsbygRNA <- counts_final_fc %>%
  group_by(sgRNA1) %>%
  summarize(sgRNA2 = sgRNA2,
            bcs_perguide = bcs_perguide,
            pre_iRFP1 = sum(pre_iRFP1, na.rm = TRUE),
            pre_iRFP2 = sum(pre_iRFP2, na.rm = TRUE),
            pre_iRFP3 = sum(pre_iRFP3, na.rm = TRUE),
            post_iRFP1 = sum(post_iRFP1, na.rm = TRUE),
            post_iRFP2 = sum(post_iRFP2, na.rm = TRUE),
            post_iRFP3 = sum(post_iRFP3, na.rm = TRUE),
            pre_mCherry1 = sum(pre_mCherry1, na.rm = TRUE),
            pre_mCherry2 = sum(pre_mCherry2, na.rm = TRUE),
            pre_mCherry3 = sum(pre_mCherry3, na.rm = TRUE),
            post_mCherry1 = sum(post_mCherry1, na.rm = TRUE),
            post_mCherry2 = sum(post_mCherry2, na.rm = TRUE),
            post_mCherry3 = sum(post_mCherry3, na.rm = TRUE))
# Collapse the repeated rows: rows that are identical in every column are kept once.
# NOTE(review): this leaves one row per sgRNA1 only if sgRNA2 and bcs_perguide are
# constant within each sgRNA1 group -- presumably the case; verify against the lookup.
countsbygRNA <- countsbygRNA[!duplicated(countsbygRNA), ]
View(countsbygRNA)


### DESeq2 analysis on barcodes with UMI de-duplicated data ###

# Count table in the "tidy" layout used below: the first column is the feature id
# (bc2, bc1 and sgRNA1 joined by "_"), followed by the 12 sample count columns.
# The id is built straight from counts_final_fc; no intermediate column selection
# is needed because the final select() defines the output columns.
deseq2_count <- counts_final_fc %>%
  mutate(bc_id = paste(bc2_neighborhood, bc1_neighborhood, sgRNA1, sep = "_")) %>%
  select(bc_id,
         pre_iRFP1, post_iRFP1, pre_iRFP2, post_iRFP2, pre_iRFP3, post_iRFP3,
         pre_mCherry1, post_mCherry1, pre_mCherry2, post_mCherry2,
         pre_mCherry3, post_mCherry3)

# View() needs a graphical data viewer and stops a non-interactive / headless run,
# so the table is only opened when the script is run interactively
if (interactive()) {
  View(deseq2_count)
}

# Sample sheet: one row per count column of deseq2_count, in the same order
# (pre/post alternate, replicates 1-3, all iRFP samples before the mCherry samples)
coldata <- data.frame(
  id = paste0(rep(c("pre", "post"), times = 6), "_",
              rep(c("iRFP", "mCherry"), each = 6),
              rep(rep(1:3, each = 2), times = 2)),
  replicate = as.character(rep(rep(1:3, each = 2), times = 2)),
  barcode = rep(c("iRFP", "mCherry"), each = 6),
  condition = rep(c("pre", "post"), times = 6)
)

# The design models replicate, condition and barcode as factors, plus the
# condition:barcode interaction (aka, the post-mCherry condition).
# tidy = TRUE: the first column of deseq2_count holds the feature ids.
dds <- DESeqDataSetFromMatrix(
  countData = deseq2_count,
  colData = coldata,
  design = ~ replicate + condition * barcode,
  tidy = TRUE
)

# Reference levels: "pre" for condition and "iRFP" for barcode
dds$condition <- factor(dds$condition, levels = c("pre", "post"))
dds$barcode <- factor(dds$barcode, levels = c("iRFP", "mCherry"))

# Fit the model and list the available coefficients
dds <- DESeq(dds)
resultsNames(dds)

# View() calls below are guarded: View() needs a graphical data viewer and stops a
# non-interactive / headless run.

# mCherry / iRFP results table
res <- results(dds, name = "barcode_mCherry_vs_iRFP")
summary(res)
res_barcode_mCherry_vs_iRFP <- as.data.frame(res)
if (interactive()) {
  View(res_barcode_mCherry_vs_iRFP)
}
write.csv(res_barcode_mCherry_vs_iRFP, "./DESeq_res_barcode_mCherry_vs_iRFP.csv")
# Reading the file back turns the row names (feature ids) into a regular first column
res_barcode_mCherry_vs_iRFP <- read.csv("./DESeq_res_barcode_mCherry_vs_iRFP.csv")

# post / pre dox induction results table
res_condition <- results(dds, name = "condition_post_vs_pre")
summary(res_condition)
res_condition_post_vs_pre <- as.data.frame(res_condition)
if (interactive()) {
  View(res_condition_post_vs_pre)
}
write.csv(res_condition_post_vs_pre, "./DESeq_res_condition_post_vs_pre.csv")
# Reading the file back turns the row names (feature ids) into a regular first column
res_condition_post_vs_pre <- read.csv("./DESeq_res_condition_post_vs_pre.csv")

# Volcano plots of the barcode-level DESeq2 results.
# Grey: all features; purple: padj < 0.01 and |log2FC| > 1. Dashed lines mark the
# same thresholds (-log10(0.01) = 2, log2FC = -1 and 1).
# Each plot is built first and then printed explicitly inside the tiff() device:
# ggplot objects are only rendered when printed, and auto-printing does not happen
# when the script is run through source(), which would leave the TIFF empty.

# Plot mCherry / iRFP DESeq2 comparison
volcano_barcode <- ggplot(res_barcode_mCherry_vs_iRFP, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(na.rm = TRUE, size = 2, colour = "gray", alpha = 0.3) +
  geom_point(data = subset(res_barcode_mCherry_vs_iRFP, padj < 0.01 & abs(log2FoldChange) > 1),
             aes(x = log2FoldChange, y = -log10(padj)), size = 2, colour = "#ad82a4", alpha = 0.3) +
  geom_hline(yintercept = 2, linetype = "dashed", color = "black", size = 1) +
  geom_vline(xintercept = -1, linetype = "dashed", color = "black", size = 1) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "black", size = 1) +
  theme_bw() + xlim(-11, 11) + ylim(0, 40) +
  labs(x = "log2FC(mCherry / iRFP barcode)", y = "-log10(adjusted p-value)") +
  theme(axis.text.x = element_text(hjust = 1, size = 12), axis.text.y = element_text(size = 12),
        axis.title = element_text(size = 15),
        panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
tiff("./mCherry_vs_iRFP_barcode.tiff", units = "in", width = 5, height = 4, res = 300)
print(volcano_barcode)
dev.off()

# Plot post / pre dox induction DESeq2 comparison
volcano_condition <- ggplot(res_condition_post_vs_pre, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(na.rm = TRUE, size = 2, colour = "gray", alpha = 0.3) +
  geom_point(data = subset(res_condition_post_vs_pre, padj < 0.01 & abs(log2FoldChange) > 1),
             aes(x = log2FoldChange, y = -log10(padj)), size = 2, colour = "#ad82a4") +
  geom_hline(yintercept = 2, linetype = "dashed", color = "black", size = 1) +
  geom_vline(xintercept = -1, linetype = "dashed", color = "black", size = 1) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "black", size = 1) +
  theme_bw() + xlim(-8, 8) + ylim(0, 5) +
  labs(x = "log2FC(dox induced / uninduced)", y = "-log10(adjusted p-value)") +
  theme(axis.text.x = element_text(hjust = 1, size = 12), axis.text.y = element_text(size = 12),
        axis.title = element_text(size = 15),
        panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
tiff("./res_condition_post_vs_pre.tiff", units = "in", width = 5, height = 4, res = 300)
print(volcano_condition)
dev.off()


### DESeq2 analysis on sgRNAs with UMI de-duplicated data ###

# Count table in "tidy" layout: sgRNA1 as the feature id, then the 12 sample columns.
# as.data.frame() turns the dplyr result into a plain data frame.
deseq2_count_gRNA <- select(countsbygRNA, sgRNA1,
                            pre_iRFP1, post_iRFP1, pre_iRFP2, post_iRFP2, pre_iRFP3, post_iRFP3,
                            pre_mCherry1, post_mCherry1, pre_mCherry2, post_mCherry2,
                            pre_mCherry3, post_mCherry3)
deseq2_count_gRNA <- as.data.frame(deseq2_count_gRNA)

# View() needs a graphical data viewer and stops a non-interactive / headless run,
# so the table is only opened when the script is run interactively
if (interactive()) {
  View(deseq2_count_gRNA)
}

# Same sample sheet (coldata) and design as the barcode-level analysis.
# Design formula takes into account the factor variables replicate, condition, and barcode
# and also accounts for the interaction between condition and barcode (aka, the post-mCherry condition)
dds_gRNA <- DESeqDataSetFromMatrix(countData = deseq2_count_gRNA,
                                   colData = coldata,
                                   design = ~replicate + condition * barcode, tidy = TRUE)

# Set custom reference levels.
# The levels are taken from dds_gRNA itself (not from the barcode-level dds object),
# so this step no longer depends on another data set having the same sample order.
dds_gRNA$condition <- factor(dds_gRNA$condition, levels = c("pre", "post"))
dds_gRNA$barcode <- factor(dds_gRNA$barcode, levels = c("iRFP", "mCherry"))

# Run DESeq
dds_gRNA <- DESeq(dds_gRNA)
resultsNames(dds_gRNA)

# View() calls below are guarded: View() needs a graphical data viewer and stops a
# non-interactive / headless run.

# mCherry / iRFP results
res_gRNA <- results(dds_gRNA, name = "barcode_mCherry_vs_iRFP")
summary(res_gRNA)
res_barcode_mCherry_vs_iRFP_gRNA <- as.data.frame(res_gRNA)
if (interactive()) {
  View(res_barcode_mCherry_vs_iRFP_gRNA)
}

# post / pre dox induction results
res_condition_gRNA <- results(dds_gRNA, name = "condition_post_vs_pre")
summary(res_condition_gRNA)
res_condition_post_vs_pre_gRNA <- as.data.frame(res_condition_gRNA)
if (interactive()) {
  View(res_condition_post_vs_pre_gRNA)
}

# Volcano plots of the sgRNA-level DESeq2 results.
# Grey: all sgRNAs; purple: padj < 0.01 and |log2FC| > 1. Dashed lines mark the
# same thresholds (-log10(0.01) = 2, log2FC = -1 and 1).
# Each plot is built first and then printed explicitly inside the tiff() device:
# ggplot objects are only rendered when printed, and auto-printing does not happen
# when the script is run through source(), which would leave the TIFF empty.

# Plot mCherry / iRFP DESeq2 comparison
volcano_barcode_gRNA <- ggplot(res_barcode_mCherry_vs_iRFP_gRNA, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(na.rm = TRUE, size = 2, colour = "gray", alpha = 0.3) +
  geom_point(data = subset(res_barcode_mCherry_vs_iRFP_gRNA, padj < 0.01 & abs(log2FoldChange) > 1),
             aes(x = log2FoldChange, y = -log10(padj)), size = 2, colour = "#ad82a4", alpha = 0.3) +
  geom_hline(yintercept = 2, linetype = "dashed", color = "black", size = 1) +
  geom_vline(xintercept = -1, linetype = "dashed", color = "black", size = 1) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "black", size = 1) +
  theme_bw() + xlim(-11, 11) + ylim(0, 40) +
  labs(x = "log2FC(mCherry / iRFP)", y = "-log10(adjusted p-value)") +
  theme(axis.text.x = element_text(hjust = 1, size = 12), axis.text.y = element_text(size = 12),
        axis.title = element_text(size = 15),
        panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
tiff("./mCherry_vs_iRFP_gRNA.tiff", units = "in", width = 5, height = 4, res = 300)
print(volcano_barcode_gRNA)
dev.off()

# Plot post / pre dox induction DESeq2 comparison
volcano_condition_gRNA <- ggplot(res_condition_post_vs_pre_gRNA, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(na.rm = TRUE, size = 2, colour = "gray", alpha = 0.3) +
  geom_point(data = subset(res_condition_post_vs_pre_gRNA, padj < 0.01 & abs(log2FoldChange) > 1),
             aes(x = log2FoldChange, y = -log10(padj)), size = 2, colour = "#ad82a4") +
  geom_hline(yintercept = 2, linetype = "dashed", color = "black", size = 1) +
  geom_vline(xintercept = -1, linetype = "dashed", color = "black", size = 1) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "black", size = 1) +
  theme_bw() + xlim(-8, 8) + ylim(0, 5) +
  labs(x = "log2FC(dox induced / uninduced)", y = "-log10(adjusted p-value)") +
  theme(axis.text.x = element_text(hjust = 1, size = 12), axis.text.y = element_text(size = 12),
        axis.title = element_text(size = 15),
        panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
tiff("./res_condition_post_vs_pre_gRNA.tiff", units = "in", width = 5, height = 4, res = 300)
print(volcano_condition_gRNA)
dev.off()


########################################################################################
# Repeat analysis with data without UMI de-duplication

# NOTE(review): the two "*_noumi.txt" files read below are not fetched by any
# download step visible in this script -- presumably they must already be present
# in the working directory; confirm where they come from.
# View() calls are guarded: View() needs a graphical data viewer and stops a
# non-interactive / headless run.

# Read iRFP counts table
iRfpCounts2 <- read.delim("./NIJK03_iRFP_bccounts_noumi.txt",
                          header = TRUE, stringsAsFactors = FALSE)
colnames(iRfpCounts2) <- c("iRFP_barcode", "post_iRFP1", "pre_iRFP1", "post_iRFP2",
                           "pre_iRFP2", "post_iRFP3", "pre_iRFP3")
# Total count per barcode over the six samples (missing counts are ignored)
iRfpCounts2$sum <- rowSums(iRfpCounts2[, 2:7], na.rm = TRUE)
# Keep only barcodes with a total of at least 100 counts
iRfpCountsFiltered2 <- iRfpCounts2[iRfpCounts2$sum >= 100, ]

if (interactive()) {
  View(iRfpCounts2)
  View(iRfpCountsFiltered2)
}

# Read mCherry counts table
# Note: mCherry barcodes need to be converted to the reverse complement.
mCherryCounts2 <- read.delim("./NIJK03_mCherry_bccounts_noumi.txt",
                             header = TRUE, stringsAsFactors = FALSE)
colnames(mCherryCounts2) <- c("mCherry_barcode", "post_mCherry1", "pre_mCherry1", "post_mCherry2",
                              "pre_mCherry2", "post_mCherry3", "pre_mCherry3")
# Total count per barcode over the six samples (missing counts are ignored)
mCherryCounts2$sum <- rowSums(mCherryCounts2[, 2:7], na.rm = TRUE)
# Keep only barcodes with a total of at least 100 counts
mCherryCountsFiltered2 <- mCherryCounts2[mCherryCounts2$sum >= 100, ]

if (interactive()) {
  View(mCherryCounts2)
  View(mCherryCountsFiltered2)
}

# Write reverse comp of barcode to match to look up table.
# DNAStringSet handles the whole column in one vectorised call (instead of building
# one DNAString per barcode) and always yields a character vector, also when the
# filtered table is empty.
mCherryCountsFiltered2$RevComp <- as.character(
  reverseComplement(DNAStringSet(mCherryCountsFiltered2$mCherry_barcode))
)

# View() calls below are guarded: View() needs a graphical data viewer and stops a
# non-interactive / headless run.

# Merge iRfpCountsFiltered2 with final lookup (full outer join on the iRFP barcode)
lookup_iRFP2 <- merge(x = final_lookup,
                      y = iRfpCountsFiltered2[, c("iRFP_barcode", "post_iRFP1", "pre_iRFP1", "post_iRFP2",
                                                  "pre_iRFP2", "post_iRFP3", "pre_iRFP3")],
                      by.x = "bc1_neighborhood", by.y = "iRFP_barcode", all = TRUE)
if (interactive()) {
  View(lookup_iRFP2)
}

# Merge lookup_iRFP2 with mCherryCountsFiltered2 (full outer join on the
# reverse-complemented mCherry barcode)
lookup_iRFP_mCherry2 <- merge(x = lookup_iRFP2,
                              y = mCherryCountsFiltered2[, c("RevComp", "post_mCherry1", "pre_mCherry1", "post_mCherry2",
                                                             "pre_mCherry2", "post_mCherry3", "pre_mCherry3")],
                              by.x = "bc2_neighborhood", by.y = "RevComp", all = TRUE)
if (interactive()) {
  View(lookup_iRFP_mCherry2)
}

# Remove rows where bc1 or bc2 is NA, meaning it wasn't seen in the PacBio sequencing run and we don't know
# which guides bc1 and bc2 link to
counts_final_noUMI <- filter(lookup_iRFP_mCherry2,
                             !is.na(bc1_neighborhood), !is.na(bc2_neighborhood))

# Discard barcode pairs with very low coverage.
# All NA entries are set to 0 first; note that this is applied to the whole table,
# not only to the twelve count columns.
counts_final_noUMI_fc <- replace(counts_final_noUMI, is.na(counts_final_noUMI), 0)

# Mean count over the 12 samples (6 iRFP + 6 mCherry)
counts_final_noUMI_fc$avg_count_experiment <- rowSums(
  counts_final_noUMI_fc[, c("pre_iRFP1", "post_iRFP1", "pre_iRFP2", "post_iRFP2",
                            "pre_iRFP3", "post_iRFP3", "pre_mCherry1", "pre_mCherry2",
                            "pre_mCherry3", "post_mCherry1", "post_mCherry2", "post_mCherry3")]
) / 12

# Keep rows whose mean count is at least 32
counts_final_noUMI_fc <- counts_final_noUMI_fc[counts_final_noUMI_fc$avg_count_experiment >= 32, ]

# Additionally require at least one iRFP count and at least one mCherry count
counts_final_noUMI_fc <- counts_final_noUMI_fc[
  rowSums(counts_final_noUMI_fc[, c("pre_iRFP1", "post_iRFP1", "pre_iRFP2",
                                    "post_iRFP2", "pre_iRFP3", "post_iRFP3")]) > 0 &
    rowSums(counts_final_noUMI_fc[, c("pre_mCherry1", "post_mCherry1", "pre_mCherry2",
                                      "post_mCherry2", "pre_mCherry3", "post_mCherry3")]) > 0,
]

# Save the filtered table, then continue with the copy read back from disk
write.csv(counts_final_noUMI_fc, "./NIJK03_counts_noUMI_fc.csv", row.names=FALSE)
counts_final_noUMI_fc <- read.csv("./NIJK03_counts_noUMI_fc.csv")

### DESeq2 analysis on barcodes on data without UMI de-duplication ###

# View() needs a graphical data viewer and stops a non-interactive / headless run,
# so the tables are only opened when the script is run interactively
if (interactive()) {
  View(counts_final_noUMI_fc)
}

# Count table in the "tidy" layout used below: the first column is the feature id
# (bc2, bc1 and sgRNA1 joined by "_"), followed by the 12 sample count columns.
# The id is built straight from counts_final_noUMI_fc; no intermediate column
# selection is needed because the final select() defines the output columns.
deseq2_count_noumi <- counts_final_noUMI_fc %>%
  mutate(bc_id = paste(bc2_neighborhood, bc1_neighborhood, sgRNA1, sep = "_")) %>%
  select(bc_id,
         pre_iRFP1, post_iRFP1, pre_iRFP2, post_iRFP2, pre_iRFP3, post_iRFP3,
         pre_mCherry1, post_mCherry1, pre_mCherry2, post_mCherry2,
         pre_mCherry3, post_mCherry3)

if (interactive()) {
  View(deseq2_count_noumi)
}

# Sample sheet: one row per count column of deseq2_count_noumi, in the same order
# (pre/post alternate, replicates 1-3, all iRFP samples before the mCherry samples)
coldata_noumiv1 <- data.frame(
  id = paste0(rep(c("pre", "post"), times = 6), "_",
              rep(c("iRFP", "mCherry"), each = 6),
              rep(rep(1:3, each = 2), times = 2)),
  replicate = as.character(rep(rep(1:3, each = 2), times = 2)),
  barcode = rep(c("iRFP", "mCherry"), each = 6),
  condition = rep(c("pre", "post"), times = 6)
)

# The design models replicate, condition and barcode as factors, plus the
# condition:barcode interaction (aka, the post-mCherry condition).
# tidy = TRUE: the first column of deseq2_count_noumi holds the feature ids.
dds_noumi <- DESeqDataSetFromMatrix(
  countData = deseq2_count_noumi,
  colData = coldata_noumiv1,
  design = ~ replicate + condition * barcode,
  tidy = TRUE
)

# Reference levels: "pre" for condition and "iRFP" for barcode
dds_noumi$condition <- factor(dds_noumi$condition, levels = c("pre", "post"))
dds_noumi$barcode <- factor(dds_noumi$barcode, levels = c("iRFP", "mCherry"))

# Fit the model and list the available coefficients
dds_noumi <- DESeq(dds_noumi)
resultsNames(dds_noumi)

# The result tables are written to ./analysis_noumi/; create the directory first so
# that write.csv() does not fail when it does not exist yet (no-op if it does).
dir.create("./analysis_noumi", showWarnings = FALSE, recursive = TRUE)

# View() calls below are guarded: View() needs a graphical data viewer and stops a
# non-interactive / headless run.

# mCherry / iRFP results table
# (res is re-assigned here; any earlier object of that name is replaced)
res <- results(dds_noumi, name = "barcode_mCherry_vs_iRFP")
summary(res)
res_barcode_mCherry_vs_iRFP_noumi <- as.data.frame(res)
if (interactive()) {
  View(res_barcode_mCherry_vs_iRFP_noumi)
}
write.csv(res_barcode_mCherry_vs_iRFP_noumi, "./analysis_noumi/DESeq_res_barcode_mCherry_vs_iRFP_noumi.csv")
# Reading the file back turns the row names (feature ids) into a regular first column
res_barcode_mCherry_vs_iRFP_noumi <- read.csv("./analysis_noumi/DESeq_res_barcode_mCherry_vs_iRFP_noumi.csv")
# Number of features with padj < 0.01
nrow(subset(res_barcode_mCherry_vs_iRFP_noumi, padj < 0.01))

# post / pre dox induction results table
# (res_condition is re-assigned here; any earlier object of that name is replaced)
res_condition <- results(dds_noumi, name = "condition_post_vs_pre")
summary(res_condition)
res_condition_post_vs_pre_noumi <- as.data.frame(res_condition)
if (interactive()) {
  View(res_condition_post_vs_pre_noumi)
}
write.csv(res_condition_post_vs_pre_noumi, "./analysis_noumi/DESeq_res_condition_post_vs_pre_noumi.csv")
# Reading the file back turns the row names (feature ids) into a regular first column
res_condition_post_vs_pre_noumi <- read.csv("./analysis_noumi/DESeq_res_condition_post_vs_pre_noumi.csv")

# Volcano plots of the barcode-level DESeq2 results without UMI de-duplication.
# Grey: all features; purple: padj < 0.01 and |log2FC| > 1. Dashed lines mark the
# same thresholds (-log10(0.01) = 2, log2FC = -1 and 1).
# Each plot is built first and then printed explicitly inside the tiff() device:
# ggplot objects are only rendered when printed, and auto-printing does not happen
# when the script is run through source(), which would leave the TIFF empty.

# Plot mCherry / iRFP DESeq2 comparison
# Adjusted p-values below 1e-40 are raised to 1e-40 in a copy of the table so that
# these points sit at the top edge of the y-axis (limit 40) instead of being dropped.
res_barcode_mCherry_vs_iRFP_noumi2 <- res_barcode_mCherry_vs_iRFP_noumi
res_barcode_mCherry_vs_iRFP_noumi2$padj[res_barcode_mCherry_vs_iRFP_noumi2$padj < 1e-40] <- 1e-40

volcano_barcode_noumi <- ggplot(res_barcode_mCherry_vs_iRFP_noumi2, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(na.rm = TRUE, size = 2, colour = "gray", alpha = 0.3) +
  geom_point(data = subset(res_barcode_mCherry_vs_iRFP_noumi2, padj < 0.01 & abs(log2FoldChange) > 1),
             aes(x = log2FoldChange, y = -log10(padj)), size = 2, colour = "#ad82a4", alpha = 0.3) +
  geom_hline(yintercept = 2, linetype = "dashed", color = "black", size = 1) +
  geom_vline(xintercept = -1, linetype = "dashed", color = "black", size = 1) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "black", size = 1) +
  theme_bw() + xlim(-12, 12) + ylim(0, 40) +
  labs(x = "log2FC(mCherry / iRFP barcode)", y = "-log10(adjusted p-value)") +
  theme(axis.text.x = element_text(hjust = 1, size = 12), axis.text.y = element_text(size = 12),
        axis.title = element_text(size = 15),
        panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
tiff("./mCherry_vs_iRFP_barcode_noumi.tiff", units = "in", width = 5, height = 4, res = 300)
print(volcano_barcode_noumi)
dev.off()

# Plot post / pre dox induction DESeq2 comparison
volcano_condition_noumi <- ggplot(res_condition_post_vs_pre_noumi, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(na.rm = TRUE, size = 2, colour = "gray", alpha = 0.3) +
  geom_point(data = subset(res_condition_post_vs_pre_noumi, padj < 0.01 & abs(log2FoldChange) > 1),
             aes(x = log2FoldChange, y = -log10(padj)), size = 2, colour = "#ad82a4") +
  geom_hline(yintercept = 2, linetype = "dashed", color = "black", size = 1) +
  geom_vline(xintercept = -1, linetype = "dashed", color = "black", size = 1) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "black", size = 1) +
  theme_bw() + xlim(-20, 20) + ylim(0, 10) +
  labs(x = "log2FC(dox induced / uninduced)", y = "-log10(adjusted p-value)") +
  theme(axis.text.x = element_text(hjust = 1, size = 12), axis.text.y = element_text(size = 12),
        axis.title = element_text(size = 15),
        panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
tiff("./res_condition_post_vs_pre_noumi.tiff", units = "in", width = 5, height = 4, res = 300)
print(volcano_condition_noumi)
dev.off()