library(tidyverse)
library(readxl)
library(ggplot2)
library(UpSetR)
library(ggplotify)
library(patchwork)

## Epigenetic heatmap

flow1 <- read_delim("epigenetic/summary_barcodes_hm_chr_flowcell1_chr.txt")

flow2 <- read_delim("epigenetic/summary_barcodes_hm_chr_flowcell2_chr.txt")

flow1 <- flow1[,-4]
flow2 <- flow2[,-4]

flow1$flowcell <- "Flow cell 1"
flow2$flowcell <- "Flow cell 2"

flowc <- rbind(flow1,flow2)

h_col <- names(flowc)[grepl("(^|_)(h)(_count|count)?$|\\.(h)$", names(flowc), ignore.case = TRUE)]
m_col <- names(flowc)[grepl("(^|_)(m)(_count|count)?$|\\.(m)$", names(flowc), ignore.case = TRUE)]

flowc_prep <- flowc %>%
  mutate(
    h = if(length(h_col) >= 1) .data[[h_col[1]]] else NA_real_,
    m = if(length(m_col) >= 1) .data[[m_col[1]]] else NA_real_
  )

flowc_long <- flowc_prep %>%
  pivot_longer(cols = c("h","m"), names_to = "epigenetic_type", values_to = "count") %>%
  mutate(
    epigenetic_type = tolower(epigenetic_type),
    count = as.numeric(count)
  )

flowc_long[flowc_long$epigenetic_type=="h",]$epigenetic_type <- "5hmC"
flowc_long[flowc_long$epigenetic_type=="m",]$epigenetic_type <- "5mC"

flowc_long$barcode <- paste("Barcode ",str_extract(flowc_long$barcode,"\\d+"),paste="")

pdf("Epigenetics_chr_barcodes_heatmaps.pdf",width=25,height=20)
for(i in unique(flowc_long$chr)){
  print(i)
  print(flowc_long %>% filter(chr==i))
  flowplot <- flowc_long %>% filter(chr==i) %>% ggplot(aes(x = epigenetic_type, y = barcode, fill = count,label=count)) +
      geom_tile(color = "white") +
      geom_text(color="black",size=8) +
      facet_wrap(~ flowcell, ncol = 2) +
      scale_fill_gradient(low = "lightyellow", high = "steelblue", na.value = "grey90") +
      labs(x = "Epigenetic variant type", y = "Barcode", fill = "Count", title = i) +
      theme_classic() +
      theme(
      plot.title = element_text(size = 18, hjust = 0.5),
      text = element_text(size = 18),
      axis.text.x = element_text(angle = 45, hjust = 1),
      legend.background = element_rect(fill = "transparent")
      )
      print(flowplot)
}
dev.off()

## Structural variants heatmap

vcf_struc_var <- read.table("population.vcf")

pdf("Structural_variants_chr_barcodes_heatmaps.pdf",width=25,height=20)
for(i in unique(vcf_struc_var$V1)){
  if(grepl("(Unplaced|Mitochondria)",i)){
  }else{
    print(i)
    vcf_struc_var_filt <- vcf_struc_var %>% filter(V1==i)
    colnames(vcf_struc_var_filt) <- c("CHROM","POS","ID","REF","ALT","QUAL","FILTER","INFO","FORMAT","flowcell2_barcode09","flowcell2_barcode05","flowcell2_barcode19","flowcell2_barcode13","flowcell2_barcode01","flowcell2_barcode08","flowcell2_barcode12","flowcell2_barcode18","flowcell2_barcode04","flowcell2_barcode14","flowcell2_barcode02","flowcell2_barcode07","flowcell2_barcode10","flowcell2_barcode11","flowcell2_barcode06","flowcell2_barcode15","flowcell2_barcode03","flowcell2_barcode17","flowcell2_barcode16","flowcell1_barcode09","flowcell1_barcode05","flowcell1_barcode19","flowcell1_barcode13","flowcell1_barcode01","flowcell1_barcode08","flowcell1_barcode12","flowcell1_barcode18","flowcell1_barcode04","flowcell1_barcode14","flowcell1_barcode02","flowcell1_barcode07","flowcell1_barcode10","flowcell1_barcode11","flowcell1_barcode06","flowcell1_barcode15","flowcell1_barcode03","flowcell1_barcode17","flowcell1_barcode16","flowcell1_barcode01","flowcell1_barcode04","flowcell1_barcode02","flowcell1_barcode03","flowcell1_merged","flowcell2_merged")
    
    vcf_struc_var_filt <- vcf_struc_var_filt[, !duplicated(colnames(vcf_struc_var_filt))]
    vcf_struc_var_filt <- vcf_struc_var_filt %>% select(-one_of("flowcell1_merged","flowcell2_merged"))


    vcf_struc_var_filt$svtype <- str_extract(vcf_struc_var_filt$INFO,"SVTYPE=\\w{3}")
    vcf_struc_var_filt$svtype <- str_sub(vcf_struc_var_filt$svtype,-3)

    vcf_struc_var_filt$svlen <- str_extract(vcf_struc_var_filt$INFO,"SVLEN=(\\d+|-\\d+)")
    vcf_struc_var_filt$svlen <- str_extract(vcf_struc_var_filt$svlen,"(\\d+|-\\d+)")

    vcf_struc_var_filt <- vcf_struc_var_filt[, !duplicated(colnames(vcf_struc_var_filt))]
    
    stdev_len <- str_extract(vcf_struc_var_filt$INFO,"STDEV_LEN=\\d+")
    stdev_len_val <- as.numeric(str_extract(stdev_len,"\\d+"))
    vcf_struc_var_filt <- vcf_struc_var_filt[stdev_len_val==0,]

    vcf_bar_flow <- vcf_struc_var_filt %>%
      pivot_longer(cols = starts_with("flowcell"),
                   names_to = "sample",
                   values_to = "gt") %>%
      mutate(present = !str_detect(gt, ":NULL")) %>%
      separate(sample, into = c("flowcell", "barcode"), sep = "_", extra = "merge") %>%
      mutate(flowcell = ifelse(flowcell == "flowcell1", "Flow cell 1", "Flow cell 2"),
             barcode = str_replace(barcode, "barcode", "Barcode "),
             barcode = str_to_title(barcode)) %>%
      filter(present) %>%
      select(svtype,svlen,flowcell, barcode, POS)

    vcf_bar_flow$svlen <- as.numeric(vcf_bar_flow$svlen)

    vcf_bar_flow$svgroup <- ifelse(vcf_bar_flow$svlen<100,"50-100","100-1000")
    vcf_bar_flow[vcf_bar_flow$svtype=="DEL",]$svgroup <- ifelse(vcf_bar_flow[vcf_bar_flow$svtype=="DEL",]$svlen < -100,"-100","-100-0")

    vcf_bar_flow$svtypegroup <- paste(vcf_bar_flow$svtype,vcf_bar_flow$svgroup,paste="")
    vcf_bar_flow[vcf_bar_flow$svtypegroup=="BND NA ",]$svtypegroup <- "BND"
    vcf_bar_flow[vcf_bar_flow$svtypegroup=="DEL -100 ",]$svtypegroup <- "DEL < -100"

    vcf_summary <- vcf_bar_flow %>%
      group_by(flowcell, barcode, svtypegroup) %>%
      summarise(count = n(), .groups="drop")

    print(vcf_summary)

    figstruc <- vcf_summary %>%
      mutate(svtypegroup = factor(svtypegroup, levels = unique(svtypegroup)),
         barcode = factor(barcode, levels = sort(unique(barcode))),
         flowcell = factor(flowcell, levels = c("Flow cell 1", "Flow cell 2"))) %>%
      ggplot(aes(x = svtypegroup, y = barcode, fill = count,label=count)) +
      geom_tile(color = "white") +
      geom_text(color="black") +
      facet_wrap(~ flowcell, ncol = 2) +
      scale_fill_gradient(low = "lightyellow", high = "steelblue", na.value = "grey90") +
      labs(x = "Structural variant type", y = "Barcode", fill = "Count", title = i) +
      theme_classic() +
      theme(
      plot.title = element_text(size = 18, hjust = 0.5),
      text = element_text(size = 14),
      axis.text.x = element_text(angle = 45, hjust = 1),
      legend.background = element_rect(fill = "transparent")
      )
    print(figstruc)
  }
}
dev.off()


## To update in Plots_fig
message("Starting Structural variants...")

vcf_struc_var <- read.table(gzfile(file_structural_var))

## Focus on chromosome 01
vcf_struc_var <- vcf_struc_var %>% filter(V1=="Bhaga_1")

colnames(vcf_struc_var) <- c("CHROM","POS","ID","REF","ALT","QUAL","FILTER","INFO","FORMAT","flowcell2_barcode09","flowcell2_barcode05","flowcell2_barcode19","flowcell2_barcode13","flowcell2_barcode01","flowcell2_barcode08","flowcell2_barcode12","flowcell2_barcode18","flowcell2_barcode04","flowcell2_barcode14","flowcell2_barcode02","flowcell2_barcode07","flowcell2_barcode10","flowcell2_barcode11","flowcell2_barcode06","flowcell2_barcode15","flowcell2_barcode03","flowcell2_barcode17","flowcell2_barcode16","flowcell1_barcode09","flowcell1_barcode05","flowcell1_barcode19","flowcell1_barcode13","flowcell1_barcode01","flowcell1_barcode08","flowcell1_barcode12","flowcell1_barcode18","flowcell1_barcode04","flowcell1_barcode14","flowcell1_barcode02","flowcell1_barcode07","flowcell1_barcode10","flowcell1_barcode11","flowcell1_barcode06","flowcell1_barcode15","flowcell1_barcode03","flowcell1_barcode17","flowcell1_barcode16","flowcell1_barcode01","flowcell1_barcode04","flowcell1_barcode02","flowcell1_barcode03","flowcell1_barcode0118","flowcell2_barcode0118")

vcf_struc_var$svtype <- str_extract(vcf_struc_var$INFO,"SVTYPE=\\w{3}")
vcf_struc_var$svtype <- str_sub(vcf_struc_var$svtype,-3)

stdev_len <- str_extract(vcf_struc_var$INFO,"STDEV_LEN=\\d+")
stdev_len_val <- as.numeric(str_extract(stdev_len,"\\d+"))
vcf_struc_var <- vcf_struc_var[stdev_len_val==0,]

## Flowcell2 barcode01
vcf_struc_var$fl2_bar01 <- str_extract(vcf_struc_var$flowcell2_barcode01,":\\D+")
vcf_struc_var$fl2_bar01 <- ifelse(vcf_struc_var$fl2_bar01==":NULL",1,0)

## Flowcell1 barcode01
vcf_struc_var$fl1_bar01 <- str_extract(vcf_struc_var$flowcell1_barcode01,":\\D+")
vcf_struc_var$fl1_bar01 <- ifelse(vcf_struc_var$fl1_bar01==":NULL",1,0)

## Flowcell2 barcode04
vcf_struc_var$fl2_bar04 <- str_extract(vcf_struc_var$flowcell2_barcode04,":\\D+")
vcf_struc_var$fl2_bar04 <- ifelse(vcf_struc_var$fl2_bar04==":NULL",1,0)

## Flowcell1 barcode04
vcf_struc_var$fl1_bar04 <- str_extract(vcf_struc_var$flowcell1_barcode04,":\\D+")
vcf_struc_var$fl1_bar04 <- ifelse(vcf_struc_var$fl1_bar04==":NULL",1,0)

vcf_bar01_flow1 <- vcf_struc_var %>% select("svtype","fl1_bar01","POS")
vcf_bar01_flow1$flowcell <- "Flow cell 1"
vcf_bar01_flow1$barcode <- "Barcode 01"
vcf_bar01_flow1 <- vcf_bar01_flow1 %>% filter(fl1_bar01==0)
vcf_bar01_flow1 <- vcf_bar01_flow1 %>% select("svtype","flowcell","barcode","POS")

vcf_bar01_flow2 <- vcf_struc_var %>% select("svtype","fl2_bar01","POS")
vcf_bar01_flow2$flowcell <- "Flow cell 2"
vcf_bar01_flow2$barcode <- "Barcode 01"
vcf_bar01_flow2 <- vcf_bar01_flow2 %>% filter(fl2_bar01==0)
vcf_bar01_flow2 <- vcf_bar01_flow2 %>% select("svtype","flowcell","barcode","POS")


vcf_bar04_flow1 <- vcf_struc_var %>% select("svtype","fl1_bar04","POS")
vcf_bar04_flow1$flowcell <- "Flow cell 1"
vcf_bar04_flow1$barcode <- "Barcode 04"
vcf_bar04_flow1 <- vcf_bar04_flow1 %>% filter(fl1_bar04==0)
vcf_bar04_flow1 <- vcf_bar04_flow1 %>% select("svtype","flowcell","barcode","POS")

vcf_bar04_flow2 <- vcf_struc_var %>% select("svtype","fl2_bar04","POS")
vcf_bar04_flow2$flowcell <- "Flow cell 2"
vcf_bar04_flow2$barcode <- "Barcode 04"
vcf_bar04_flow2 <- vcf_bar04_flow2 %>% filter(fl2_bar04==0)
vcf_bar04_flow2 <- vcf_bar04_flow2 %>% select("svtype","flowcell","barcode","POS")

str(vcf_bar04_flow2)

vcf_bar_flowtmp <- rbind(vcf_bar01_flow1,vcf_bar01_flow2,vcf_bar04_flow1,vcf_bar04_flow2)
head(vcf_bar_flowtmp)

vcf_bar_flowtmp %>% filter(flowcell=="Flow cell 1" & barcode=="Barcode 04") %>% group_by(svtype) %>% summarise(count=n()) 
vcf_summary %>% filter(flowcell=="Flow cell 1" & barcode=="Barcode 04")

figstruc <- ggplot(data=vcf_bar_flow,aes(x=svtype,fill=flowcell)) + geom_bar(position="dodge") + scale_fill_manual(values=c("skyblue","orange")) + xlab("Structural variant types") + ylab("Number of calls") +facet_wrap(~barcode) + theme_classic()+
  theme(plot.title = element_text(size = 18, hjust = 0.5),
        text=element_text(size=17),
        legend.text = element_text(size = 12),
        legend.background = element_rect(fill="transparent")) +theme(legend.title=element_blank())

ggsave(file.path(out_dir_fig, "Structural_variants_per_barcode.png"), plot = figstruc, width = 22, height = 6, dpi = 300)

xlist <- list(`Flow cell 1`=vcf_bar01_flow1$POS,`Flow cell 2`=vcf_bar01_flow2$POS)
upsetb1 <- as.ggplot(upset(fromList(xlist),order.by = "freq",mainbar.y.label = "Structural variant calls shared", sets.x.label = "Number of calls per flow cell",keep.order = T,text.scale = 3)) + ggtitle("Barcode 01") + theme(plot.title = element_text(size = 40, face = "bold"))

xlist <- list(`Flow cell 1`=vcf_bar04_flow1$POS,`Flow cell 2`=vcf_bar04_flow2$POS)
upsetb4 <- as.ggplot(upset(fromList(xlist),order.by = "freq",mainbar.y.label = "",sets=c("Flow cell 2","Flow cell 1"),keep.order = T,text.scale = 3)) + ggtitle("Barcode 04") + 
    theme(plot.title = element_text(size = 40, face = "bold"))

up <- upsetb1 + upsetb4

ggsave(file.path(out_dir_fig, "Structural_variants_per_barcode_shared.png"), plot = up, width = 22, height = 6, dpi = 300)
