library("imcRtools")
library("scater")
library("CATALYST")
library("dittoSeq")
library("viridis")
library("edgeR")
library("dplyr")
library("reshape2")
library("cytomapper")
library("lisaClust")
library("ggh4x")
library("openxlsx")
setwd("C:/Users/admin/IMCfiles/Yuebing/231115/data_steinpose/")

#Celltype merger
cellMerge <- function(object_update, object_old) {
  coldata_upd <- colData(object_update)
  coldata_old <- colData(object_old)
  levels(coldata_old$celltype) <- c(levels(coldata_old$celltype), levels(object_update$celltype))
  coldata_old[rownames(coldata_upd),]$celltype <- coldata_upd$celltype
  coldata_old$celltype <- droplevels(coldata_old$celltype)
  colData(object_old) <- coldata_old
  return(object_old)
}


#Embedding manual annotations----
spe_anno <- readRDS("objects/spe_dr.rds")

anno <- read.csv("annotation/v2/annotation_table.csv", row.names = 1, check.names = FALSE)
#Removing empty columns
anno <- anno[, colSums(anno) > 0]
#Finding multiannotated cells
anno_multi <- anno[rowSums(anno) > 1, ]

#Un-one-hot the annotation table
anno_collapsed <- mutate(anno, across(colnames(anno), \(x) na_if(x, 0)))

wh <- which(anno_collapsed == 1, arr.ind = TRUE)
anno_collapsed[wh] <- names(anno_collapsed)[wh[,"col"]]

anno_collapsed <-  tidyr::unite(anno_collapsed, celltype,
                                Choriod:CD8, na.rm = TRUE, remove = FALSE)

anno_new <- anno_collapsed[, "celltype", drop = FALSE]
anno_new <- rownames_to_column(anno_new)

anno_holder <- as.data.frame(colnames(spe_anno))
anno_holder$celltype <- "undefined"
colnames(anno_holder) <- c("rowname", "celltype")

anno_final <- rows_update(anno_holder, anno_new, by = "rowname")
anno_final <- column_to_rownames(anno_final)

anno_final <- anno_final[rownames(colData(spe_anno)), ]

spe_anno$celltype <- anno_final

spe_anno$celltype <- factor(spe_anno$celltype, levels = unique(spe_anno$celltype))

#saveRDS(spe_anno, "objects/spe_ca_manual.rds")


#UMAP and clustering----
spe_anno <- readRDS("objects/spe_ca_manual.rds")

dittoDimPlot(spe_anno, var = "donor_id", reduction.use = "UMAP", size = 0.3) +
  ggtitle("Donor ID on UMAP")

dittoDimPlot(spe_anno, var = "celltype", reduction.use = "UMAP", size = 0.3) +
  ggtitle("Donor ID on UMAP")

#Clean up manual annotation----
spe_man <- spe_anno[, !(spe_anno$celltype == "undefined")]

set.seed(322)
spe_man <- runUMAP(spe_man, subset_row = rowData(spe_man)$use_channel, exprs_values = "exprs")

dittoDimPlot(spe_man, var = "celltype", reduction.use = "UMAP", size = 0.3) +
  ggtitle("Donor ID on UMAP")

set.seed(322)
spe_man <- CATALYST::cluster(spe_man,
                             features = rownames(spe_man)[rowData(spe_und)$use_channel],
                             maxK = 30)

# Assess cluster stability
delta_area(spe_man)


spe_man$som_clusters <- cluster_ids(spe_man, "meta8")

dittoDimPlot(spe_man, var = "som_clusters",
             reduction.use = "UMAP", size = 0.2,
             do.label = TRUE) +
  ggtitle("SOM clusters on UMAP")


celltype_mean <- aggregateAcrossCells(as(spe_man, "SingleCellExperiment"),  
                                      subset.row = rownames(spe_man)[rowData(spe_man)$use_channel],
                                      ids = spe_man$som_clusters, 
                                      statistics = "mean",
                                      use.assay.type = "exprs")

dittoHeatmap(celltype_mean,
             assay = "exprs",
             cluster_cols = TRUE, 
             scaled.to.max = TRUE,
             heatmap.colors.max.scaled = inferno(100),
             annot.by = c("som_clusters","ncells"))


dittoHeatmap(spe_man,
             genes = rownames(spe_man)[rowData(spe_man)$use_channel],
             assay = "exprs",
             scale = "none",
             heatmap.colors = viridis(100),
             annot.by = "som_clusters")

spe_man$celltype <- spe_man$som_clusters

levels(spe_man$celltype) <- c("Perivascular_aSMA", "Microglia_CD11b", "Choriod",
                              "Macrophage_CD45CD11b", rep("Choriod", 4))

dittoDimPlot(spe_man, var = "celltype",
             reduction.use = "UMAP", size = 0.2,
             do.label = TRUE) +
  ggtitle("SOM clusters on UMAP")


#Putting celltypes back into the big SPE
spe_anno <- cellMerge(spe_man, spe_anno)

dittoDimPlot(spe_anno, var = "celltype",
             reduction.use = "UMAP", size = 0.2,
             do.label = F) +
  ggtitle("Celltypes on UMAP")

#saveRDS(spe_anno, "objects/spe_ca_manual_v15.rds")


#Clean up undefined annotation----
spe_und <- spe_anno[, spe_anno$celltype == "undefined"]

set.seed(322)
spe_und <- runUMAP(spe_und, subset_row = rowData(spe_und)$use_channel, exprs_values = "exprs")

dittoDimPlot(spe_und, var = "celltype", reduction.use = "UMAP", size = 0.3) +
  ggtitle("Donor ID on UMAP")

set.seed(322)
spe_und <- CATALYST::cluster(spe_und,
                             features = rownames(spe_und)[rowData(spe_und)$use_channel],
                             maxK = 30)

# Assess cluster stability
delta_area(spe_und)


spe_und$som_clusters <- cluster_ids(spe_und, "meta7")

dittoDimPlot(spe_und, var = "som_clusters",
             reduction.use = "UMAP", size = 0.2,
             do.label = TRUE) +
  ggtitle("SOM clusters on UMAP")


celltype_mean <- aggregateAcrossCells(as(spe_und, "SingleCellExperiment"),  
                                      subset.row = rownames(spe_und)[rowData(spe_und)$use_channel],
                                      ids = spe_und$som_clusters, 
                                      statistics = "mean",
                                      use.assay.type = "exprs")

dittoHeatmap(celltype_mean,
             assay = "exprs",
             cluster_cols = TRUE, 
             scaled.to.max = TRUE,
             heatmap.colors.max.scaled = inferno(100),
             annot.by = c("som_clusters","ncells"))


dittoHeatmap(spe_und,
             genes = rownames(spe_und)[rowData(spe_und)$use_channel],
             assay = "exprs",
             scale = "none",
             heatmap.colors = viridis(100),
             annot.by = "som_clusters")

spe_und$celltype <- spe_und$som_clusters

levels(spe_und$celltype) <- c("RPE", "undefined", "Choriod", 
                              "Vimentin-pos", rep("Choriod", 3))

dittoDimPlot(spe_und, var = "celltype",
             reduction.use = "UMAP", size = 0.2,
             do.label = TRUE) +
  ggtitle("SOM clusters on UMAP")


#Putting celltypes back into the big SPE
spe_anno <- cellMerge(spe_und, spe_anno)

dittoDimPlot(spe_anno, var = "celltype",
             reduction.use = "UMAP", size = 0.2,
             do.label = TRUE) +
  ggtitle("Celltypes on UMAP")

#saveRDS(spe_anno, "objects/spe_ca_manual_v2.rds")

#Getting CD44-rich cluster from the big dataset----
spe_anno <- readRDS("objects/spe_ca_manual_v2.rds")

set.seed(322)
spe_anno <- CATALYST::cluster(spe_anno,
                              features = rownames(spe_anno)[rowData(spe_und)$use_channel],
                              maxK = 30)

# Assess cluster stability
delta_area(spe_anno)

spe_anno$som_clusters <- cluster_ids(spe_anno, "meta20")

pdf("masked_roi_v3.pdf", width = 16, height = 16)
plotCells(masks,
          object = spe_anno,
          cell_id = "ObjectNumber",
          img_id = "sample_id",
          colour_by = "som_clusters")
dev.off()

levels(spe_anno$celltype) <- c(levels(spe_anno$celltype), "CD44-positive")
spe_anno$celltype[spe_anno$celltype =="undefined" & spe_anno$som_clusters == 5] <- "CD44-positive"

#saveRDS(spe_anno, "objects/spe_ca_manual_v3.rds")

#Clean up undefined annotation - round 2----
spe_anno <- readRDS("objects/spe_ca_manual_v3.rds")

spe_und <- spe_anno[, spe_anno$celltype == "undefined"]

set.seed(322)
spe_und <- runUMAP(spe_und, subset_row = rowData(spe_und)$use_channel, exprs_values = "exprs")

dittoDimPlot(spe_anno, var = "celltype", reduction.use = "UMAP", size = 0.3) +
  ggtitle("Donor ID on UMAP")

set.seed(322)
spe_und <- CATALYST::cluster(spe_und,
                             features = rownames(spe_und)[rowData(spe_und)$use_channel],
                             maxK = 30)

#Assess cluster stability
delta_area(spe_und)

spe_und$som_clusters <- cluster_ids(spe_und, "meta7")

dittoDimPlot(spe_und, var = "som_clusters",
             reduction.use = "UMAP", size = 0.2,
             do.label = TRUE) +
  ggtitle("SOM clusters on UMAP")

pdf("masked_roi_und2.pdf", width = 16, height = 16)
plotCells(masks,
          object = spe_und,
          cell_id = "ObjectNumber",
          img_id = "sample_id",
          colour_by = "som_clusters")
dev.off()


celltype_mean <- aggregateAcrossCells(as(spe_und, "SingleCellExperiment"),  
                                      subset.row = rownames(spe_und)[rowData(spe_und)$use_channel],
                                      ids = spe_und$som_clusters, 
                                      statistics = "mean",
                                      use.assay.type = "exprs")

dittoHeatmap(celltype_mean,
             assay = "exprs",
             cluster_cols = TRUE, 
             scaled.to.max = TRUE,
             heatmap.colors.max.scaled = inferno(100),
             annot.by = c("som_clusters","ncells"))


dittoHeatmap(spe_und,
             genes = rownames(spe_und)[rowData(spe_und)$use_channel],
             assay = "exprs",
             scale = "none",
             heatmap.colors = viridis(100),
             annot.by = "som_clusters")

spe_und$celltype <- spe_und$som_clusters

levels(spe_und$celltype) <- c(rep("undefined", 3), "Choriod_fold",
                              "undefined", "Choriod_spec", "undefined")

dittoDimPlot(spe_und, var = "celltype",
             reduction.use = "UMAP", size = 0.2,
             do.label = TRUE) +
  ggtitle("SOM clusters on UMAP")


#Putting celltypes back into the big SPE
spe_anno <- cellMerge(spe_und, spe_anno)

dittoDimPlot(spe_anno, var = "celltype",
             reduction.use = "UMAP", size = 0.2,
             do.label = TRUE) +
  ggtitle("Celltypes on UMAP")

#saveRDS(spe_anno, "objects/spe_ca_manual_v4.rds")


#Clean up undefined annotation - round 3----
spe_anno <- readRDS("objects/spe_ca_manual_v4.rds")

spe_und <- spe_anno[, spe_anno$celltype == "undefined"]

set.seed(322)
spe_und <- runUMAP(spe_und, subset_row = rowData(spe_und)$use_channel, exprs_values = "exprs")

dittoDimPlot(spe_und, var = "celltype", reduction.use = "UMAP", size = 0.3) +
  ggtitle("Donor ID on UMAP")

set.seed(322)
spe_und <- CATALYST::cluster(spe_und,
                             features = rownames(spe_und)[rowData(spe_und)$use_channel],
                             maxK = 30)

#Assess cluster stability
delta_area(spe_und)

spe_und$som_clusters <- cluster_ids(spe_und, "meta8")

dittoDimPlot(spe_und, var = "som_clusters",
             reduction.use = "UMAP", size = 0.2,
             do.label = TRUE) +
  ggtitle("SOM clusters on UMAP")

pdf("masked_roi_und3.pdf", width = 16, height = 16)
plotCells(masks,
          object = spe_und,
          cell_id = "ObjectNumber",
          img_id = "sample_id",
          colour_by = "som_clusters")
dev.off()


celltype_mean <- aggregateAcrossCells(as(spe_und, "SingleCellExperiment"),  
                                      subset.row = rownames(spe_und)[rowData(spe_und)$use_channel],
                                      ids = spe_und$som_clusters, 
                                      statistics = "mean",
                                      use.assay.type = "exprs")

dittoHeatmap(celltype_mean,
             assay = "exprs",
             cluster_cols = TRUE, 
             scaled.to.max = TRUE,
             heatmap.colors.max.scaled = inferno(100),
             annot.by = c("som_clusters","ncells"))


dittoHeatmap(spe_und,
             genes = rownames(spe_und)[rowData(spe_und)$use_channel],
             assay = "exprs",
             scale = "none",
             heatmap.colors = viridis(100),
             annot.by = "som_clusters")

spe_und$celltype <- spe_und$som_clusters

levels(spe_und$celltype) <- c(rep("undefined", 3), "Choriod_fold",
                              "undefined", "Choriod_spec", "undefined")

dittoDimPlot(spe_und, var = "celltype",
             reduction.use = "UMAP", size = 0.2,
             do.label = TRUE) +
  ggtitle("SOM clusters on UMAP")


#Putting celltypes back into the big SPE
spe_anno <- cellMerge(spe_und, spe_anno)

dittoDimPlot(spe_anno, var = "celltype",
             reduction.use = "UMAP", size = 0.2,
             do.label = TRUE) +
  ggtitle("Celltypes on UMAP")

#saveRDS(spe_anno, "objects/spe_ca_manual_v5.rds")

#Generify the choriod----
spe_anno <- readRDS("objects/spe_ca_manual_v5.rds")

#Replace "Choriod_fold", "Choriod_spec" with "Choriod"
levels(spe_anno$celltype)[9:10] <- "Choriod"

#saveRDS(spe_anno, "objects/spe_ca_manual_v5_g.rds")

#Total mask plots----
masks  <- loadImages("masks/", as.is = TRUE)
sample_id <- names(masks)

meta_final <- spe_anno@metadata$meta_final
donor_id <- meta_final$donor_id[match(sample_id, meta_final$sample_id)]
condition <- meta_final$condition[match(sample_id, meta_final$sample_id)] 

mcols(masks) <- DataFrame(sample_id = sample_id,
                          donor_id = donor_id,
                          condition = condition)


pdf("umap_25clusters.pdf", width = 9, height = 8)
dittoDimPlot(spe_anno, var = "som_clusters",
             reduction.use = "UMAP", size = 0.2,
             do.label = TRUE) +
  ggtitle("SOM clusters on UMAP")
dev.off()

pdf("heatmap_25clusters.pdf", width = 8, height = 8)
dittoHeatmap(celltype_mean,
             assay = "exprs",
             cluster_cols = TRUE, 
             scaled.to.max = TRUE,
             heatmap.colors.max.scaled = inferno(100),
             annot.by = c("som_clusters","ncells"))
dev.off()

#Manual layer annotation----
spe_anno <- readRDS("objects/spe_ca_manual_v5_g.rds")
test_coord <- spatialCoords(spe_anno)

ordered_m <- read.table("../qproject_layers/measurements.tsv", sep = "\t", header = T)

annotated_m <- read.table("../qproject_layers/measurements_annotated.tsv", sep = "\t", header = T)

annotated_m <- annotated_m[order(match(annotated_m$Object.ID,ordered_m$Object.ID)), ]

spe_anno$celltype_marker <- spe_anno$celltype
spe_anno$celltype_manual <- annotated_m$Parent
spe_anno$celltype_manual <- factor(spe_anno$celltype_manual, levels = unique(spe_anno$celltype_manual))

dittoDimPlot(spe_anno, var = "celltype_manual",
             reduction.use = "UMAP", size = 0.2,
             do.label = TRUE) 

dittoDimPlot(spe_anno, var = "celltype",
             reduction.use = "UMAP", size = 0.2,
             do.label = TRUE) 

#Replacing the unedfined with manually annotated cells
anno_rename <- data.frame(row.names = colnames(spe_anno),
                          marker = spe_anno$celltype,
                          manual = spe_anno$celltype_manual)

levels(anno_rename$marker)[levels(anno_rename$marker) %in% c("Choriod", "RPE")] <- "Choriod_RPE"
levels(anno_rename$marker) <- gsub("-", "_", levels(anno_rename$marker), fixed = T)

levels(anno_rename$marker) <- c(levels(anno_rename$marker), "ONL", "INL", "GCL")

anno_rename$manual <- gsub("Root object (Image)", "undefined", anno_rename$manual, fixed = T)
anno_rename$manual <- gsub("Choriod-RPE complex", "Choriod_RPE", anno_rename$manual, fixed = T)

#Replace the undefined cells with manually annotated ones
anno_rename$marker[anno_rename$marker == "undefined"] <- 
  anno_rename$manual[anno_rename$marker == "undefined"]
#On some ROI the cells are too bright - replace the Choriod ones too to rescue the layers
anno_rename$marker[anno_rename$marker == "Choriod_RPE"] <- 
  anno_rename$manual[anno_rename$marker == "Choriod_RPE"]
#Most prominent on 231113_yuebing_day1_002, Yuebing_slide2_003 and Yuebing_slide3_008
  
spe_anno$celltype <- anno_rename$marker

pdf("figures/masked_roi_alternative.pdf", width = 16, height = 16)
plotCells(masks,
          object = spe_anno,
          cell_id = "ObjectNumber",
          img_id = "sample_id",
          colour_by = "celltype",
          colour = list(celltype = c(Choriod_RPE = "#a6cee3",
                                     CD44_positive = "#1f78b4",
                                     ONL = "#b2df8a",
                                     Vimentin_pos = "#33a02c",
                                     INL = "#fb9a99",
                                     GCL = "#c23b22",
                                     undefined = "grey",
                                     Microglia_CD11b = "#ff7f00",
                                     Macrophage_CD45CD11b = "#d6ca6f",
                                     Perivascular_aSMA = "#966fd6")))

dev.off()

#saveRDS(spe_anno, "objects/spe_ca_manual_final.rds")

#Alternative annotation with obsoleting Vimentin-pos cells----
#Replace the undefined cells with manually annotated ones
anno_rename$marker[anno_rename$marker %in% c("undefined", "Vimentin_pos")] <- 
  anno_rename$manual[anno_rename$marker %in% c("undefined", "Vimentin_pos")]
#On some ROI the cells are too bright - replace the Choriod ones too to rescue the layers
anno_rename$marker[anno_rename$marker == "Choriod_RPE"] <- 
  anno_rename$manual[anno_rename$marker == "Choriod_RPE"]
#Most prominent on 231113_yuebing_day1_002, Yuebing_slide2_003 and Yuebing_slide3_008

spe_anno$celltype <- anno_rename$marker
spe_anno$celltype <- droplevels(spe_anno$celltype)

#saveRDS(spe_anno, "objects/spe_ca_manual_alternative.rds")
#spe_anno <- readRDS("objects/spe_ca_manual_alternative.rds")

pdf("figures/boxplot_celltypes_vimentin.pdf", width = 8, height = 8)
dittoBoxPlot(spe_anno, "Vimentin",
             split.by = "celltype",
             group.by = "condition",
             color.panel = rep("white", 4),
             boxplot.width = 0.7,
             theme = theme_bw()) +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  ylim(c(0, 25))
dev.off()

#Metadata cleanup----
spe_anno <- readRDS("objects/spe_sa.rds")

spe_anno$condition <- factor(spe_anno$condition, levels = c("BC", "LC", "Fas", "Bel"))
levels(spe_anno$condition)[1] <- "C"

#Original names: "Choriod-RPE complex", "undefined", "Microglia_CD11b", "Perivascular_aSMA", 
#"Macrophage_CD45CD11b", "Vimentin-pos", "CD44-positive", "ONL", "INL", "GCL"     
levels(spe_anno$celltype) <- c("Choroid-RPE complex", "Unspecific cell types",
                               "Microglia", "Perivascular cells", "Macrophages",
                               "Macroglia", "Retinal progenitor cells", "Photoreceptors", 
                               "Interneurons", "Ganglion cells")

spe_anno$celltype <- fct_relevel(spe_anno$celltype, 
                                 c("Choroid-RPE complex", "Perivascular cells", "Macroglia",
                                   "Microglia", "Macrophages", "Retinal progenitor cells", "Photoreceptors", 
                                   "Interneurons", "Ganglion cells", "Unspecific cell types"))

#Rename condition
meta_final <- spe_anno@metadata$meta_final
meta_final$condition <- gsub("BC", "C", meta_final$condition, fixed = T)

#Celltype boxplot----
celltype_table <- data.frame(table(spe_anno$sample_id, spe_anno$celltype))
colnames(celltype_table) <- c("Sample", "Celltype", "Number")
celltype_table$Condition <- meta_final$condition[match(celltype_table$Sample, meta_final$sample_id)]
celltype_table$Condition <- factor(celltype_table$Condition, levels = c("C", "LC", "Fas", "Bel"))

#12x4
ggplot(celltype_table, aes(x = Condition, y = Number)) +
  geom_boxplot() +
  theme_bw() +
  facet_wrap(.~Celltype, scales = "free", ncol = 5)

colors_alt <- c("#a6cee3", "#966fd6", "#ff7f00", "#d6ca6f", 
                "#1f78b4", "#b2df8a", "#fb9a99", "#33a02c", "#a9a9a9")

strip <- strip_themed(background_x = elem_list_rect(fill = colors_alt))

pdf("figures/boxplot_celltypes_alternative.pdf", width = 7, height = 7)
ggplot(celltype_table, aes(x = Condition, y = Number)) +
  geom_boxplot() +
  theme_bw() +
  #  scale_y_continuous(labels = scales::percent) +
  facet_wrap2(.~Celltype, scales = "free", strip = strip)
dev.off()

#6x8
dittoDimPlot(spe_anno, var = "celltype",
             reduction.use = "UMAP", size = 0.2,
             do.label = T,
             color.panel = c("#a6cee3", "#966fd6", "#33a02c", "#ff7f00", "#d6ca6f", 
                             "#1f78b4", "#b2df8a", "#fb9a99", "#c23b22", "#a9a9a9")) 

pdf("figures/umap_alternative.pdf", width = 10, height = 8)
dittoDimPlot(spe_anno, var = "celltype",
             reduction.use = "UMAP", size = 0.3,
             do.label = T,
             color.panel = colors_alt) 
dev.off()

pdf("figures/barplot_celltype_alternative.pdf", width = 5, height = 4)
dittoBarPlot(spe_anno, var = "celltype", group.by = "condition",
             color.panel = c("#a6cee3", "#33a02c", "#fb9a99", "#d6ca6f", "#ff7f00", "#966fd6",
                             "#b2df8a", "#1f78b4", "#a9a9a9")) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5))
dev.off()

metadata <- spe_anno@metadata$meta_final
metadata$condition <- factor(metadata$condition, levels = c("C", "LC", "Fas", "Bel"))

pdf("figures/barplot_samples_alternative.pdf", width = 12, height = 4)
dittoBarPlot(spe_anno, var = "celltype", group.by = "sample_id",
             color.panel = c("#a6cee3", "#33a02c", "#fb9a99", "#d6ca6f", "#ff7f00", "#966fd6",
                             "#b2df8a", "#1f78b4", "#a9a9a9"),
             x.reorder = order(metadata$condition))
dev.off()

#Marker expression heatmap
celltype_mean <- aggregateAcrossCells(as(spe_anno, "SingleCellExperiment"),  
                                      subset.row = rownames(spe_anno)[rowData(spe_anno)$use_channel],
                                      ids = spe_anno$celltype, 
                                      statistics = "mean",
                                      use.assay.type = "exprs")

channels_use <- rownames(spe_anno)[rowData(spe_anno)$use_channel]
channels_use <- channels_use[!(channels_use %in% c("Arginase_perk", "iNOS_CD206"))]

pdf("figures/heatmap_celltype_alt.pdf", width = 8, height = 6)
dittoHeatmap(celltype_mean,
             assay = "exprs",
             genes = channels_use,
             cluster_cols = TRUE, 
             scaled.to.max = TRUE,
             annot.colors = colors_alt,
             heatmap.colors.max.scaled = inferno(100),
             annot.by = c("celltype","ncells"))
dev.off()

#Celltype per condition barplot 6x5
dittoBarPlot(spe_anno, var = "celltype", group.by = "condition", 
             x.reorder = c(2, 4, 3, 1))

#Ordered sample composition barplot
meta_final <- spe_anno@metadata$meta_final
meta_final$condition <- gsub("BC", "C", meta_final$condition, fixed = T)

dittoBarPlot(spe_anno, var = "celltype", group.by = "sample_id",
             x.reorder = order(match(meta_final$condition, c("C", "LC", "Fas", "Bel"))))


#Neighborhood mask plots
pdf("masked_neighborhoods_celltype.pdf", width = 16, height = 16)
plotCells(masks,
          object = spe_anno,
          cell_id = "ObjectNumber",
          img_id = "sample_id",
          colour_by = "cn_celltype")
dev.off()

for_plot <- colData(spe_anno) %>% as_tibble() %>%
  group_by(cn_celltype, celltype) %>%
  summarize(count = n()) %>%
  mutate(freq = count / sum(count)) %>%
  pivot_wider(id_cols = cn_celltype, names_from = celltype,
              values_from = freq, values_fill = 0) %>%
  ungroup() %>%
  select(-cn_celltype)
#Make the heatmap

pdf("heatmap_neighborhoods_celltype.pdf", width = 7, height = 6)
pheatmap(for_plot,
         color = colorRampPalette(c("dark blue", "white", "dark red"))(100),
         scale = "column",
         main = "Cell type composition of celltype-based neighbourhoods")
dev.off()

pdf("masked_neighborhoods_expression.pdf", width = 16, height = 16)
plotCells(masks,
          object = spe_anno,
          cell_id = "ObjectNumber",
          img_id = "sample_id",
          colour_by = "cn_expression")
dev.off()

for_plot <- colData(spe_anno) %>% as_tibble() %>%
  group_by(cn_expression, celltype) %>%
  summarize(count = n()) %>%
  mutate(freq = count / sum(count)) %>%
  pivot_wider(id_cols = cn_expression, names_from = celltype,
              values_from = freq, values_fill = 0) %>%
  ungroup() %>%
  select(-cn_expression)
#Make the heatmap

pdf("heatmap_neighborhoods_expression.pdf", width = 7, height = 6)
pheatmap(for_plot,
         color = colorRampPalette(c("dark blue", "white", "dark red"))(100),
         scale = "column",
         main = "Cell type composition of expression-based neighbourhoods")
dev.off()

celltype_data <- data.frame(table(spe_anno$celltype, spe_anno$cn_celltype, spe_anno$sample_id))


#saveRDS(spe_anno, "objects/spe_sa_final.rds")

spe_anno <- readRDS("objects/spe_sa_final.rds")

#Differential abundance testing----
abundances <- table(spe_anno$celltype, spe_anno$donor_id)
extra <- colData(spe_anno)[match(colnames(abundances), spe_anno$donor_id),]
y.ab <- DGEList(abundances, samples = extra)

#No intercept model for enabling all comparisons/not selecting reference level
da_design <- model.matrix(~0 + factor(condition), y.ab$samples)
#Remove "factor(condition)" from colnames
colnames(da_design) <- sub("factor(condition)", "", colnames(da_design), fixed = TRUE)

y.ab <- estimateDisp(y.ab, da_design, trend = "none")
fit.ab <- glmQLFit(y.ab, da_design, robust = TRUE, abundance.trend = FALSE)

plotQLDisp(fit.ab, cex = 1)

comparisons <- c("C-LC", "C-Fas", "C-Bel", "LC-Fas", "LC-Bel", "Fas-Bel")

da_results <- lapply(X = comparisons, 
                     FUN = function(x) {
                       da_contrast <- makeContrasts(contrasts = x, levels = da_design)
                       
                       #Testing for differences
                       da_results <- glmQLFTest(fit.ab, contrast = da_contrast)
                       #Making a results table
                       da_table <- as.data.frame(topTags(da_results, n = length(levels(spe_anno$celltype))))
                       
                       da_table <- da_table[order(da_table$FDR), ]
                       
                       da_table
                     })

names(da_results) <- comparisons

write.xlsx(x = da_results, "differential_abundance.xlsx", rowNames = TRUE)

#Quantify Collagen/Fibronectin in Choroid/RPE cells----
spe_anno <- readRDS("objects/spe_ca_manual_alternative.rds")

dittoBoxPlot(spe_anno, c("CollagenT1"), group.by = "celltype", split.by = "condition", boxplot.width = 0.5)
dittoBoxPlot(spe_anno, c("Fibronectin"), group.by = "celltype", split.by = "condition", boxplot.width = 0.5)
dittoBoxPlot(spe_anno, c("Vimentin"), group.by = "celltype", split.by = "condition", boxplot.width = 0.5)

#Remake the boxplots
counts <- t(assay(spe_anno, "exprs"))
counts <- counts[, rowData(spe_anno)$use_channel]
counts <- data.frame(counts)
counts <- cbind(counts, Celltype = as.character(spe_anno$celltype))
counts <- cbind(counts, Condition = as.character(spe_anno$condition))
counts$Condition <- factor(counts$Condition)

counts$Condition <- fct_relevel(counts$Condition, 
                                 c("C", "LC", "Fas", "Bel"))

png("figures/col_boxplot.png", width = 1500, height = 700, res = 150)
ggplot(counts, aes(x = Celltype, y = CollagenT1, color = Condition)) +
  geom_boxplot() +
  theme_bw() +
  ylab("Type 1 Collagen") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
dev.off()

png("figures/fib_boxplot.png", width = 1500, height = 700, res = 150)
ggplot(counts, aes(x = Celltype, y = Fibronectin, color = Condition)) +
  geom_boxplot() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
dev.off()

png("figures/vim_boxplot.png", width = 1500, height = 700, res = 150)
ggplot(counts, aes(x = Celltype, y = Vimentin, color = Condition)) +
  geom_boxplot() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
dev.off()

#Differential state analysis----
#Get the transformed counts
acounts <- t(assay(spe_anno, name = "exprs"))
#Get a guide index of sample IDs from rownames
guide <- sub("_[^_]+$", "", colnames(spe_anno))
#Split into a list of count matrices based on sample iDs
acounts <- split.data.frame(acounts, f = guide)
#Get experiment info from metadata
experiment_info <- spe_anno@metadata$meta_final
#Gather marker info
marker_info <- as.data.frame(rowData(spe_anno))
marker_info <- marker_info[, c("channel", "name", "use_channel")]

#As opposed to CATALYST, limma uses "state" markers instead of "type" for analysis.
#As such, simply using the clustering metadata column "marker_class" will be insufficient
#at the very least because the markers are flipped, and also user may choose to skip that step.
#So we use "use_channel" column which is always defined with data preparation step instead.
marker_info$use_channel <- ifelse(marker_info$use_channel, "state", "type")

colnames(marker_info) <- c("channel_name", "marker_name", "marker_class")

#Prepare the object
d_se <- prepareData(acounts, experiment_info, marker_info)

#Create pseudo-clustering results by embedding celltype annotations
rowData(d_se)$cluster_id <- spe_anno$celltype

#Get counts
d_counts <- calcCounts(d_se)
#Get medians
d_medians <- calcMedians(d_se)

#No intercept model for enabling all comparisons/not selecting reference level
ds_design <- model.matrix(~0 + factor(condition), experiment_info)
#Remove "factor(condition)" from colnames
colnames(ds_design) <- sub("factor(condition)", "", colnames(ds_design), fixed = TRUE)

#Create standard contrast
contrast_list <- c("C-LC", "C-Fas", "C-Bel", "LC-Fas", "LC-Bel", "Fas-Bel")

results_list <- lapply(contrast_list, function(comparison) {
  ds_contrast <- makeContrasts(contrasts = "contrast_list", levels = ds_design)
  
  #Performing the DS analysis
  res_ds <- testDS_limma(d_counts, d_medians, ds_design, ds_contrast)
  
  #Getting the results summary
  ds_table <- as.data.frame(rowData(res_ds))
  ds_table <- ds_table[which(ds_table$p_adj < 0.05), ]
  
  return(ds_table)
})

names(results_list) <- contrast_list

write.xlsx(results_list, "differential_state.xlsx")
