library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ──────── tidyverse 1.3.0 ──
✓ tibble  3.0.6     ✓ purrr   0.3.4
✓ tidyr   1.1.2     ✓ stringr 1.4.0
✓ readr   1.4.0     ✓ forcats 0.5.1
── Conflicts ─────────── tidyverse_conflicts() ──
x purrr::%@%()         masks rlang::%@%()
x purrr::as_function() masks rlang::as_function()
x dplyr::collapse()    masks IRanges::collapse()
x dplyr::combine()     masks Biobase::combine(), BiocGenerics::combine()
x dplyr::count()       masks matrixStats::count()
x dplyr::desc()        masks IRanges::desc()
x tidyr::expand()      masks S4Vectors::expand()
x rlang::exprs()       masks Biobase::exprs()
x dplyr::filter()      masks stats::filter()
x dplyr::first()       masks S4Vectors::first()
x purrr::flatten()     masks rlang::flatten()
x purrr::flatten_chr() masks rlang::flatten_chr()
x purrr::flatten_dbl() masks rlang::flatten_dbl()
x purrr::flatten_int() masks rlang::flatten_int()
x purrr::flatten_lgl() masks rlang::flatten_lgl()
x purrr::flatten_raw() masks rlang::flatten_raw()
x purrr::invoke()      masks rlang::invoke()
x dplyr::lag()         masks stats::lag()
x purrr::list_along()  masks rlang::list_along()
x purrr::modify()      masks rlang::modify()
x ggplot2::Position()  masks BiocGenerics::Position(), base::Position()
x purrr::prepend()     masks rlang::prepend()
x purrr::reduce()      masks GenomicRanges::reduce(), IRanges::reduce()
x dplyr::rename()      masks S4Vectors::rename()
x purrr::simplify()    masks DelayedArray::simplify()
x dplyr::slice()       masks IRanges::slice()
x purrr::splice()      masks rlang::splice()
PrctCellExpringGene <- function(object, genes, group.by = "all"){
  if(group.by == "all"){
    prct = unlist(lapply(genes,calc_helper, object=object))
    result = data.frame(Markers = genes, Cell_proportion = prct)
    return(result)
  }
  
  else{        
    list = SplitObject(object, group.by)
    factors = names(list)
    
    results = lapply(list, PrctCellExpringGene, genes=genes)
    for(i in 1:length(factors)){
      results[[i]]$Feature = factors[i]
    }
    combined = do.call("rbind", results)
    return(combined)
  }
}

calc_helper <- function(object,genes){
  counts = object[['RNA']]@counts
  ncells = ncol(counts)
  if(genes %in% row.names(counts)){
    sum(counts[genes,]>0)/ncells
  }else{return(NA)}
}

part one

genes_10 <- genes_10prct$Markers
Error: object 'genes_10prct' not found

PART TWO

Order slender cells with trajectory analysis

tryp_subset <- subset(tryp, subset = cluster == c("LS A", "LS B"))
longer object length is not a multiple of shorter object lengthlonger object length is not a multiple of shorter object length
tryp_subset <- subset(tryp, subset = cluster == c("LS A", "LS B"))
longer object length is not a multiple of shorter object lengthlonger object length is not a multiple of shorter object length
tryp_subset <- RunUMAP(tryp_subset, dims = 1:8, reduction = "pca", min.dist = 0.1)
12:55:33 UMAP embedding parameters a = 1.577 b = 0.8951
12:55:33 Read 996 rows and found 8 numeric columns
12:55:33 Using Annoy for neighbor search, n_neighbors = 30
12:55:33 Building Annoy index with metric = cosine, n_trees = 50
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:55:33 Writing NN index file to temp file /var/folders/rd/tjq29jm572b7j_j3fg5k_s340000gn/T//RtmpyjyWIo/file14b5387342dd
12:55:33 Searching Annoy index using 1 thread, search_k = 3000
12:55:33 Annoy recall = 100%
12:55:34 Commencing smooth kNN distance calibration using 1 thread
12:55:36 Initializing from normalized Laplacian + noise
12:55:36 Commencing optimization for 500 epochs, with 40024 positive edges
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:55:38 Optimization finished
tryp_subset <- subset(tryp, subset = cluster == c("LS A", "LS B"))
longer object length is not a multiple of shorter object lengthlonger object length is not a multiple of shorter object length
tryp_subset <- RunUMAP(tryp_subset, dims = 1:8, reduction = "pca", min.dist = 0.1)
12:55:39 UMAP embedding parameters a = 1.577 b = 0.8951
12:55:39 Read 996 rows and found 8 numeric columns
12:55:39 Using Annoy for neighbor search, n_neighbors = 30
12:55:39 Building Annoy index with metric = cosine, n_trees = 50
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:55:39 Writing NN index file to temp file /var/folders/rd/tjq29jm572b7j_j3fg5k_s340000gn/T//RtmpyjyWIo/file14b5172ad81f
12:55:39 Searching Annoy index using 1 thread, search_k = 3000
12:55:39 Annoy recall = 100%
12:55:40 Commencing smooth kNN distance calibration using 1 thread
12:55:42 Initializing from normalized Laplacian + noise
12:55:42 Commencing optimization for 500 epochs, with 40024 positive edges
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:55:45 Optimization finished
DimPlot(tryp_subset, reduction = "umap", label = FALSE,
        label.size = 4,
        pt.size = 0.5, group.by = "Phase", cols = mycolours) + UMAP_theme
Error: Insufficient values in manual scale. 5 needed but only 4 provided.
Run `rlang::last_error()` to see where the error occurred.

Trajectory inference with slingshot

lines(SlingshotDataSet(sce), col = "black", lwd = 2)
Error in plot.xy(xy.coords(x, y), type = type, ...) : 
  plot.new has not been called yet

PART THREE

Conduct peudotime analysis with the cycling cells, in my data the slender clusters

plot(rd, pch=16, col = mycolours[as.factor(tryp_subset$Phase)] ) +
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"), panel.background = element_blank(),
        plot.title = element_text(size = 10, face = "bold"), axis.text.x = element_text(size = 8), axis.title.x = element_text(size = 9), axis.title.y = element_text(size = 9), axis.text.y =  element_text(size = 8))
NULL

plot(rd, pch=16, col = mycolours[as.factor(tryp_subset$Phase)] ) + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = “black”), panel.background = element_blank(), plot.title = element_text(size = 10, face = “bold”), axis.text.x = element_text(size = 8), axis.title.x = element_text(size = 9), axis.title.y = element_text(size = 9), axis.text.y = element_text(size = 8))

lines(x=pcc\(s[order(pcc\)lambda),1], y=pcc\(s[order(pcc\)lambda),2], col=“black”, lwd=2)

gamList <- fitGAM(counts, pseudotime=pseudoT, cellWeights=cWeights, nknots=5)

  |                                                  | 0 % ~calculating  
  |+                                                 | 1 % ~39m 14s      
  |+                                                 | 2 % ~28m 37s      
  |++                                                | 3 % ~21m 54s      
  |++                                                | 4 % ~18m 55s      
  |+++                                               | 5 % ~17m 16s      
  |+++                                               | 6 % ~15m 28s      
  |++++                                              | 7 % ~14m 47s      
  |++++                                              | 8 % ~14m 19s      
  |+++++                                             | 9 % ~13m 23s      
  |+++++                                             | 10% ~12m 41s      
  |++++++                                            | 11% ~12m 05s      
  |++++++                                            | 12% ~11m 28s      
  |+++++++                                           | 13% ~11m 19s      
  |+++++++                                           | 14% ~10m 58s      
  |++++++++                                          | 15% ~10m 36s      
  |++++++++                                          | 16% ~10m 19s      
  |+++++++++                                         | 17% ~10m 21s      
  |+++++++++                                         | 18% ~10m 01s      
  |++++++++++                                        | 19% ~10m 53s      
  |++++++++++                                        | 20% ~10m 40s      
  |+++++++++++                                       | 21% ~10m 23s      
  |+++++++++++                                       | 22% ~10m 10s      
  |++++++++++++                                      | 23% ~09m 55s      
  |++++++++++++                                      | 24% ~10m 22s      
  |+++++++++++++                                     | 25% ~10m 22s      
  |+++++++++++++                                     | 26% ~10m 03s      
  |++++++++++++++                                    | 27% ~09m 49s      
  |++++++++++++++                                    | 28% ~09m 31s      
  |+++++++++++++++                                   | 29% ~09m 20s      
  |+++++++++++++++                                   | 30% ~09m 04s      
  |++++++++++++++++                                  | 31% ~08m 50s      
  |++++++++++++++++                                  | 32% ~08m 38s      
  |+++++++++++++++++                                 | 33% ~08m 24s      
  |+++++++++++++++++                                 | 34% ~08m 16s      
  |++++++++++++++++++                                | 35% ~08m 03s      
  |++++++++++++++++++                                | 36% ~07m 52s      
  |+++++++++++++++++++                               | 37% ~07m 46s      
  |+++++++++++++++++++                               | 38% ~07m 34s      
  |++++++++++++++++++++                              | 39% ~07m 23s      
  |++++++++++++++++++++                              | 40% ~07m 11s      
  |+++++++++++++++++++++                             | 41% ~07m 00s      
  |+++++++++++++++++++++                             | 42% ~06m 49s      
  |++++++++++++++++++++++                            | 43% ~06m 39s      
  |++++++++++++++++++++++                            | 44% ~06m 29s      
  |+++++++++++++++++++++++                           | 45% ~06m 27s      
  |+++++++++++++++++++++++                           | 46% ~06m 28s      
  |++++++++++++++++++++++++                          | 47% ~06m 20s      
  |++++++++++++++++++++++++                          | 48% ~06m 10s      
  |+++++++++++++++++++++++++                         | 49% ~05m 60s      
  |+++++++++++++++++++++++++                         | 50% ~05m 51s      
  |++++++++++++++++++++++++++                        | 51% ~05m 41s      
  |++++++++++++++++++++++++++                        | 52% ~05m 32s      
  |+++++++++++++++++++++++++++                       | 53% ~05m 23s      
  |+++++++++++++++++++++++++++                       | 54% ~05m 15s      
  |++++++++++++++++++++++++++++                      | 55% ~05m 33s      
  |++++++++++++++++++++++++++++                      | 56% ~05m 23s      
  |+++++++++++++++++++++++++++++                     | 57% ~05m 17s      
  |+++++++++++++++++++++++++++++                     | 58% ~05m 09s      
  |++++++++++++++++++++++++++++++                    | 59% ~05m 01s      
  |++++++++++++++++++++++++++++++                    | 60% ~04m 52s      
  |+++++++++++++++++++++++++++++++                   | 61% ~04m 44s      
  |+++++++++++++++++++++++++++++++                   | 62% ~04m 41s      
  |++++++++++++++++++++++++++++++++                  | 63% ~04m 35s      
  |++++++++++++++++++++++++++++++++                  | 64% ~04m 26s      
  |+++++++++++++++++++++++++++++++++                 | 65% ~04m 18s      
  |+++++++++++++++++++++++++++++++++                 | 66% ~04m 10s      
  |++++++++++++++++++++++++++++++++++                | 67% ~04m 02s      
  |++++++++++++++++++++++++++++++++++                | 68% ~03m 53s      
  |+++++++++++++++++++++++++++++++++++               | 69% ~03m 45s      
  |+++++++++++++++++++++++++++++++++++               | 70% ~03m 39s      
  |++++++++++++++++++++++++++++++++++++              | 71% ~03m 31s      
  |++++++++++++++++++++++++++++++++++++              | 72% ~03m 29s      
  |+++++++++++++++++++++++++++++++++++++             | 73% ~03m 28s      
  |+++++++++++++++++++++++++++++++++++++             | 74% ~03m 26s      
  |++++++++++++++++++++++++++++++++++++++            | 75% ~03m 23s      
  |++++++++++++++++++++++++++++++++++++++            | 76% ~03m 19s      
  |+++++++++++++++++++++++++++++++++++++++           | 77% ~03m 18s      
  |+++++++++++++++++++++++++++++++++++++++           | 78% ~03m 15s      
  |++++++++++++++++++++++++++++++++++++++++          | 79% ~03m 05s      
  |++++++++++++++++++++++++++++++++++++++++          | 80% ~02m 57s      
  |+++++++++++++++++++++++++++++++++++++++++         | 81% ~02m 47s      
  |+++++++++++++++++++++++++++++++++++++++++         | 82% ~02m 37s      
  |++++++++++++++++++++++++++++++++++++++++++        | 83% ~02m 28s      
  |++++++++++++++++++++++++++++++++++++++++++        | 84% ~02m 18s      
  |+++++++++++++++++++++++++++++++++++++++++++       | 85% ~02m 09s      
  |+++++++++++++++++++++++++++++++++++++++++++       | 86% ~01m 60s      
  |++++++++++++++++++++++++++++++++++++++++++++      | 87% ~01m 51s      
  |++++++++++++++++++++++++++++++++++++++++++++      | 88% ~01m 42s      
  |+++++++++++++++++++++++++++++++++++++++++++++     | 89% ~01m 33s      
  |+++++++++++++++++++++++++++++++++++++++++++++     | 90% ~01m 24s      
  |++++++++++++++++++++++++++++++++++++++++++++++    | 91% ~01m 15s      
  |++++++++++++++++++++++++++++++++++++++++++++++    | 92% ~01m 07s      
  |+++++++++++++++++++++++++++++++++++++++++++++++   | 93% ~58s          
  |+++++++++++++++++++++++++++++++++++++++++++++++   | 94% ~50s          
  |++++++++++++++++++++++++++++++++++++++++++++++++  | 95% ~41s          
  |++++++++++++++++++++++++++++++++++++++++++++++++  | 96% ~33s          
  |+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~25s          
  |+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~17s          
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~09s          
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=15m 09s

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

---
title: "Cell cycling analysis of bloodstream for trypanosoma brucei"
output: html_notebook
---

```{r}
library(princurve)
library(tradeSeq)
library(cowplot)
library(clusterExperiment)
library(Seurat)
library(ggplot2)
library(RColorBrewer)
library(dplyr)
library(rlang)
library(grid)
library(ggridges)
library(tidyverse)
library(slingshot)
library(ggridges)

# Require bar height map function for one heatmap, provided seperately 

# Plotting theme for graph asthetics
UMAP_theme <- theme(axis.line=element_blank(), axis.ticks = element_blank(),  panel.background = element_rect(size=0.5,linetype="solid",color="black"), plot.title = element_text(size = 10, face = "bold", hjust = 0.05, vjust = -8), axis.text.x = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.y =  element_blank(), legend.title = element_blank())

```

```{r}
PrctCellExpringGene <- function(object, genes, group.by = "all"){
  if(group.by == "all"){
    prct = unlist(lapply(genes,calc_helper, object=object))
    result = data.frame(Markers = genes, Cell_proportion = prct)
    return(result)
  }
  
  else{        
    list = SplitObject(object, group.by)
    factors = names(list)
    
    results = lapply(list, PrctCellExpringGene, genes=genes)
    for(i in 1:length(factors)){
      results[[i]]$Feature = factors[i]
    }
    combined = do.call("rbind", results)
    return(combined)
  }
}

calc_helper <- function(object,genes){
  counts = object[['RNA']]@counts
  ncells = ncol(counts)
  if(genes %in% row.names(counts)){
    sum(counts[genes,]>0)/ncells
  }else{return(NA)}
}
```

### part one ###

```{r}
## Assign cell cycle stage using marker genes
Cell_cycle_regulated_genes <- read.delim("Cell_cycle_regulated_genes.txt")

## Can use the provided integrated seurat object or output from script "WT_differentiation_scRNA-seq"
load("WT_integrated_seurat_object")
# Rename object 
tryp <- WT.integrated
DimPlot(tryp)
```


```{r}
# Get all the genes identified in the data set that are detected in atleast 10% of the cells
genes <- tryp@assays[["integrated"]]@data
genes <- genes@Dimnames[[1]]
cell_prct <- PrctCellExpringGene(tryp, genes = genes, group.by = "all")
genes_10prct <- subset(cell_prct, subset = cell_prct$Cell_proportion > 0.1)
genes_10 <- genes_10prct$Markers

# Get list of marker genes present in atleast 10% cells for each phase

Cell_cycle_regulated_genes <- read.delim("PATH/Cell_cycle_regulated_genes.txt")

s.genes <- subset(Cell_cycle_regulated_genes, Cell_cycle_regulated_genes$S.phase %in% genes_10)
s.genes <- s.genes$S.phase

g2m.genes <- subset(Cell_cycle_regulated_genes, Cell_cycle_regulated_genes$G2.M.phase %in% genes_10)
g2m.genes <- g2m.genes$G2.M.phase

early.g1.genes <- subset(Cell_cycle_regulated_genes, Cell_cycle_regulated_genes$Early.G1 %in% genes_10)
early.g1.genes <- early.g1.genes$Early.G1

late.g1.genes <- subset(Cell_cycle_regulated_genes, Cell_cycle_regulated_genes$Late.G1 %in% genes_10)
late.g1.genes <- late.g1.genes$Late.G1

```

```{r}
# Calculate an expression score for each phase and save it to the seurat object
tryp <- MetaFeature(tryp, features = s.genes, meta.name = "S.aggregate")
tryp <- MetaFeature(tryp, features = g2m.genes, meta.name = "G2M.aggregate")
tryp <- MetaFeature(tryp, features = early.g1.genes, meta.name = "Early.G1.aggregate")
tryp <- MetaFeature(tryp, features = late.g1.genes, meta.name = "Late.G1.aggregate")

# Creat and dataframe with the expression score of each cell and each phase
df <- data.frame(tryp@meta.data[["S.aggregate"]], tryp@meta.data[["G2M.aggregate"]], tryp@meta.data[["Early.G1.aggregate"]], tryp@meta.data[["Late.G1.aggregate"]])
colnames(df) <- c("S", "G2M", "Early G1", "Late G1")
rownames(df) <- tryp@assays[["integrated"]]@data@Dimnames[[2]]
# Find ratio between the score and average
df$S.ratio <- df$S / mean(df$S)
df$G2M.ratio <- df$G2M / mean(df$G2M)
df$Early_G1.ratio <- df$`Early G1` / mean(df$`Early G1`)
df$Late_G1.ratio <- df$`Late G1` / mean(df$`Late G1`)

# Find the top scoring phase of each cell, with FC > 1.5. 
assignments <- apply(
  X = df[, 5:8],
  MARGIN = 1,
  FUN = function(scores, first = 'S', second = 'G2M', third = "Early G1", fourth = "Late G1", null = 'Non-cycling') {
    if (all(scores < 1.5)) {
      return(null)
    } else {
      if (length(which(x = scores == max(scores))) > 1) {
        return('Undecided')
      } else {
        return(c(first, second, third, fourth)[which(x = scores == max(scores))])
      }
    }
  }    
)

df$Phase <- assignments
df$Cluster <- tryp@active.ident

head(df)

#write.csv(df, file = "Cell_cycle_phase_scores.csv")

```

```{r}
mycolours <- c("#f8766d", "#7cae00", "#01bfc4", "grey", "#c77cff")
tryp$Phase <- as.vector(assignments)
p <- DimPlot(object = tryp, group.by = "Phase", reduction = "umap", pt.size = 0.5, cols = mycolours) + UMAP_theme
p[[1]]$layers[[1]]$aes_params$alpha = 0.8
p[[1]]$layers[[1]]$aes_params$shape = 16
p

#pdf(file = "cycle_WT_umap.pdf", width = 3.7, height = 2.5)
#p
#dev.off()
```

```{r}

cell_proportions <- as.data.frame(prop.table(table(tryp$Phase, tryp@active.ident), margin = 2))
write.csv(cell_proportions, file = "cell_proportions_phase_mutant_integration_testing_replicate.csv")

ggplot(data=cell_proportions, aes(x=cell_proportions$Var2, y=cell_proportions$Freq, fill=cell_proportions$Var1)) + geom_bar(stat="identity", color="black") + labs(x="sample", y="Proportion of Cells", fill="Cluster")


```

```{r}

## Heatmap for cycling genes

cycling_genes <- c(as.character(early.g1.genes), as.character(late.g1.genes), as.character(s.genes), as.character(g2m.genes))
df_order <- factor(df$Phase, levels = c("Early G1", "Late G1", "S", "G2M", "Non-cycling"))

df <- df[order(df$Cluster, df_order), ]

cell_order <- rownames(df)
levels(tryp$Phase) <- c("Early G1", "Late G1", "S", "G2M", "Non-cycling")

## Function in seperate file 
p <- DoMultiBarHeatmap(tryp, features = cycling_genes, label = FALSE, draw.lines = TRUE, assay = "integrated", additional.group.by = "Phase")
p

#pdf(file = "Heatmap_WT_int_cycling_genes_bar.pdf", width = 8, height = 6)
#p
#dev.off()

```

###### PART TWO #######

# Order slender cells with trajectory analysis

```{r}
tryp@meta.data[["cluster"]] <- as.character(tryp@active.ident)

tryp_subset <- subset(tryp, subset = cluster == c("LS A", "LS B"))

tryp_subset <- RunUMAP(tryp_subset, dims = 1:8, reduction = "pca", min.dist = 0.1)

DimPlot(tryp_subset, reduction = "umap", label = FALSE,
        label.size = 4,
        pt.size = 0.5, group.by = "Phase", cols = mycolours) + UMAP_theme
```

### Trajectory inference with slingshot

```{r}
# convert to sce
sce <- as.SingleCellExperiment(tryp_subset, assay = "integrated")

# Run slingshot, setting the starting cluster
sce <- slingshot(sce, reducedDim = 'UMAP', clusterLabels = sce@colData@listData[["Phase"]], start.clus = "Late G1", end.clus = c("Non-cycling"), shrink = 1)

## Plots
sce$Phase <- as.factor(sce$Phase)
#pdf(file = "WT_slender_phase_umap.pdf", width = 4.5, height = 4.5)
plot(reducedDims(sce)$UMAP, col = mycolours[sce$Phase], pch = 16, cex = 0.5, bty='l', axes = FALSE, ann = FALSE) + lines(SlingshotDataSet(sce), col = "black", lwd = 2)
#dev.off()   


```


```{r}

Pseudotime <-sce$slingPseudotime_1 
df2 <- data.frame(tryp_subset@meta.data[["S.aggregate"]], tryp_subset@meta.data[["G2M.aggregate"]], tryp_subset@meta.data[["Early.G1.aggregate"]], tryp_subset@meta.data[["Late.G1.aggregate"]])
colnames(df2) <- c("S", "G2M", "Early G1", "Late G1")
rownames(df2) <- tryp_subset@assays[["integrated"]]@data@Dimnames[[2]]

df2$Phase <- tryp_subset$Phase
df2$Pseudotime <- sce$slingPseudotime_1 

df2 <- df2 %>%
  mutate(Phase= fct_relevel(Phase, levels = "Non-cycling", "Early G1",  "G2M", "S", "Late G1"))
ggplot(df2, aes(Pseudotime, Phase, fill = Phase)) + geom_density_ridges() + 
  scale_fill_manual(values=c("grey", "#f8766d", "#7cae00", "#c77cff", "#01bfc4")) + theme_bw() +
  ylab("Number of cells") + NoLegend()


```


####### PART THREE #######

## Conduct peudotime analysis with the cycling cells, in my data the slender clusters

```{r}
## Conduct peudotime analysis with the cycling cells, in my data the slender clusters

tryp_subset <- subset(tryp_subset, subset = Phase != c("Non-cycling"))

# Find top variable genes for slenders only
tryp_subset <- FindVariableFeatures(tryp_subset, nfeatures = 2000, assay = "integrated")
slender_variable_genes <- tryp_subset@assays[["integrated"]]@var.features
write.csv(slender_variable_genes, file = "slender_variable_genes.csv")

# Repeat PCA analysis with these variable genes
tryp_subset  <- RunPCA(tryp_subset, verbose = FALSE)

tryp_subset <- RunUMAP(tryp_subset, dims = 1:12, reduction = "pca", min.dist = 0.1)
mycolours <- c("#f8766d", "#7cae00", "#01bfc4", "#c77cff")

p <- DimPlot(tryp_subset, reduction = "umap", label = FALSE,
        label.size = 4,
        pt.size = 0.5, group.by = "Phase", cols = mycolours) + UMAP_theme
p

#pdf(file = "UMAP_slender_cycling_subset_phase.pdf", width = 2.2, height = 2)
#p
#dev.off()

```



```{r}

## Get UMAP dims
rd <- tryp_subset@reductions[["umap"]]@cell.embeddings

plot(rd, pch=16, col = mycolours[as.factor(tryp_subset$Phase)] ) +
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"), panel.background = element_blank(),
        plot.title = element_text(size = 10, face = "bold"), axis.text.x = element_text(size = 8), axis.title.x = element_text(size = 9), axis.title.y = element_text(size = 9), axis.text.y =  element_text(size = 8))

# Draw a seperate curve for trajectory inference 
pcc <- principal_curve(rd, smoother="periodic_lowess")
#add line to plot
lines(x=pcc$s[order(pcc$lambda),1], y=pcc$s[order(pcc$lambda),2], col="black", lwd=2)



```

plot(rd, pch=16, col = mycolours[as.factor(tryp_subset$Phase)] ) +
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"), panel.background = element_blank(),
        plot.title = element_text(size = 10, face = "bold"), axis.text.x = element_text(size = 8), axis.title.x = element_text(size = 9), axis.title.y = element_text(size = 9), axis.text.y =  element_text(size = 8))

lines(x=pcc$s[order(pcc$lambda),1], y=pcc$s[order(pcc$lambda),2], col="black", lwd=2)


```{r}
colors <- rev(colorRampPalette(brewer.pal(11,'Spectral')[-6])(100))
plotcol <- colors[cut(pcc$lambda, breaks=100)]

data <- as.data.frame(rd)

data$pseudotime <- pcc$lambda
p <- ggplot(data, aes(x = UMAP_1, y = UMAP_2, color = pseudotime)) + geom_point(size = 1) + scale_color_gradientn(colours = colors) +
  geom_path(x=pcc$s[order(pcc$lambda),1], y=pcc$s[order(pcc$lambda),2], color = "black", lwd = 1) + 
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"), panel.background = element_blank(),
        plot.title = element_text(size = 10, face = "bold"), axis.text.x = element_text(size = 8), axis.title.x = element_text(size = 9), axis.title.y = element_text(size = 9), axis.text.y =  element_text(size = 8))

p

#pdf(file = "UMAP_cycling_slender_pseudotime_princure.pdf", width = 3.1, height = 2)
#p
#dev.off()     
```

```{r}
# fit smoothers on raw data
nPointsClus <- 50
counts <- as.matrix(tryp_subset@assays[["RNA"]]@counts)
cWeights <- rep(1,ncol(counts))
pseudoT <- matrix(pcc$lambda,nrow=ncol(counts),ncol=1)
gamList <- fitGAM(counts, pseudotime=pseudoT, cellWeights=cWeights, nknots=5)
save(gamList, file = "slenders_GAM")
```


```{r}
# Test for association of expression with the trajectory
assocTestRes_cellCycle <- associationTest(gamList)
assoc.genes <- subset(assocTestRes_cellCycle, pvalue < 0.05)
assoc.genes

assoc.genes <- subset(assoc.genes, meanLogFC > 0.301)
assoc.genes
assoc.genes <- rownames(assoc.genes)
#write.csv(assoc.genes, file = "cc_asso_genes.csv")
#write.csv(assocTestRes_cellCycle, file = "assocTestRes_cellCycle.csv")
```

```{r}
# Add phase info to object
gamList$phase <- tryp_subset$Phase

#pdf(file = "CYC8_cell_cycle_smooth.pdf", width = 1.8, height = 1.5)
plotSmoothers(gamList, counts, gene = "Tbrucei---Tb927.7.1590", lwd = 0.75, sample = 1, alpha = 0.5, pointCol = "phase") +
  labs(title = "CYC8") + NoLegend() + 
  scale_x_reverse() +
  theme(plot.title = element_text(size = 9, face = "bold"), axis.text.x = element_text(size = 8), axis.title.x = element_text(size = 8), axis.title.y = element_text(size = 8), axis.text.y =  element_text(size = 8))
#dev.off()
```

```{r}

library(ComplexHeatmap)
clusPat_slender <- clusterExpressionPatterns(gamList, nPoints = nPointsClus,
                                     genes = assoc.genes)

#Extract lineage data
yhat_lin1 <- as.data.frame(clusPat_slender$yhatScaled)
yhat_lin1 <- yhat_lin1[ ,1:50]
colnames(yhat_lin1) <- sub("l1:t", "", colnames(yhat_lin1))
yhat_lin1 <- yhat_lin1[ order(row.names(yhat_lin1)), ]
yhat_lin1 <-yhat_lin1[ , order(seq_len(ncol(yhat_lin1)))]
yhat_lin1$position <- colnames(yhat_lin1)[apply(yhat_lin1,1,which.max)]
yhat_lin1 <- yhat_lin1[order(yhat_lin1$position), ]
data_cell <- as.matrix(yhat_lin1[, 1:50])

#Note annotations were generated manual in excel
#ha2 <- rowAnnotation(genes = anno_mark(at = CCgenes$Position, labels = CCgenes$Name, side = "left"))

colors <- setNames(colorRampPalette(brewer.pal(11,'Spectral')[-6])(50), 1:50)
ha <- HeatmapAnnotation(pseudotime = 1:50, col = list(pseudotime = colors),  show_legend = FALSE)
#pdf(file = "Cellcyce_new_heatmap.pdf", height = 7, width = 5)
Heatmap(data_cell, cluster_columns = FALSE, show_column_names = FALSE, cluster_rows = FALSE, top_annotation = ha, show_row_dend = FALSE,
        show_heatmap_legend = TRUE, heatmap_legend_param = list(title = "expression"), show_row_names = FALSE)
#dev.off()


```



Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Cmd+Option+I*.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Cmd+Shift+K* to preview the HTML file). 

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

