Introduction

In this analysis we analyse the mass spectrometer data provided. We read in a trimmed version of the data and establish the following cut-offs

phospho_data <- read_xlsx("/Users/stephen/Library/CloudStorage/OneDrive-Personal/Martens Lab Onedrive/Bioinformatics/PI3K Phosphorylation/LCMS_report_trimmed.xlsx", sheet = 1)#Take peptide_intensity of Phospho sites
colnames(phospho_data)#Names of each column
##  [1] "Proteins"                            "Comments"                           
##  [3] "Positions within proteins"           "Leading proteins"                   
##  [5] "Protein"                             "Protein names"                      
##  [7] "Gene names"                          "Fasta headers"                      
##  [9] "Localization prob"                   "Score diff"                         
## [11] "PEP"                                 "Score"                              
## [13] "Number of Phospho (STY)"             "Amino acid"                         
## [15] "Phospho (STY) Probabilities"         "Position in peptide"                
## [17] "Charge"                              "Mass error [ppm]"                   
## [19] "mCherry_TBK1_ATG14Cherry_0_s15_25p"  "mCherry_TBK1_ATG14Cherry_30_s19_25p"
## [21] "mCherry_TBK1_Becline_0_s16_25p"      "mCherry_TBK1_Becline_30_s20_25p"    
## [23] "mCherry_TBK1_VPS15_0_s13_40p"        "mCherry_TBK1_VPS15_30_s17_40p"      
## [25] "mCherry_TBK1_VPS34_0_s14_25p"        "mCherry_TBK1_VPS34_30_s18_25p"      
## [27] "mCherry_ULK1_ATG14Cherry_0_s23_25p"  "mCherry_ULK1_ATG14Cherry_30_s27_25p"
## [29] "mCherry_ULK1_Becline_0_s24_25p"      "mCherry_ULK1_Becline_30_s28_25p"    
## [31] "mCherry_ULK1_VPS15_0_s21_40p"        "mCherry_ULK1_VPS15_30_s25_40p"      
## [33] "mCherry_ULK1_VPS34_0_s22_25p"        "mCherry_ULK1_VPS34_30_s26_25p"      
## [35] "TBK1_ATG14_Becline_0_s3_50p"         "TBK1_ATG14_Becline_30_s6_50p"       
## [37] "TBK1_VPS15_0_s1_50p"                 "TBK1_VPS15_30_s4_50p"               
## [39] "TBK1_VPS34_0_s2_50p"                 "TBK1_VPS34_30_s5_50p"               
## [41] "ULK1_ATG14_Becline_0_s9_50p"         "ULK1_ATG14_Becline_30_s12_50p"      
## [43] "ULK1_VPS15_0_s7_50p"                 "ULK1_VPS15_30_s10_50p"              
## [45] "ULK1_VPS34_0_s8_50p"                 "ULK1_VPS34_30_s11_50p"
metadata <- column_to_rownames(read_xlsx("/Users/stephen/Library/CloudStorage/OneDrive-Personal/Martens Lab Onedrive/Bioinformatics/PI3K Phosphorylation/Phosphosite_metadata.xlsx"), "Label")
head(metadata)#Labels broken down into data fields
##                                       User Target    Band Time Standard_S
## mCherry_TBK1_ATG14Cherry_0_s15_25p  Daniel   TBK1   ATG14    0         15
## mCherry_TBK1_ATG14Cherry_30_s19_25p Daniel   TBK1   ATG14   30         19
## mCherry_TBK1_Becline_0_s16_25p      Daniel   TBK1 Beclin1    0         16
## mCherry_TBK1_Becline_30_s20_25p     Daniel   TBK1 Beclin1   30         20
## mCherry_TBK1_VPS15_0_s13_40p        Daniel   TBK1   VPS15    0         13
## mCherry_TBK1_VPS15_30_s17_40p       Daniel   TBK1   VPS15   30         17
##                                     Trypsin_P
## mCherry_TBK1_ATG14Cherry_0_s15_25p         25
## mCherry_TBK1_ATG14Cherry_30_s19_25p        25
## mCherry_TBK1_Becline_0_s16_25p             25
## mCherry_TBK1_Becline_30_s20_25p            25
## mCherry_TBK1_VPS15_0_s13_40p               40
## mCherry_TBK1_VPS15_30_s17_40p              40

The data frame contains the peptide intensity for the TBK1 and ULK1 samples. The metadata contains a breakdown of the sample labels.

Cutoffs

In the phospho site data, the following cut-offs will be applied

## [1] "Number of rows before cutoff: 204     Number of rows after cutoff: 35"
## 
## FALSE  TRUE 
##    33     2
## [1] "7 peptides have a score between 90 and 100"

Subtracting the baseline

phospho_sel$pep_label <- 1:nrow(phospho_sel)
phospho_mat <- column_to_rownames(phospho_sel[,c(19:46, (ncol(phospho_sel)))], "pep_label")#Gives the rows an identifier since no column iD seems unique
phospho_sel <- column_to_rownames(phospho_sel, "pep_label")#Same for phospho_sel
metadata$label <- paste0(metadata$User, "_",  metadata$Target, "_",  metadata$Band)
metadata <- metadata[order(metadata$label, metadata$Time),]
metadata$sample_name <- rownames(metadata)
timepoint_0 <- subset(metadata, metadata$Time==0)$sample_name
timepoint_30 <- subset(metadata, metadata$Time==30)$sample_name
timepoint_0_mat <- phospho_mat[, timepoint_0]
timepoint_30_mat <- phospho_mat[, timepoint_30]
setnames(timepoint_0_mat, old = subset(metadata, metadata$Time=="0")$sample_name, new = subset(metadata, metadata$Time=="0")$label)
setnames(timepoint_30_mat, old = subset(metadata, metadata$Time=="30")$sample_name, new = subset(metadata, metadata$Time=="30")$label)
cols <- sort(intersect(names(timepoint_0_mat), names(timepoint_30_mat)))
setdiff(names(timepoint_0_mat), names(timepoint_30_mat))#Check that no columns have non matching titles
## character(0)
substr_mat <- timepoint_30_mat[cols]-timepoint_0_mat[cols]
substr_mat[substr_mat<0] <- NA # Ignores negative values
phospho_subtr <- cbind(phospho_sel[, c(1:18, 47)], substr_mat)

Write the cleaned file to csv for later reference

#write.csv(phospho_subtr, "~/OneDrive/Martens Lab/R Work/phospho_sites_30sub0.csv")
colnames(phospho_subtr)
##  [1] "Proteins"                    "Comments"                   
##  [3] "Positions within proteins"   "Leading proteins"           
##  [5] "Protein"                     "Protein names"              
##  [7] "Gene names"                  "Fasta headers"              
##  [9] "Localization prob"           "Score diff"                 
## [11] "PEP"                         "Score"                      
## [13] "Number of Phospho (STY)"     "Amino acid"                 
## [15] "Phospho (STY) Probabilities" "Position in peptide"        
## [17] "Charge"                      "Mass error [ppm]"           
## [19] "Low_Score_Warning"           "Daniel_TBK1_ATG14"          
## [21] "Daniel_TBK1_Beclin1"         "Daniel_TBK1_VPS15"          
## [23] "Daniel_TBK1_VPS34"           "Daniel_ULK1_ATG14"          
## [25] "Daniel_ULK1_Beclin1"         "Daniel_ULK1_VPS15"          
## [27] "Daniel_ULK1_VPS34"           "Elias_TBK1_ATG14"           
## [29] "Elias_TBK1_VPS15"            "Elias_TBK1_VPS34"           
## [31] "Elias_ULK1_ATG14"            "Elias_ULK1_VPS15"           
## [33] "Elias_ULK1_VPS34"
phospho_subtr_sel <- subset(phospho_subtr, rowSums(phospho_subtr[, 21:33], na.rm = T)>0)#ignores NA and removes rows that have a sum of 0
nrow(phospho_subtr); nrow(phospho_subtr_sel)
## [1] 32
## [1] 27
#phospho_subtr <- read.csv("~/OneDrive/Martens Lab/R Work/phospho_sites_30sub0.csv")
#write.csv(phospho_subtr_sel, "~/OneDrive/Martens Lab/R Work/phospho_sites_30sub0.csv")

Plots

Interactive graphs of the dataset

melt_phospho <- melt(phospho_subtr[,c(1,3,6,7,20:33)], id.vars=c("Proteins", "Positions within proteins", "Protein names", "Gene names"), na.rm = T)
## Warning in melt(phospho_subtr[, c(1, 3, 6, 7, 20:33)], id.vars = c("Proteins", :
## The melt generic in data.table has been passed a data.frame and will attempt
## to redirect to the relevant reshape2 method; please note that reshape2 is
## deprecated, and this redirection is now deprecated as well. To continue using
## melt methods from reshape2 while both libraries are attached, e.g. melt.list,
## you can prepend the namespace like reshape2::melt(phospho_subtr[, c(1, 3, 6, 7,
## 20:33)]). In the next version, this warning will become an error.
melt_phospho <- melt_phospho[order(melt_phospho$`Positions within proteins`),]
melt_phospho$`Positions within proteins` <- gsub(".*;", "", melt_phospho$`Positions within proteins`)#removes N terminal before semi colon (position)
melt_phospho$`Positions within proteins` <- as.numeric(melt_phospho$`Positions within proteins`)#turns the numbers to numeric
melt_phospho <- melt_phospho %>% dplyr::arrange(`Positions within proteins`)
melt_phospho <- melt_phospho[order(as.numeric(melt_phospho$`Positions within proteins`)),]
melt_phospho$`Gene names` <- ifelse(is.na(melt_phospho$`Gene names`), melt_phospho$Proteins, melt_phospho$`Gene names`)
melt_phospho$log10PI<- log10(melt_phospho$value+1)
p <- ggplot(melt_phospho, aes(x=`Positions within proteins`, y=log10PI, color=variable))+geom_point()+facet_wrap(~`Gene names`, scales = "free_x")+theme(axis.text.x = element_text(angle = 90))+
  labs(title = "Plot of position of peptide on protein and peptide intensity", x="Position on Protein", y="Log10(Difference in Peptide Intensity, (t30-t0))")+scale_x_continuous()+scale_y_continuous(limits = c(0,10))
pg <- ggplotly(p)
pg
p

Publication plots

For the publication several plots are made for the figure. For this the mCherry_Q6ZNE5 can be removed. In addition plots should take into account whether the protein is from the correct band or is just co-selected in the same band. Positions within the protein are labelled in each plot. TBK1 and ULK1 are separated by colour.

Phosphorylation site regardless of bands

melt_phospho[c('User', 'Kinase', "Band")] <- str_split_fixed(melt_phospho$variable, '_', 3)
phospho_sel <- subset(melt_phospho, `Gene names`!="mCherry_Q6ZNE5")#Remove mCherry identification
#Changing alias names
phospho_sel$target <- ""
phospho_sel[phospho_sel$`Gene names`=="PIK3R4", "target"] <- "VPS15"
phospho_sel[phospho_sel$`Gene names`=="PIK3C3", "target"] <- "VPS34"
phospho_sel[phospho_sel$`Gene names`=="BECN1", "target"] <- "Beclin1"
phospho_sel[phospho_sel$`Gene names`=="ATG14", "target"] <- "ATG14"

phospho_sites <- phospho_sel %>% group_by(`Positions within proteins`, target) %>%
  summarise(max = max(log10PI, na.rm=TRUE))
## `summarise()` has grouped output by 'Positions within proteins'. You can
## override using the `.groups` argument.
p1 <- ggplot(phospho_sel, aes(x=`Positions within proteins`, y=log10PI))+
  geom_col(width=0.1, color="black", position = position_identity())+
  geom_point(shape=21, aes(fill=Kinase), size=4)+
  geom_text(data=phospho_sites, aes(x=`Positions within proteins`, y=max+0.7, label=`Positions within proteins`), angle=90)+
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA))+
  scale_x_continuous(breaks = scales::pretty_breaks())+
  theme_bw()+
  labs(y="Log10 Adjusted Peptide Intensity (30min-0min)", title = "Band≠Target")+
  facet_wrap(~target, ncol = 2, scales = "free_x")

Removing proteins that are not found in their appropriate band

melt_phospho[c('User', 'Kinase', "Band")] <- str_split_fixed(melt_phospho$variable, '_', 3)
phospho_sel <- subset(melt_phospho, `Gene names`!="mCherry_Q6ZNE5")#Remove mCherry identification
#Changing alias names
phospho_sel$target <- ""
phospho_sel[phospho_sel$`Gene names`=="PIK3R4", "target"] <- "VPS15"
phospho_sel[phospho_sel$`Gene names`=="PIK3C3", "target"] <- "VPS34"
phospho_sel[phospho_sel$`Gene names`=="BECN1", "target"] <- "Beclin1"
phospho_sel[phospho_sel$`Gene names`=="ATG14", "target"] <- "ATG14"

#Band=target
phospho_sel <- phospho_sel[phospho_sel$target==phospho_sel$Band,]

phospho_sites <- phospho_sel %>% group_by(`Positions within proteins`, target) %>%
  summarise(max = max(log10PI, na.rm=TRUE))
## `summarise()` has grouped output by 'Positions within proteins'. You can
## override using the `.groups` argument.
p2 <- ggplot(phospho_sel, aes(x=`Positions within proteins`, y=log10PI))+
  geom_col(width=0.1, color="black", position = position_identity())+
  geom_point(shape=21, aes(fill=Kinase), size=4)+
  geom_text(data=phospho_sites, aes(x=`Positions within proteins`, y=max+0.7, label=`Positions within proteins`), angle=90)+
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA))+
  scale_x_continuous(labels = scales::label_number(accuracy = 1), breaks = scales::breaks_pretty())+
  theme_bw()+
  labs(y="Log10 Adjusted Peptide Intensity (30min-0min)", title = "Band=Target")+
  facet_wrap(~target, ncol = 2, scales = "free_x")
p2

ggarrange(p1, p2, ncol=1)