In this analysis we analyse the mass spectrometer data provided. We read in a trimmed version of the data and establish the following cut-offs
phospho_data <- read_xlsx("/Users/stephen/Library/CloudStorage/OneDrive-Personal/Martens Lab Onedrive/Bioinformatics/PI3K Phosphorylation/LCMS_report_trimmed.xlsx", sheet = 1)#Take peptide_intensity of Phospho sites
colnames(phospho_data)#Names of each column
## [1] "Proteins" "Comments"
## [3] "Positions within proteins" "Leading proteins"
## [5] "Protein" "Protein names"
## [7] "Gene names" "Fasta headers"
## [9] "Localization prob" "Score diff"
## [11] "PEP" "Score"
## [13] "Number of Phospho (STY)" "Amino acid"
## [15] "Phospho (STY) Probabilities" "Position in peptide"
## [17] "Charge" "Mass error [ppm]"
## [19] "mCherry_TBK1_ATG14Cherry_0_s15_25p" "mCherry_TBK1_ATG14Cherry_30_s19_25p"
## [21] "mCherry_TBK1_Becline_0_s16_25p" "mCherry_TBK1_Becline_30_s20_25p"
## [23] "mCherry_TBK1_VPS15_0_s13_40p" "mCherry_TBK1_VPS15_30_s17_40p"
## [25] "mCherry_TBK1_VPS34_0_s14_25p" "mCherry_TBK1_VPS34_30_s18_25p"
## [27] "mCherry_ULK1_ATG14Cherry_0_s23_25p" "mCherry_ULK1_ATG14Cherry_30_s27_25p"
## [29] "mCherry_ULK1_Becline_0_s24_25p" "mCherry_ULK1_Becline_30_s28_25p"
## [31] "mCherry_ULK1_VPS15_0_s21_40p" "mCherry_ULK1_VPS15_30_s25_40p"
## [33] "mCherry_ULK1_VPS34_0_s22_25p" "mCherry_ULK1_VPS34_30_s26_25p"
## [35] "TBK1_ATG14_Becline_0_s3_50p" "TBK1_ATG14_Becline_30_s6_50p"
## [37] "TBK1_VPS15_0_s1_50p" "TBK1_VPS15_30_s4_50p"
## [39] "TBK1_VPS34_0_s2_50p" "TBK1_VPS34_30_s5_50p"
## [41] "ULK1_ATG14_Becline_0_s9_50p" "ULK1_ATG14_Becline_30_s12_50p"
## [43] "ULK1_VPS15_0_s7_50p" "ULK1_VPS15_30_s10_50p"
## [45] "ULK1_VPS34_0_s8_50p" "ULK1_VPS34_30_s11_50p"
metadata <- column_to_rownames(read_xlsx("/Users/stephen/Library/CloudStorage/OneDrive-Personal/Martens Lab Onedrive/Bioinformatics/PI3K Phosphorylation/Phosphosite_metadata.xlsx"), "Label")
head(metadata)#Labels broken down into data fields
## User Target Band Time Standard_S
## mCherry_TBK1_ATG14Cherry_0_s15_25p Daniel TBK1 ATG14 0 15
## mCherry_TBK1_ATG14Cherry_30_s19_25p Daniel TBK1 ATG14 30 19
## mCherry_TBK1_Becline_0_s16_25p Daniel TBK1 Beclin1 0 16
## mCherry_TBK1_Becline_30_s20_25p Daniel TBK1 Beclin1 30 20
## mCherry_TBK1_VPS15_0_s13_40p Daniel TBK1 VPS15 0 13
## mCherry_TBK1_VPS15_30_s17_40p Daniel TBK1 VPS15 30 17
## Trypsin_P
## mCherry_TBK1_ATG14Cherry_0_s15_25p 25
## mCherry_TBK1_ATG14Cherry_30_s19_25p 25
## mCherry_TBK1_Becline_0_s16_25p 25
## mCherry_TBK1_Becline_30_s20_25p 25
## mCherry_TBK1_VPS15_0_s13_40p 40
## mCherry_TBK1_VPS15_30_s17_40p 40
The data frame contains the peptide intensity for the TBK1 and ULK1 samples. The metadata contains a breakdown of the sample labels.
In the phospho site data, the following cut-offs will be applied
If the sum of the row of the peptide is equal to 0, it will be excluded
If the peptides score is less than 90 it is excluded
All samples with a score between 90-100 are labelled with a low-score warning in the metadata
## [1] "Number of rows before cutoff: 204 Number of rows after cutoff: 35"
##
## FALSE TRUE
## 33 2
## [1] "7 peptides have a score between 90 and 100"
phospho_sel$pep_label <- 1:nrow(phospho_sel)
phospho_mat <- column_to_rownames(phospho_sel[,c(19:46, (ncol(phospho_sel)))], "pep_label")#Gives the rows an identifier since no column iD seems unique
phospho_sel <- column_to_rownames(phospho_sel, "pep_label")#Same for phospho_sel
metadata$label <- paste0(metadata$User, "_", metadata$Target, "_", metadata$Band)
metadata <- metadata[order(metadata$label, metadata$Time),]
metadata$sample_name <- rownames(metadata)
timepoint_0 <- subset(metadata, metadata$Time==0)$sample_name
timepoint_30 <- subset(metadata, metadata$Time==30)$sample_name
timepoint_0_mat <- phospho_mat[, timepoint_0]
timepoint_30_mat <- phospho_mat[, timepoint_30]
setnames(timepoint_0_mat, old = subset(metadata, metadata$Time=="0")$sample_name, new = subset(metadata, metadata$Time=="0")$label)
setnames(timepoint_30_mat, old = subset(metadata, metadata$Time=="30")$sample_name, new = subset(metadata, metadata$Time=="30")$label)
cols <- sort(intersect(names(timepoint_0_mat), names(timepoint_30_mat)))
setdiff(names(timepoint_0_mat), names(timepoint_30_mat))#Check that no columns have non matching titles
## character(0)
substr_mat <- timepoint_30_mat[cols]-timepoint_0_mat[cols]
substr_mat[substr_mat<0] <- NA # Ignores negative values
phospho_subtr <- cbind(phospho_sel[, c(1:18, 47)], substr_mat)
#write.csv(phospho_subtr, "~/OneDrive/Martens Lab/R Work/phospho_sites_30sub0.csv")
colnames(phospho_subtr)
## [1] "Proteins" "Comments"
## [3] "Positions within proteins" "Leading proteins"
## [5] "Protein" "Protein names"
## [7] "Gene names" "Fasta headers"
## [9] "Localization prob" "Score diff"
## [11] "PEP" "Score"
## [13] "Number of Phospho (STY)" "Amino acid"
## [15] "Phospho (STY) Probabilities" "Position in peptide"
## [17] "Charge" "Mass error [ppm]"
## [19] "Low_Score_Warning" "Daniel_TBK1_ATG14"
## [21] "Daniel_TBK1_Beclin1" "Daniel_TBK1_VPS15"
## [23] "Daniel_TBK1_VPS34" "Daniel_ULK1_ATG14"
## [25] "Daniel_ULK1_Beclin1" "Daniel_ULK1_VPS15"
## [27] "Daniel_ULK1_VPS34" "Elias_TBK1_ATG14"
## [29] "Elias_TBK1_VPS15" "Elias_TBK1_VPS34"
## [31] "Elias_ULK1_ATG14" "Elias_ULK1_VPS15"
## [33] "Elias_ULK1_VPS34"
phospho_subtr_sel <- subset(phospho_subtr, rowSums(phospho_subtr[, 21:33], na.rm = T)>0)#ignores NA and removes rows that have a sum of 0
nrow(phospho_subtr); nrow(phospho_subtr_sel)
## [1] 32
## [1] 27
#phospho_subtr <- read.csv("~/OneDrive/Martens Lab/R Work/phospho_sites_30sub0.csv")
#write.csv(phospho_subtr_sel, "~/OneDrive/Martens Lab/R Work/phospho_sites_30sub0.csv")
melt_phospho <- melt(phospho_subtr[,c(1,3,6,7,20:33)], id.vars=c("Proteins", "Positions within proteins", "Protein names", "Gene names"), na.rm = T)
## Warning in melt(phospho_subtr[, c(1, 3, 6, 7, 20:33)], id.vars = c("Proteins", :
## The melt generic in data.table has been passed a data.frame and will attempt
## to redirect to the relevant reshape2 method; please note that reshape2 is
## deprecated, and this redirection is now deprecated as well. To continue using
## melt methods from reshape2 while both libraries are attached, e.g. melt.list,
## you can prepend the namespace like reshape2::melt(phospho_subtr[, c(1, 3, 6, 7,
## 20:33)]). In the next version, this warning will become an error.
melt_phospho <- melt_phospho[order(melt_phospho$`Positions within proteins`),]
melt_phospho$`Positions within proteins` <- gsub(".*;", "", melt_phospho$`Positions within proteins`)#removes N terminal before semi colon (position)
melt_phospho$`Positions within proteins` <- as.numeric(melt_phospho$`Positions within proteins`)#turns the numbers to numeric
melt_phospho <- melt_phospho %>% dplyr::arrange(`Positions within proteins`)
melt_phospho <- melt_phospho[order(as.numeric(melt_phospho$`Positions within proteins`)),]
melt_phospho$`Gene names` <- ifelse(is.na(melt_phospho$`Gene names`), melt_phospho$Proteins, melt_phospho$`Gene names`)
melt_phospho$log10PI<- log10(melt_phospho$value+1)
p <- ggplot(melt_phospho, aes(x=`Positions within proteins`, y=log10PI, color=variable))+geom_point()+facet_wrap(~`Gene names`, scales = "free_x")+theme(axis.text.x = element_text(angle = 90))+
labs(title = "Plot of position of peptide on protein and peptide intensity", x="Position on Protein", y="Log10(Difference in Peptide Intensity, (t30-t0))")+scale_x_continuous()+scale_y_continuous(limits = c(0,10))
pg <- ggplotly(p)
pg
p
For the publication several plots are made for the figure. For this the mCherry_Q6ZNE5 can be removed. In addition plots should take into account whether the protein is from the correct band or is just co-selected in the same band. Positions within the protein are labelled in each plot. TBK1 and ULK1 are separated by colour.
melt_phospho[c('User', 'Kinase', "Band")] <- str_split_fixed(melt_phospho$variable, '_', 3)
phospho_sel <- subset(melt_phospho, `Gene names`!="mCherry_Q6ZNE5")#Remove mCherry identification
#Changing alias names
phospho_sel$target <- ""
phospho_sel[phospho_sel$`Gene names`=="PIK3R4", "target"] <- "VPS15"
phospho_sel[phospho_sel$`Gene names`=="PIK3C3", "target"] <- "VPS34"
phospho_sel[phospho_sel$`Gene names`=="BECN1", "target"] <- "Beclin1"
phospho_sel[phospho_sel$`Gene names`=="ATG14", "target"] <- "ATG14"
phospho_sites <- phospho_sel %>% group_by(`Positions within proteins`, target) %>%
summarise(max = max(log10PI, na.rm=TRUE))
## `summarise()` has grouped output by 'Positions within proteins'. You can
## override using the `.groups` argument.
p1 <- ggplot(phospho_sel, aes(x=`Positions within proteins`, y=log10PI))+
geom_col(width=0.1, color="black", position = position_identity())+
geom_point(shape=21, aes(fill=Kinase), size=4)+
geom_text(data=phospho_sites, aes(x=`Positions within proteins`, y=max+0.7, label=`Positions within proteins`), angle=90)+
scale_y_continuous(expand = c(0, 0), limits = c(0, NA))+
scale_x_continuous(breaks = scales::pretty_breaks())+
theme_bw()+
labs(y="Log10 Adjusted Peptide Intensity (30min-0min)", title = "Band≠Target")+
facet_wrap(~target, ncol = 2, scales = "free_x")
melt_phospho[c('User', 'Kinase', "Band")] <- str_split_fixed(melt_phospho$variable, '_', 3)
phospho_sel <- subset(melt_phospho, `Gene names`!="mCherry_Q6ZNE5")#Remove mCherry identification
#Changing alias names
phospho_sel$target <- ""
phospho_sel[phospho_sel$`Gene names`=="PIK3R4", "target"] <- "VPS15"
phospho_sel[phospho_sel$`Gene names`=="PIK3C3", "target"] <- "VPS34"
phospho_sel[phospho_sel$`Gene names`=="BECN1", "target"] <- "Beclin1"
phospho_sel[phospho_sel$`Gene names`=="ATG14", "target"] <- "ATG14"
#Band=target
phospho_sel <- phospho_sel[phospho_sel$target==phospho_sel$Band,]
phospho_sites <- phospho_sel %>% group_by(`Positions within proteins`, target) %>%
summarise(max = max(log10PI, na.rm=TRUE))
## `summarise()` has grouped output by 'Positions within proteins'. You can
## override using the `.groups` argument.
p2 <- ggplot(phospho_sel, aes(x=`Positions within proteins`, y=log10PI))+
geom_col(width=0.1, color="black", position = position_identity())+
geom_point(shape=21, aes(fill=Kinase), size=4)+
geom_text(data=phospho_sites, aes(x=`Positions within proteins`, y=max+0.7, label=`Positions within proteins`), angle=90)+
scale_y_continuous(expand = c(0, 0), limits = c(0, NA))+
scale_x_continuous(labels = scales::label_number(accuracy = 1), breaks = scales::breaks_pretty())+
theme_bw()+
labs(y="Log10 Adjusted Peptide Intensity (30min-0min)", title = "Band=Target")+
facet_wrap(~target, ncol = 2, scales = "free_x")
p2
ggarrange(p1, p2, ncol=1)