Load/install required libraries

if (!require("pacman")) install.packages("pacman")
## Loading required package: pacman
library(pacman)
p_load(knitr, dplyr, reshape2, spdep, raster, doParallel, scales, viridis, plot.matrix,
       data.table, dynamicTreeCut, RColorBrewer, ggplot2)

Set working directory.

Alter this path to the location containing the “Code” and “Data” folders

knitr::opts_knit$set(root.dir = 'E:/SpatialProteomicsCode')

Custom functions

#Define function to read maxquant proteingroups table
readMQ <- function(fp) {
  read.csv(fp, sep = "\t", stringsAsFactors = FALSE, header = TRUE, quote = "")
}

#Define function to median centre columns of a dataframe
normalise <- function(protein.data) {
    sapply(protein.data, function(y) y - median(as.numeric(y), na.rm = TRUE))
}

#Define function to calculate Moran's Index in parallel
moran.Parallel <- function(protein.data, geneNames, nPixels, cores, multiPlate = FALSE, nsim = 999, nrow = 16, ncol = 24,
                                   impute = FALSE, lag.max = (nrow*ncol)/2, plot = FALSE,
                                   cutoff = 8) {
  registerDoParallel(cores = cores)
  fill.na <- function(x, i=5) {
    if( is.na(x)[i] ) {
      return( round(mean(x, na.rm=TRUE),0) )
    } else {
      #return( round(x[i],0) )
      return(x[i])
    }
  }
  
  a <- foreach(gene = iter(geneNames), .combine = "rbind", .packages = c("raster")) %dopar% {
    errorFlag <- FALSE
    filteredGenes <- dplyr::filter(protein.data, Gene.names == gene)
    vec <- as.numeric(filteredGenes[1,][1:nPixels])
    if (all(is.na(vec))) {
      return(data.frame(gene = gene, moran = NA, p.value = NA, valid.values = 0))
    }
    
    if (multiPlate == TRUE) {
      plate1 = matrix(rev(vec[1:96]), nrow = 8, ncol = 12, byrow = T)
      plate2 = matrix(rev(vec[97:192]), nrow = 8, ncol = 12, byrow = T)
      plate3 = matrix(rev(vec[193:288]), nrow = 8, ncol = 12, byrow = T)
      plate4 = matrix(rev(vec[289:384]), nrow = 8, ncol = 12, byrow = T)
      
      m = rbind(cbind(plate1,plate2),cbind(plate3,plate4))
      
      r <- raster::raster(m)
    }
    else {
      r <- raster(matrix(vec, nrow = nrow, ncol = ncol, byrow = T))
    }
   
    
    if (impute == TRUE) {
      r <- raster::focal(r, w = matrix(1,3,3), fun = fill.na, pad = TRUE, na.rm = FALSE)
    }
    
    if (plot == TRUE) {
      if (impute == FALSE) {
        plot(as.matrix(r), xlab = '', ylab = '', xaxt = 'n', na.col = 'white',
             col = viridis(24), main = paste(gene))  
      }
      if (impute == TRUE) {
        plot(as.matrix(r), xlab = '', ylab = '', xaxt = 'n', na.col = 'white',
             col = viridis(24), main = paste(gene, "(Imputed)"))
      }
    }
    
    if (sum(!is.na(raster::as.matrix(r))) < cutoff) {
      return(data.frame(gene = gene, moran = NA, p.value = NA, valid.values = sum(!is.na(raster::as.matrix(r)))))
    }
    
    w <- spdep::poly2nb(raster::rasterToPolygons(r, na.rm = F))
    ww <- spdep::nb2listw(w, style = 'B')

    tryCatch({
      mi.mc <- spdep::moran.mc(raster::values(r),ww,nsim=nsim,na.action = na.omit, zero.policy = TRUE)
    },
    error = function(e){
      errorFlag <<- TRUE
      message("* Caught an error on gene ", gene)
    })
    if (errorFlag) {
      return(data.frame(gene = gene, moran = NA, p.value = NA, valid.values = NA))
    }
    
    
    return(data.frame(gene = gene, moran = mi.mc$statistic, p.value = mi.mc$p.value, valid.values = sum(!is.na(raster::as.matrix(r)))))
  }
  stopImplicitCluster()
  rownames(a) <- NULL
  return(a)
  
  
}

#Define function to plot protein intensity in sampled grid layout
plotGeneConcat <- function(protein.data, geneName, nPixels = 384, ncol = 24, nrow = 16,
                           impute = FALSE, colPalette = viridis(24), na.col = "white",
                           scaled = FALSE, subset.data = FALSE, index = c(1:nPixels), ...) {
  filteredGenes <- filter(protein.data, Gene.names == geneName)
  vec <- as.numeric(filteredGenes[1,][1:nPixels])
  
  if (all(is.na(vec))) {
    next
  }
  if (subset.data == TRUE) {
      vec2 <- c(rep(NaN,384))
      vec2[index] <- vec[index]
      vec <- vec2
  }
  if (scaled == TRUE) {
    vec <- scales::rescale(vec, to=c(0,1))
    vec <- vec * 100
  }
  
  plate1 = matrix(rev(vec[1:96]), nrow = 8, ncol = 12, byrow = T)
  plate2 = matrix(rev(vec[97:192]), nrow = 8, ncol = 12, byrow = T)
  plate3 = matrix(rev(vec[193:288]), nrow = 8, ncol = 12, byrow = T)
  plate4 = matrix(rev(vec[289:384]), nrow = 8, ncol = 12, byrow = T)
  
  m = rbind(cbind(plate1,plate2),cbind(plate3,plate4))
  
  r <- raster(m)
  
  if (impute == TRUE) {
    r <- focal(r, w = matrix(1,3,3), fun = fill.na, pad = TRUE, na.rm = FALSE)
  }
  
  plot(as.matrix(r), xlab = '', ylab = '', xaxt = 'n', na.col = na.col,
       #col = colPalette, main = paste(geneName), ...)
       col = colPalette, ...)
}

#Define function to plot cluster assignment in sampled grid layout
plotClusters <- function(vec, nPixels, ncol, nrow, multiplate = FALSE, print.matrix = FALSE,
                         title = NULL, byrow = TRUE,
                         #col = colorRampPalette(brewer.pal(12, "Set3"))(n.col),
                         col = colorRampPalette(brewer.pal(9, "Set1"))(n.col), na.col = "white",
                         ...) {
    
  if (all(is.na(vec))) {
    next
  }
  
  n.col = length(unique(as.numeric(vec)[!is.na(as.numeric(vec))]))
  
  if (multiplate == TRUE) {
    plate1 = matrix(rev(vec[1:96]), nrow = 8, ncol = 12, byrow = byrow)
    plate2 = matrix(rev(vec[97:192]), nrow = 8, ncol = 12, byrow = byrow)
    plate3 = matrix(rev(vec[193:288]), nrow = 8, ncol = 12, byrow = byrow)
    plate4 = matrix(rev(vec[289:384]), nrow = 8, ncol = 12, byrow = byrow)
    
    m = rbind(cbind(plate1,plate2),cbind(plate3,plate4))
    
    
    r <- raster(m)
  } else {
    r <- raster(matrix(vec, nrow = nrow, ncol = ncol, byrow = byrow))
  }
  
  if (print.matrix == TRUE) {
    print(as.matrix(r))
  }
  
  plot(as.matrix(r), xlab = '', ylab = '', xaxt = 'n', na.col = na.col,
       col = col, main = title, ...)
       #col = colorRampPalette(c("gray", "black"))(n.col), main = title, ...)
}

Load data

mq.output <- readMQ("Data/MaxQuant Result - 833 um.txt")
dim(mq.output)
## [1] 5758 3107

Remove false positives and potential contaminant proteins

filterColumns <- c("Only.identified.by.site", "Reverse", "Potential.contaminant")
mq.filtered <- filter_at(mq.output, vars(all_of(filterColumns)), all_vars(. != "+"))
dim(mq.filtered)
## [1] 5373 3107

Remove columns not required

quantCols <- names(mq.filtered[,grepl("LFQ.intensity", names(mq.filtered))])
idCols <- names(mq.filtered[,c("Protein.IDs","Majority.protein.IDs",
                         "Protein.names","Gene.names")])
data <- mq.filtered[,c(quantCols,idCols)]

log2 transform and median centre

data[quantCols] <- log2(data[,quantCols])
data <- data %>% mutate_if(is.numeric, list(~na_if(., -Inf))) # remove infinite values from log-transform
data.melt <- reshape2::melt(data)
## Using Protein.IDs, Majority.protein.IDs, Protein.names, Gene.names as id variables
intensityDistribution <- ggplot(data.melt, aes(x = variable, y = value)) +
  geom_boxplot(notch = FALSE, outlier.shape = NA) +
  scale_x_discrete(labels = NULL, breaks = NULL) + labs(x = "Sample", y = "Intensity") +
  theme_classic()
plot(intensityDistribution)
## Warning: Removed 1127388 rows containing non-finite values (stat_boxplot).

data.norm <- cbind(normalise(data[,quantCols]),data[,idCols])

data.norm.melt <- reshape2::melt(data.norm)
## Using Protein.IDs, Majority.protein.IDs, Protein.names, Gene.names as id variables
intensityDistribution.norm <- ggplot(data.norm.melt, aes(x = variable, y = value)) +
  geom_boxplot(notch = FALSE, outlier.shape = NA) +
  scale_x_discrete(labels = NULL, breaks = NULL) + labs(x = "Sample", y = "Normalised Intensity") +
  theme_classic()
plot(intensityDistribution.norm)
## Warning: Removed 1127388 rows containing non-finite values (stat_boxplot).

Calculate spatial autocorrelation (Moran’s I) of each protein.

Pre-calculated values are included for speed. To recalculate values, comment/uncomment lines as described below Adjust ‘cores’ to the number of logical cores to split the workload over

#Uncomment this block to re-calculate, adjust cores to appropriate value based on computer hardware
# data.moran <- moran.Parallel(data.norm,
#                               data.norm$Gene.names,
#                               nPixels = 384,
#                               multiPlate = TRUE,
#                               nrow = 16,
#                               ncol = 24,
#                               cores = 4
#                               )
#saveRDS(data.moran, "Data/data-moran-recalculated.rds")

# remove this line if recalculating, otherwise pre-calculated values will load
data.moran <- readRDS("Data/data-moran.rds")

Filter correlated proteins and FDR-correct

data.moran$q.value <- p.adjust(data.moran$p.value, method = "BH")

Plotting protein maps

titles <- c("Peripherin", "Glycogen phosphorylase, liver form", "Histone H4",
            "Haemoglobin", "Aspartate beta-Hydroxylase", "CD45")
i <- 0
for (gene in c('PRPH','PYGL','HIST1H4A','HBB', 'ASPH', 'PTPRC')) {
    i <- i + 1
    par(mar = c(2,2,2,6))
        res <- plotGeneConcat(data.norm, gene, scaled = TRUE, main = paste(titles[i]), cex = 12,
                 impute = F, axis.col=NULL, axis.row=NULL,
                 asp = T, col = viridis(48), na.col = "gray70", fmt.key = "%.0f %%",
                 key=list(side = 4, las = 1, tick = FALSE, labels = FALSE),
                 polygon.key = list(border = NA))

}

Clustering

As some sampled voxels were either empty or contained only haemorrhage, these need to be removed

voxel.type <- c(rep('Tissue',84),rep('Haemorrhage',2),rep('Tissue',10), #plate1
             rep('Tissue',52),rep('Haemorrhage',3),rep('Tissue',5),'Empty',rep('Tissue',2),rep('Haemorrhage',5),rep('Tissue',4),'Empty','Tissue',rep('Haemorrhage',9),'Tissue','Empty',rep('Haemorrhage',11), #plate2
             rep(c(rep('Tissue',11),'Empty'),7),rep('Tissue',12), #plate3
             'Empty',rep('Tissue',11),'Empty',rep('Tissue',11),rep('Tissue',72))#plate4


plate1 = matrix(rev(voxel.type[1:96]), nrow = 8, ncol = 12, byrow = T)
plate2 = matrix(rev(voxel.type[97:192]), nrow = 8, ncol = 12, byrow = T)
plate3 = matrix(rev(voxel.type[193:288]), nrow = 8, ncol = 12, byrow = T)
plate4 = matrix(rev(voxel.type[289:384]), nrow = 8, ncol = 12, byrow = T)

voxel.type.m = rbind(cbind(plate1,plate2),cbind(plate3,plate4))
par(mar = c(2,0,2,6))
plot(voxel.type.m, axis.col=NULL, axis.row=NULL, xlab='', ylab='', main = '', asp = T,
     col = c("#FFFFCC","#A1DAB4","#225EA8"), breaks = c("Tissue","Haemorrhage","Empty"))

rm(plate1,plate2,plate3,plate4)
tissue <- names(data.norm[,seq(1:384)[voxel.type == 'Tissue']])
tissue.index <- seq(1:384)[voxel.type == 'Tissue']
tissue.empty <- names(data.norm[,seq(1:384)[voxel.type == 'Empty']])
tissue.empty.index <- seq(1:384)[voxel.type == 'Empty']
tissue.haemorrhage <- names(data.norm[,seq(1:384)[voxel.type == 'Haemorrhage']])
tissue.haemorrhage.index <- seq(1:384)[voxel.type == 'Haemorrhage']

data.tissue <- data[, voxel.type == "Tissue"]
data.norm.tissue <- data.norm[, voxel.type == "Tissue"]

Distance matrix and hierarchical clustering

data.tissue.t <- transpose(data.tissue)
rownames(data.tissue.t) <- colnames(data.tissue)
colnames(data.tissue.t) <- data.tissue.t["Gene.names", ]
dist.tissue <- dist(data.tissue.t[1:342,])
hclust.tissue <- hclust(dist.tissue, method = "average")
data.norm.tissue.t <- transpose(data.norm.tissue)
rownames(data.norm.tissue.t) <- colnames(data.norm.tissue)
colnames(data.norm.tissue.t) <- data.norm.tissue.t["Gene.names", ]
dist.tissue.norm <- dist(data.norm.tissue.t[1:342,])
hclust.tissue.norm <- hclust(dist.tissue, method = "average")

Assign clusters to dendrogram and plot clusters onto grid sampled

dynamicClusters <- cutreeDynamic(hclust.tissue.norm, distM = as.matrix(dist.tissue.norm),
                                      minClusterSize = 1,
                                      method = "hybrid",
                                      cutHeight = 100)
##  ..done.
temp <- c(rep(NaN,384))
temp[tissue.index] <- dynamicClusters

par(mar = c(2,2,2,2))
plotClusters(temp,
             title = "",
             axis.col=NULL, axis.row=NULL,
             na.col = "gray70", na.print = FALSE,
             nrow = data.cluster.nrow[7],
             ncol = data.cluster.ncol[7],
             multiplate = TRUE,
             fmt.cell='%.0f', cex = 1.25,
             key = NULL,
             asp = T
)

#reinsert dropped voxels
clusters <- data.frame(sample = tissue.index, cluster = dynamicClusters)
empty <- c(1:384)[!c(1:384) %in% clusters$sample]
df.empty <- data.frame(sample = empty, cluster = "Empty")
clusters <- rbind(clusters, df.empty)
clusters <- clusters[order(clusters$sample),]