library(readxl) # version 1.4.2
library(dplyr) # version 1.1.2
library(lubridate) # version 1.9.2
library(RColorBrewer) # version 1.1-3
library(FSA) # version 0.9.4
library(ggh4x) # version 0.2.4
library(fixest) # version 0.11.1
library(ggplot2) # version 3.4.2
library(qiime2R) # version 0.99.6
library(cowplot) # version 1.1.1
library(readr) # version 2.1.4
library(ggpubr) # version 0.6.0
library(ggrepel) # version 0.9.3
library(vegan) # version 2.6-4
library(ANCOMBC) # version 2.0.2
library(phyloseq) # version 1.42.0
library(tidyr) # version 1.3.0
library(microViz) # version 0.10.8
library(jsonlite) # version 1.8.4
library(MicrobiotaProcess) # version 1.12.3
library(gtools) # version 3.9.4
library(igraph) # version 1.4.2
library(stringr) # version 1.5.0
library(multcomp) # version 1.4-25
set.seed(5)
Metadata281 <- read_excel("data/Metadata281.xlsx")
# Rename some columns
colnames(Metadata281)[colnames(Metadata281) == 'CD4+_count'] <- 'CD4_count'
colnames(Metadata281)[colnames(Metadata281) == 'Cohort'] <- 'Cohort_Description'
colnames(Metadata281)[colnames(Metadata281) == 'HIV-1_viral_load'] <- 'HIV1_viral_load'
colnames(Metadata281)[colnames(Metadata281) == 'IL-6_pg_mL'] <- 'IL6_pg_mL'
# Create viral outcome column
Metadata281$HIV1_viral_load[Metadata281$HIV1_viral_load == 'ND'] <- 0
Metadata281$HIV1_viral_load <- as.numeric(Metadata281$HIV1_viral_load)
Metadata281$HIV1_viral_outcome <- 'Suppressed'
Metadata281$HIV1_viral_outcome[Metadata281$Cohort_Short == 'A' &
Metadata281$Visit == 3 &
Metadata281$HIV1_viral_load > 200] <- 'Viremic'
Metadata281$HIV1_viral_outcome[Metadata281$Cohort_Short == 'B' &
Metadata281$Visit == 2 &
Metadata281$HIV1_viral_load > 200] <- 'Viremic'
Metadata281$HIV1_viral_outcome <- as.factor(Metadata281$HIV1_viral_outcome)
# Calculate length on ART column
Metadata281$ARTStart[Metadata281$Cohort_Short == "B"] <- Metadata281$ART_Start_Date[Metadata281$Cohort_Short == "B"]
Metadata281$ARTStart <- as.Date(Metadata281$ARTStart, "%Y")
lubridate::day(Metadata281$ARTStart) <- 1
lubridate::month(Metadata281$ARTStart) <- 6
Metadata281$Visit_date <- as.Date(Metadata281$Visit_date, "%m/%d/%y")
Metadata281$LenghtOnArt_yrs[Metadata281$Cohort_Short == "A"] <- 0.5
Metadata281$LenghtOnArt_yrs[Metadata281$Cohort_Short == "C"] <- 0.0
Metadata281$LenghtOnArt_yrs[Metadata281$Cohort_Short == "B"] <- time_length(Metadata281$Visit_date[Metadata281$Cohort_Short == "B"] - Metadata281$ARTStart[Metadata281$Cohort_Short == "B"], "years")
# Make cleaner columns for visualizations
Metadata281$Location[Metadata281$Arm_Short == "MA"] <- "Urban"
Metadata281$Location[Metadata281$Arm_Short == "MT"] <- "Rural"
Metadata281$Cohort[Metadata281$Cohort_Short == "A"] <- "Naïve"
Metadata281$Cohort[Metadata281$Cohort_Short == "B"] <- "Exp"
Metadata281$Cohort[Metadata281$Cohort_Short == "C"] <- "HC"
Metadata281$Week[Metadata281$Visit == 2] <- 0
Metadata281$Week[Metadata281$Visit == 3] <- 24
Metadata281$Location <- as.factor(Metadata281$Location)
Metadata281$Cohort <- as.factor(Metadata281$Cohort)
Metadata281$Cohort <- ordered(Metadata281$Cohort, levels = c("Naïve", "Exp", "HC"))
Metadata281$Week <- as.factor(Metadata281$Week)
# make correct factored variables
fact_cols <- c("Gender", "HIV_Status", "Bristol_Score", "Water_Source_Num",
"Cotrimoxazole_HIV", "Manual_work_job", "Manual_chores_home",
"Head_of_household", "Education_Level", "Normal_work_transportation")
Metadata281[fact_cols] <- lapply(Metadata281[fact_cols], factor)
Metadata281$BMI_Cat <- ordered(Metadata281$BMI_Cat, levels = c("Severe_thinness", "Moderate_thinnes", "Mild_thinness", "Normal", "Overweight", "Obese"))
# make sure immune markers are numeric
immune_markers <- c("CD4_Percent",
"CD4_HLADRpos_CD38pos_Percent",
"CD8_HLADRpos_CD38pos_Percent",
"CD4_PD1_Percent",
"CD8_PD1_Percent",
"CD4_CD103_Percent",
"CD8_CD103_Percent",
"IL6_pg_mL",
"CRP_mg_L")
for (marker in immune_markers){
Metadata281[[marker]] <- as.numeric(Metadata281[[marker]])
}
SupplementalFig3 <- Metadata281
# Impute means for immune markers where needed
for(marker in immune_markers){
Metadata281[[marker]][is.na(Metadata281[[marker]])] <- mean(Metadata281[[marker]], na.rm = TRUE)
}
# Make sure to remove sample with no immune data
Metadata281 <- Metadata281 %>%
filter(SampleID != "ZIM033.3")
# Keep only samples with two visits
Metadata281 <- Metadata281[Metadata281$PID %in% Metadata281$PID[duplicated(Metadata281$PID)],]
# Remove patients that were viremic
vir_PID <- Metadata281 %>%
filter(HIV1_viral_outcome == "Viremic") %>%
dplyr::select(SampleID, PID, Cohort_Short, HIV1_viral_outcome)
PIDs <- unique(vir_PID$PID)
Metadata281$HIV1_viral_outcome[Metadata281$PID %in% PIDs] <- 'Viremic'
# get figure 1 inter data
inter_fig1 <- Metadata281 %>%
filter(!(HIV1_viral_outcome == "Viremic" & Cohort_Short == "B")) %>%
filter(Visit == 2)
# remove all viremmic patients from metadata
Metadata281 <- subset(Metadata281, !(PID %in% PIDs))
# Get dataframe of just baseline measurements
baseline <- Metadata281 %>% filter(Week == 0)
# Creating a color palettes
visitColors <- palette(brewer.pal(n = 5, name = "BrBG"))
cohortColors <- palette(brewer.pal(n = 5, name = "PiYG"))
# Figure 1
sig_df <- data.frame(matrix(ncol = 5, nrow = 0))
colnames(sig_df) <- c("marker", "comparison", "pval", "direction", "patients")
categories <- c("All", "Urban", "Rural")
markers <- c("CD4_HLADRpos_CD38pos_Percent",
"CD8_HLADRpos_CD38pos_Percent",
"CD4_PD1_Percent",
"CD8_PD1_Percent",
"CD4_CD103_Percent",
"CD8_CD103_Percent",
"IL6_pg_mL",
"CRP_mg_L")
for (category in categories){
if(category != "All"){
heatmap_df <- Metadata281 %>% filter(Location == category)
heatmap_baseline <- inter_fig1 %>% filter(Location == category)
}else{
heatmap_df <- Metadata281
heatmap_baseline <- inter_fig1
}
for (marker in markers){
patients <- category
# inter cohort tests
dt <- dunnTest(heatmap_baseline[[marker]], heatmap_baseline$Cohort_Short, method = "bonferroni")
c1 <- "Naïve - Exp"
c2 <- "Naïve - HC"
c3 <- "Exp - HC"
p1 <- dt$res$P.adj[1]
p2 <- dt$res$P.adj[2]
p3 <- dt$res$P.adj[3]
mean <- heatmap_baseline %>%
group_by(Cohort_Short) %>%
summarise(avg = mean(eval(as.name(marker))))
avgA <- mean[mean$Cohort_Short == "A",2]$avg
avgB <- mean[mean$Cohort_Short == "B",2]$avg
avgC <- mean[mean$Cohort_Short == "C",2]$avg
diffAB <- avgA - avgB
diffAC <- avgA - avgC
diffBC <- avgB - avgC
r1 <- c(marker, c1, p1, diffAB, patients)
r2 <- c(marker, c2, p2, diffAC, patients)
r3 <- c(marker, c3, p3, diffBC, patients)
sig_df[nrow(sig_df)+1, ] <- r1
sig_df[nrow(sig_df)+1, ] <- r2
sig_df[nrow(sig_df)+1, ] <- r3
# intra cohort tests
sub <- heatmap_df %>% filter(Cohort_Short == "A")
comparison <- "Naïve: 0 - 24"
t <- wilcox.test(sub[[marker]] ~ Visit, data = sub, paired = T)
mean <- sub %>%
group_by(Visit) %>%
summarise(avg = mean(eval(as.name(marker))))
avg1 <- mean[mean$Visit == 2,2]$avg
avg2 <- mean[mean$Visit ==3,2]$avg
mean_diff <- avg1 - avg2
r <- c(marker, comparison, t$p.value, mean_diff, patients)
sig_df[nrow(sig_df)+1, ] <- r
sub <- heatmap_df %>% filter(Cohort_Short == "B")
comparison <- "Exp: 0 - 24"
t <- wilcox.test(sub[[marker]] ~ Visit, data = sub, paired = T)
mean <- sub %>%
group_by(Visit) %>%
summarise(avg = mean(eval(as.name(marker))))
avg1 <- mean[mean$Visit == 2,2]$avg
avg2 <- mean[mean$Visit ==3,2]$avg
mean_diff <- avg1 - avg2
r <- c(marker, comparison, t$p.value, mean_diff, patients)
sig_df[nrow(sig_df)+1, ] <- r
sub <- heatmap_df %>% filter(Cohort_Short == "C")
comparison <- "HC: 0 - 24"
t <- wilcox.test(sub[[marker]] ~ Visit, data = sub, paired = T)
mean <- sub %>%
group_by(Visit) %>%
summarise(avg = mean(eval(as.name(marker))))
avg1 <- mean[mean$Visit == 2,2]$avg
avg2 <- mean[mean$Visit ==3,2]$avg
mean_diff <- avg1 - avg2
r <- c(marker, comparison, t$p.value, mean_diff, patients)
sig_df[nrow(sig_df)+1, ] <- r
}
}
sig_df$direction <- as.numeric(sig_df$direction)
sig_df$pval <- as.numeric(sig_df$pval)
sig_df$patients <- as.factor(sig_df$patients)
sig_df$Significance <- 'NS'
sig_df$Significance[sig_df$pval <= 0.05] <- '*'
sig_df$Significance[sig_df$pval <= 0.01] <- '**'
sig_df$Significance[sig_df$pval <= 0.001] <- '***'
sig_df$Significance[sig_df$pval <= 0.0001] <- '****'
sig_df$Significance <- as.factor(sig_df$Significance)
sig_df$direction <- sig_df$direction * -1
sig_df$direction[sig_df$pval > 0.05] <- NA
markers_labels <- c("CD4+CD38+HLA-DR+ (%)",
"CD8+CD38+HLA-DR+ (%)",
"CD4+PD1+ (%)",
"CD8+PD1+ (%)",
"CD4+CD103+ (%)",
"CD8+CD103+ (%)",
"IL-6 (pg/mL)",
"CRP (mg/L)")
for (i in 1:length(markers)){
sig_df$marker[sig_df$marker == markers[i]] <- markers_labels[i]
}
sig_df$category[sig_df$comparison == "Naïve - Exp" |
sig_df$comparison == "Naïve - HC" |
sig_df$comparison == "Exp - HC"] <- "Inter"
sig_df$category[is.na(sig_df$category)] <- "Intra"
comp <- c("Naïve - Exp","Naïve - HC","Exp - HC", "Naïve: 0 - 24", "Exp: 0 - 24", "HC: 0 - 24")
ggplot(sig_df, aes(x = factor(comparison, levels = comp),
y = factor(marker, levels = markers_labels))) +
geom_raster(aes(fill=direction))+
scale_fill_gradient2(high = "red2", mid = "white", low = "deepskyblue3", na.value = "lightgrey", midpoint = 0,
limits = c(-25, 25))+
geom_text(aes(label=Significance), size = 6) +
theme_bw() +
xlab("") +
ylab("") +
theme(text = element_text(size = 18),
plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
legend.key = element_rect(colour = "black", size = 1),
legend.title = element_text(size = 14),
legend.text=element_text(size=12)) +
guides(fill=guide_legend(title="Percent \nDelta", reverse = T)) +
facet_nested(~patients + category, scales = "free_x")