This is an analysis of differential expression at the gene level between supershedder groupings based on shedding rates measured on the day of sacrifice for infected mallard ileum samples.

## Load packages and data
library(limma)
library(edgeR)
library(Glimma)
library(gplots)
library(ggrepel)
library(RColorBrewer)
library(gridExtra)
library(kableExtra)
library(tidyverse)

## Load data
cnt1 <- read_delim("G:/Shared drives/RNA Seq Supershedder Project/MALL DE manuscript/mallardLPAIV/extData/ileumCounts.txt", delim = "\t") %>% distinct(gene, .keep_all = TRUE)
covars <- read_delim("G:/Shared drives/RNA Seq Supershedder Project/MALL DE manuscript/mallardLPAIV/extData/MALL70_SSgroup_RAW_11.5.20.csv", delim = ",")
annot <- read_delim("G:/Shared drives/RNA Seq Supershedder Project/MALL DE manuscript/mallardLPAIV/extData/annotationGenesIleum110120.txt", delim = "\t")

Prep data for analysis

cnt <- cnt1 %>%
  as_tibble() %>%
  select(-gene)

newNames.ileum <- names(cnt) %>%
  as_tibble() %>%
  mutate(value = gsub("Iluem_", "", .$value)) %>% 
  separate(value, into = c(NA, "pt1", "pt2")) %>%
  mutate(pt1 = na_if(pt1, "MALL"),
         pt2 = na_if(pt2, "ileum")) %>%
  mutate(bird = ifelse(is.na(pt1), pt2, pt1)) %>%
  mutate(bird = as.numeric(bird),
    bird = formatC(bird, width = 2, format = "d", flag = "0")) %>%
  mutate(newName = paste0("ileum_", bird))

colnames(cnt) <- newNames.ileum$newName

cnt <- cnt[,order(colnames(cnt))] %>%
  mutate(gene = cnt1$gene) %>%
  column_to_rownames("gene") %>%
  as_tibble(rownames = NA)

### Covars
covars <- covars %>%
  mutate(bird = as.numeric(bird)) %>%
  mutate(bird = formatC(bird, width = 2, format = "d", flag = "0")) %>%
  arrange(bird) %>%
  mutate(bird = as.factor(bird)) %>%
  mutate(group = str_remove(group, "-"))

covars.ileum <- covars %>%
  filter(bird %in% newNames.ileum$bird)

cnt <- cnt %>% select(-ileum_01, -ileum_72)

covars.ileum <- covars.ileum %>% filter(bird != "01",
                                        bird != "72")

Prep data for analysis

Here we standardize expression levels across samples of varying depth by using the counts per million (CPM) and trimmed mean of M-values (TMM) methods. We also remove any genes that are expressed at few than 0.5 CPM in at least 25% of individuals.

#Convert to DGEList object
dge <- DGEList(counts=cnt)
dge$genes <- annot[match(rownames(dge$counts), annot$ensembl_gene_id),]

#CPM and log-CPM
cpm <- cpm(dge)
lcpm <- cpm(dge, log = TRUE)

#### Retain only genes expressed at >0.5 CPM in at least 25% of individuals
table(rowSums(dge$counts==0) == length(cnt[1,]))
## 
## FALSE  TRUE 
## 17388   728
keep.exprs <- rowSums(cpm>0.5) >= length(cnt[1,])/4
sum(keep.exprs)
## [1] 14563
dge <- dge[keep.exprs,, keep.lib.sizes=FALSE]
table(rowSums(dge$counts==0) == length(cnt[1,]))
## 
## FALSE 
## 14563
dim(dge)
## [1] 14563    40

Subset data

Following normalization, we subset the data based on infection groups.

#Assign groups in DGElist
group <- recode(covars.ileum$group, C1 = "Ctl", C29 = "Ctl") %>% 
  as.factor()
dge$samples$group <- group

### Divide out I1 and I2
dge.I1 <- dge[,grep('\\bI1\\b', dge$samples$group)]
dge.I1 <- calcNormFactors(dge.I1, method = "TMM")

dge.I2 <- dge[,grep('\\bI2\\b', dge$samples$group)]
dge.I2 <- calcNormFactors(dge.I2, method = "TMM")

covars.I1 <- covars %>% filter(group == "I1", bird != "14", bird != "32", bird != "47")
covars.I2 <- covars %>% filter(group == "I2")

Differential expression analysis

Establish design & contrast matrices

## Establish design matrix: I1
design.I1 = model.matrix(~ 0 +
                        factor(covars.I1$SSgroup.virus.sac) +
                        covars.I1$age +
                        covars.I1$sex + 
                        covars.I1$Bursa_pool)

colnames(design.I1) <- c("HIGH", "LOW", "MODERATE",
                         "age", "sexM", "poolMSU2",
                         "poolMSU3", "poolMSU4", "poolMSU5")


contr.matrix.I1 = makeContrasts(
  LvM = LOW - MODERATE,
  MvH = MODERATE - HIGH,
  LvH = LOW - HIGH,
  levels = colnames(design.I1))

## Mean-variance trend plots
v.I1 <- voomWithQualityWeights(dge.I1, design.I1, plot = FALSE)

#Fitting models
vfit.I1 <- lmFit(v.I1, design.I1) 
vfit.I1 <- contrasts.fit(vfit.I1, contrasts = contr.matrix.I1)
tfit.I1 <- treat(vfit.I1, lfc = 0.5)

## Identify DE genes
dt.I1 <- decideTests(tfit.I1, p.value = 0.1, adjust.method = "fdr")
dt.tib.I1 <- as_tibble(dt.I1, rownames = NA) %>% 
  rownames_to_column("gene") %>% 
  mutate_at(vars(starts_with("L")), as.numeric) %>% 
  mutate_at(vars(starts_with("M")), as.numeric) %>% 
  rename(LvM.I1 = LvM,
         MvH.I1 = MvH,
         LvH.I1 = LvH) %>% 
  filter_at(vars(LvM.I1:LvH.I1), any_vars(. != 0))


## Establish design matrix: I2
design.I2 = model.matrix(
  ~ 0 +
    factor(covars.I2$SSgroup.virus.sac) +
    covars.I2$age +
    covars.I2$sex +
    covars.I2$Bursa_pool
)

colnames(design.I2) <- c(
  "HIGH",
  "LOW",
  "MODERATE",
  "age",
  "sexM",
  "poolMSU2",
  "poolMSU3",
  "poolMSU4",
  "poolMSU5"
)

contr.matrix.I2 = makeContrasts(
  LvM = LOW - MODERATE,
  MvH = MODERATE - HIGH,
  LvH = LOW - HIGH,
  levels = colnames(design.I2))

## Mean-variance trend plots
v.I2 <- voomWithQualityWeights(dge.I2, design.I2, plot = FALSE)

#Fitting models
vfit.I2 <- lmFit(v.I2, design.I2) 
vfit.I2 <- contrasts.fit(vfit.I2, contrasts = contr.matrix.I2)
tfit.I2 <- treat(vfit.I2, lfc = 0.5)

## Identify DE genes
dt.I2 <- decideTests(tfit.I2, p.value = 0.1, adjust.method = "fdr")
dt.tib.I2 <- as_tibble(dt.I2, rownames = NA) %>% 
  rownames_to_column("gene") %>% 
  mutate_at(vars(starts_with("L")), as.numeric) %>% 
  mutate_at(vars(starts_with("M")), as.numeric) %>% 
  rename(LvM.I2 = LvM,
         MvH.I2 = MvH,
         LvH.I2 = LvH) %>% 
  filter_at(vars(LvM.I2:LvH.I2), any_vars(. != 0))


## Bring it all together
dt.tib <- full_join(dt.tib.I1, dt.tib.I2) %>% 
  mutate_at(vars(starts_with("L")), as.numeric) %>% 
  mutate_at(vars(starts_with("M")), as.numeric) %>% 
  replace(is.na(.), 0)
## Joining, by = "gene"

Count DE genes

For a gene to be considered differentially expressed, we require a p-value of 0.1 with a false discovery rate correction and a log fold count difference of 1.

allResults <- rbind(
  as.data.frame(summary(dt.I1)),
  as.data.frame(summary(dt.I2))) %>% 
  filter(Var1 != "NotSig")

kable(allResults) %>%
  kable_styling("striped", full_width = F) %>%
  pack_rows("1 Day Post Infection", 1, 6) %>%
  pack_rows("2 Days Post Infection", 7, 12)
Var1 Var2 Freq
1 Day Post Infection
Down LvM 1
Up LvM 0
Down MvH 0
Up MvH 0
Down LvH 0
Up LvH 0
2 Days Post Infection
Down LvM 4
Up LvM 2
Down MvH 8
Up MvH 25
Down LvH 27
Up LvH 106

Volcano plot

tmp1 <- topTreat(tfit.I1, coef = 1, n = Inf)
results1 <- mutate(tmp1, sig=ifelse(tmp1$adj.P.Val<0.1, "Sig", "Not Sig"))
p1 <- ggplot(results1, aes(logFC, -log10(P.Value))) +
  geom_point(aes(col = sig)) +
  scale_color_manual(values=c("black", "red")) +
  ggtitle("Low vs. Moderate shedding, Day 1") +
  ylab("-Log10(p-value)") +
  xlab("Log(Fold count)") +
  theme_bw() +
  theme(legend.position = "none")


tmp2 <- topTreat(tfit.I1, coef = 2, n = Inf)
results2 <- mutate(tmp2, sig=ifelse(tmp2$adj.P.Val<0.1, "Sig", "Not Sig"))
p2 <- ggplot(results2, aes(logFC, -log10(P.Value))) +
  geom_point(aes(col = sig)) +
  scale_color_manual(values=c("black", "red")) +
  ggtitle("Moderate vs. High shedding, Day 1") +
  ylab("-Log10(p-value)") +
  xlab("Log(Fold count)") +
  theme_bw() +
  theme(legend.position = "none")

tmp3 <- topTreat(tfit.I1, coef = 3, n = Inf)
results3 <- mutate(tmp3, sig=ifelse(tmp3$adj.P.Val<0.1, "Sig", "Not Sig"))
p3 <- ggplot(results3, aes(logFC, -log10(P.Value))) +
  geom_point(aes(col = sig)) +
  scale_color_manual(values=c("black", "red")) +
  ggtitle("Low vs. High shedding, Day 1") +
  ylab("-Log10(p-value)") +
  xlab("Log(Fold count)") +
  theme_bw() +
  theme(legend.position = "none")

tmp4 <- topTreat(tfit.I2, coef = 1, n = Inf)
results4 <- mutate(tmp4, sig=ifelse(tmp4$adj.P.Val<0.1, "Sig", "Not Sig"))
p4 <- ggplot(results4, aes(logFC, -log10(P.Value))) +
  geom_point(aes(col = sig)) +
  scale_color_manual(values=c("black", "red")) +
  ggtitle("Low vs. Moderate shedding, Day 2") +
  ylab("-Log10(p-value)") +
  xlab("Log(Fold count)") +
  theme_bw() +
  theme(legend.position = "none")

tmp5 <- topTreat(tfit.I2, coef = 2, n = Inf)
results5 <- mutate(tmp5, sig=ifelse(tmp5$adj.P.Val<0.1, "Sig", "Not Sig"))
p5 <- ggplot(results5, aes(logFC, -log10(P.Value))) +
  geom_point(aes(col = sig)) +
  scale_color_manual(values=c("black", "red")) +
  ggtitle("Moderate vs. High shedding, Day 2") +
  ylab("-Log10(p-value)") +
  xlab("Log(Fold count)") +
  theme_bw() +
  theme(legend.position = "none")

tmp6 <- topTreat(tfit.I2, coef = 3, n = Inf)
results6 <- mutate(tmp6, sig=ifelse(tmp6$adj.P.Val<0.1, "Sig", "Not Sig"))
p6 <- ggplot(results6, aes(logFC, -log10(P.Value))) +
  geom_point(aes(col = sig)) +
  scale_color_manual(values=c("black", "red")) +
  ggtitle("Low vs. High shedding, Day 2") +
  ylab("-Log10(p-value)") +
  xlab("Log(Fold count)") +
  theme_bw() +
  theme(legend.position = "none")

grid.arrange(p1, p2, p3, p4, p5, p6, nrow = 2)

Heatmap - I2

# Get the gene names for DE genes
lcpm2 <- as_tibble(dge.I2$counts, rownames = NA) %>% 
  cpm(., log = TRUE)

newNames <- colnames(lcpm2) %>% 
  as_tibble() %>% 
  separate(value, into = c(NA, "bird")) %>% 
  bind_cols(covars.I2) %>% 
  unite("newNames", SSgroup.virus.sac, group, bird)

colOrder <- bind_cols(covars.I2) %>% 
  mutate(SSgroup.virus.sac = fct_relevel(SSgroup.virus.sac, "LOW", "MODERATE", "HIGH")) %>%
  arrange(SSgroup.virus.sac, group, bird) %>% 
  unite("newNames", SSgroup.virus.sac, group, bird)

colnames(lcpm2) <- newNames$newNames
lcpm2 <- lcpm2[, colOrder$newNames]

heatmap.annot <- annot %>%
  filter(ensembl_gene_id %in% dt.tib$gene) %>% 
  mutate(hgnc_symbol = replace_na(hgnc_symbol, ".")) %>% 
  unite(annot.hm, c(ensembl_gene_id, hgnc_symbol), sep = "-", remove = FALSE) %>% 
  select(annot.hm, ensembl_gene_id) %>% 
  distinct(ensembl_gene_id, .keep_all = TRUE)

deGeneCounts <- lcpm2 %>% 
  as_tibble() %>% 
  mutate(ensembl_gene_id = rownames(dge.I2$counts)) %>% 
  filter(ensembl_gene_id %in% dt.tib$gene) %>% 
  left_join(heatmap.annot) %>% 
  column_to_rownames("annot.hm") %>%
  select(-ensembl_gene_id) %>% 
  as.matrix()

## Set up palette
mypalette <- brewer.pal(11,"RdYlBu")
morecols <- colorRampPalette(mypalette)

hc <- hclust(as.dist(1-cor(t(deGeneCounts))))

# Plot the heatmap
heatmap.2(deGeneCounts,
          Colv = FALSE,
          Rowv = as.dendrogram(hc),
          col = rev(morecols(50)),
          trace = "none",
          colsep = c(4, 8),
          dendrogram = "row",
          density.info = "none",
          key = TRUE,
          margins = c(12, 12),
          scale ="row")

Boxplots for I1 differentially expressed genes

lcpm.DE <- lcpm %>%
  as_tibble(rownames = NA) %>% 
  rownames_to_column("identifier") %>% 
  filter(identifier %in% dt.tib.I1$gene) %>%
  pivot_longer(cols = contains("_"),
               names_to = "sample",
               values_to = "lcpm") %>%
  separate(sample, into = c("tissue", "bird")) %>%
  mutate(bird = as.factor(bird)) %>%
  left_join(covars, by = "bird") %>%
  filter(group == "C1" | group == "C14" | group == "I1") %>% 
  select(identifier, bird, lcpm, SSgroup.virus.sac, group) %>% 
  mutate(group = recode(group, C29 = "Control"),
         group = recode(group, C1 = "Control"),
         tmpID = ifelse(group == "Control", 'Control', 'Infected'))


aprioriPlotting <- function(target, ...) {
   annot.target <- annot %>%
    select(
      ensembl_gene_id,
      hgnc_symbol) %>%
    filter(ensembl_gene_id == target) %>% 
    mutate(hgnc_symbol = replace_na(hgnc_symbol, "."))

  plot <- lcpm.DE %>%
    filter(identifier == target) %>%
    mutate(SSgroup.virus.sac = fct_relevel(SSgroup.virus.sac, "LOW", "MODERATE", "HIGH")) %>%
    ggplot(aes(x = SSgroup.virus.sac, y = lcpm)) +
    facet_grid(. ~ tmpID, scales = "free", space = "free") +
    ylab("Log2(Counts per million)") +
    xlab("Shedding Group") +
    scale_fill_grey(start = 0.35, end = 1) + 
    geom_point(position = position_dodge(width=0.75), aes(group = group), show.legend = FALSE) +
    geom_boxplot(alpha = 0.5) +
    geom_label_repel(aes(label = bird, group = group, fill = NULL), position = position_dodge(width=0.75), show.legend=FALSE) +
    theme_classic() +
    labs(title = paste0("I1: ", target, " - ", annot.target[1,2])) +
    theme(legend.title = element_blank())
    
  print(plot)
}

for(target in sort(unique(dt.tib.I1$gene))) {
  aprioriPlotting(target)
}

Boxplots for I2 differentially expressed genes

lcpm.DE <- lcpm %>%
  as_tibble(rownames = NA) %>% 
  rownames_to_column("identifier") %>% 
  filter(identifier %in% dt.tib.I2$gene) %>%
  pivot_longer(cols = contains("_"),
               names_to = "sample",
               values_to = "lcpm") %>%
  separate(sample, into = c("tissue", "bird")) %>%
  mutate(bird = as.factor(bird)) %>%
  left_join(covars, by = "bird") %>%
  filter(group == "C1" | group == "C14" | group == "I2") %>% 
  select(identifier, bird, lcpm, SSgroup.virus.sac, group) %>% 
  mutate(group = recode(group, C29 = "Control"),
         group = recode(group, C1 = "Control"),
         tmpID = ifelse(group == "Control", 'Control', 'Infected'))


aprioriPlotting <- function(target, ...) {
   annot.target <- annot %>%
    select(
      ensembl_gene_id,
      hgnc_symbol) %>%
    filter(ensembl_gene_id == target) %>% 
    mutate(hgnc_symbol = replace_na(hgnc_symbol, "."))

  plot <- lcpm.DE %>%
    filter(identifier == target) %>%
    mutate(SSgroup.virus.sac = fct_relevel(SSgroup.virus.sac, "LOW", "MODERATE", "HIGH")) %>%
    ggplot(aes(x = SSgroup.virus.sac, y = lcpm)) +
    facet_grid(. ~ tmpID, scales = "free", space = "free") +
    ylab("Log2(Counts per million)") +
    xlab("Shedding Group") +
    scale_fill_grey(start = 0.35, end = 1) + 
    geom_point(position = position_dodge(width=0.75), aes(group = group), show.legend = FALSE) +
    geom_boxplot(alpha = 0.5) +
    geom_label_repel(aes(label = bird, group = group, fill = NULL), position = position_dodge(width=0.75), show.legend=FALSE) +
    theme_classic() +
    labs(title = paste0("I2: ", target, " - ", annot.target[1,2])) +
    theme(legend.title = element_blank())
    
  print(plot)
}

for(target in sort(unique(dt.tib.I2$gene))) {
  aprioriPlotting(target)
}

Annotations for differentially expressed genes

ns denotes non-significant genes for each comparison and numerical values are the log(fold count) difference

tmp1a <- tmp1 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "LvM.I1")
tmp2a <- tmp2 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "MvH.I1")
tmp3a <- tmp3 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "LvH.I1")
tmp4a <- tmp4 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "LvM.I2")
tmp5a <- tmp5 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "MvH.I2")
tmp6a <- tmp6 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "LvH.I2")

bind_rows(tmp1a, tmp2a, tmp3a, tmp4a, tmp5a, tmp6a) %>% 
  select(-description) %>% 
  filter(ensembl_gene_id %in% dt.tib$gene) %>% 
  mutate(logFoldCount = ifelse(adj.P.Val < 0.10, round(logFC, 2), "ns")) %>% 
  select(-adj.P.Val, -logFC, -gene_biotype) %>% 
  pivot_wider(names_from = comp, values_from = logFoldCount) %>% 
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
ensembl_gene_id hgnc_symbol LvM.I1 MvH.I1 LvH.I1 LvM.I2 MvH.I2 LvH.I2
ENSAPLG00020007325 NA -3.72 ns ns ns ns ns
ENSAPLG00020005151 NA ns ns ns -7.67 29.06 21.4
ENSAPLG00020002099 SUSD2 ns ns ns ns ns 12.11
ENSAPLG00020012418 LAMB3 ns ns ns ns ns 10.97
ENSAPLG00020001571 NA ns ns ns ns 16.95 13.54
ENSAPLG00020001757 NA ns ns ns ns 15.36 13.19
ENSAPLG00020001436 CPO ns ns ns ns 13.76 11.89
ENSAPLG00020016143 PCK1 ns ns ns ns ns 11.08
ENSAPLG00020013723 NA ns ns ns ns ns 11.33
ENSAPLG00020016534 NA ns ns ns ns ns 9.26
ENSAPLG00020015167 MME ns ns ns ns ns 8.45
ENSAPLG00020009971 NA ns ns ns ns 16.15 12.97
ENSAPLG00020005323 PLB1 ns ns ns ns ns 11.28
ENSAPLG00020009050 NA ns ns ns ns ns 10.98
ENSAPLG00020003693 NA ns ns ns ns ns 8.58
ENSAPLG00020000462 SULT1C3 ns ns ns ns 16.51 12.52
ENSAPLG00020001772 NA ns ns ns ns 23.22 16.33
ENSAPLG00020006634 SLC10A2 ns ns ns ns 22.35 19.58
ENSAPLG00020016544 ACE2 ns ns ns ns ns 10.76
ENSAPLG00020005311 SLC3A1 ns ns ns ns ns 8.97
ENSAPLG00020005022 ADGRG7 ns ns ns ns ns 9.42
ENSAPLG00020008901 PEPD ns ns ns ns ns 9.27
ENSAPLG00020009556 CDKN2A ns ns ns ns ns 8.93
ENSAPLG00020008627 NQO1 ns ns ns ns 19.74 15.84
ENSAPLG00020004729 HKDC1 ns ns ns ns ns 8.68
ENSAPLG00020004842 NA ns ns ns 9.84 ns ns
ENSAPLG00020010989 NA ns ns ns ns ns 10.23
ENSAPLG00020001209 APOB ns ns ns ns 14.58 13.03
ENSAPLG00020003190 SLC5A1 ns ns ns ns ns 11.4
ENSAPLG00020013029 NA ns ns ns ns ns 9.18
ENSAPLG00020007419 DPEP1 ns ns ns ns ns 10.13
ENSAPLG00020017883 XPNPEP2 ns ns ns ns ns 8.47
ENSAPLG00020013655 ENTPD5 ns ns ns ns ns 9.2
ENSAPLG00020000801 NA ns ns ns ns ns 8.32
ENSAPLG00020007676 NA ns ns ns ns ns 8.67
ENSAPLG00020007551 NA ns ns ns ns ns 10.43
ENSAPLG00020011928 NA ns ns ns ns ns 12.34
ENSAPLG00020006270 NA ns ns ns ns ns 8.75
ENSAPLG00020017864 NA ns ns ns ns ns 8.77
ENSAPLG00020009266 GDA ns ns ns ns ns 11.15
ENSAPLG00020003980 NA ns ns ns ns ns 16.09
ENSAPLG00020014973 AQP4 ns ns ns ns ns 12.72
ENSAPLG00020008932 SLC5A12 ns ns ns ns 17.05 14.69
ENSAPLG00020008682 ALDOB ns ns ns ns 14.9 11.94
ENSAPLG00020000944 TMEM37 ns ns ns ns ns 8.19
ENSAPLG00020004413 AQP8 ns ns ns ns ns 33.87
ENSAPLG00020015106 CYP4F8 ns ns ns ns ns 9.46
ENSAPLG00020004241 NA ns ns ns ns 16.32 14.36
ENSAPLG00020000579 NA ns ns ns ns ns 8.72
ENSAPLG00020017868 NA ns ns ns ns ns 13.81
ENSAPLG00020003684 NA ns ns ns ns 21 15.78
ENSAPLG00020013926 OLFM4 ns ns ns ns ns -10.08
ENSAPLG00020018096 SLC2A5 ns ns ns ns ns 8.96
ENSAPLG00020000128 SERPINB1 ns ns ns ns ns 9.37
ENSAPLG00020005067 NA ns ns ns ns ns 13.41
ENSAPLG00020005597 KCNJ15 ns ns ns ns ns 8.99
ENSAPLG00020017448 LGALS3 ns ns ns ns ns 10.15
ENSAPLG00020008527 F2RL1 ns ns ns ns ns 9.86
ENSAPLG00020015547 NA ns ns ns ns 20.22 17.16
ENSAPLG00020005500 NR0B2 ns ns ns ns ns 8.56
ENSAPLG00020000836 NA ns ns ns ns 19.54 16
ENSAPLG00020011820 NA ns ns ns ns ns 9.1
ENSAPLG00020007487 NA ns ns ns ns ns 10.49
ENSAPLG00020013900 NA ns ns ns ns 25.22 22.86
ENSAPLG00020017865 NA ns ns ns ns ns 8.83
ENSAPLG00020007413 NA ns ns ns ns ns 12
ENSAPLG00020008194 NA ns ns ns ns ns 9.66
ENSAPLG00020011484 NA ns ns ns ns ns 8.6
ENSAPLG00020016671 NA ns ns ns ns -17.69 -12.91
ENSAPLG00020006372 CALB1 ns ns ns ns 22.87 18.72
ENSAPLG00020000985 KY ns ns ns ns ns 10.98
ENSAPLG00020007294 NA ns ns ns ns 22.15 18.51
ENSAPLG00020001254 NA ns ns ns ns ns 14.54
ENSAPLG00020013906 NA ns ns ns ns 26.56 24.64
ENSAPLG00020007723 NA ns ns ns ns ns 8.67
ENSAPLG00020010330 NA ns ns ns ns ns -8.52
ENSAPLG00020009562 ACOT12 ns ns ns ns ns 15.92
ENSAPLG00020005630 NA ns ns ns ns ns 8.19
ENSAPLG00020003187 SLC44A2 ns ns ns ns ns 9.99
ENSAPLG00020016235 SLC13A1 ns ns ns ns ns 12.61
ENSAPLG00020008862 NR4A3 ns ns ns ns -20.64 -16.85
ENSAPLG00020013917 NA ns ns ns ns ns 21.96
ENSAPLG00020016406 PCSK9 ns ns ns ns ns 10.85
ENSAPLG00020008172 NA ns ns ns ns ns 9.45
ENSAPLG00020003114 TKFC ns ns ns ns 13.71 10.34
ENSAPLG00020003094 NA ns ns ns ns -15.89 -10.9
ENSAPLG00020004137 NA ns ns ns ns ns -15.59
ENSAPLG00020004389 DDX47 ns ns ns ns ns 8.59
ENSAPLG00020003497 KLF4 ns ns ns ns ns 10.02
ENSAPLG00020017346 PIPOX ns ns ns ns ns 23.87
ENSAPLG00020008534 SLCO4A1 ns ns ns ns ns 9.67
ENSAPLG00020000599 NA ns ns ns ns ns 9.15
ENSAPLG00020003474 DNAJC22 ns ns ns ns ns 14.7
ENSAPLG00020008855 NA ns ns ns ns ns -33.28
ENSAPLG00020011196 NA ns ns ns ns ns 35.03
ENSAPLG00020002652 NA ns ns ns ns ns 10.95
ENSAPLG00020017229 SLC6A4 ns ns ns ns ns 9.74
ENSAPLG00020004558 NA ns ns ns ns ns 9.41
ENSAPLG00020007890 NA ns ns ns -7.54 25.01 17.47
ENSAPLG00020015505 NA ns ns ns ns ns -9.35
ENSAPLG00020013563 SQLE ns ns ns ns ns 11.17
ENSAPLG00020012578 CREB3L3 ns ns ns ns ns 11.02
ENSAPLG00020002747 NA ns ns ns ns ns -24.64
ENSAPLG00020007878 B3GALT5 ns ns ns -6.66 20.19 13.53
ENSAPLG00020004812 NPC1L1 ns ns ns ns ns 8.98
ENSAPLG00020004008 NA ns ns ns ns ns 18.56
ENSAPLG00020004098 NA ns ns ns ns ns 18.75
ENSAPLG00020008569 MFSD4B ns ns ns ns ns 8.8
ENSAPLG00020017400 LTF ns ns ns 8.36 -15.22 ns
ENSAPLG00020010722 DUOX2 ns ns ns -6.19 27.45 21.26
ENSAPLG00020007929 PCP4 ns ns ns ns ns -8.45
ENSAPLG00020006588 S100A13 ns ns ns ns ns -12.82
ENSAPLG00020000169 INSIG1 ns ns ns ns ns 8.91
ENSAPLG00020017133 NA ns ns ns ns ns -8.36
ENSAPLG00020012336 NA ns ns ns ns ns -9.98
ENSAPLG00020006537 S100A12 ns ns ns ns -30.3 -23
ENSAPLG00020015804 APOA4 ns ns ns ns ns 17.14
ENSAPLG00020000876 MSMO1 ns ns ns ns ns 9.87
ENSAPLG00020005335 NA ns ns ns ns ns -16.89
ENSAPLG00020003263 NA ns ns ns ns ns 13.02
ENSAPLG00020000190 NA ns ns ns ns ns -36.82
ENSAPLG00020014632 SERPING1 ns ns ns ns ns -19.93
ENSAPLG00020014118 MRPS23 ns ns ns ns ns -8.86
ENSAPLG00020004059 NA ns ns ns ns ns 13.17
ENSAPLG00020008356 SLC2A2 ns ns ns ns 41.41 31.4
ENSAPLG00020017609 NA ns ns ns ns ns 12.31
ENSAPLG00020007966 NA ns ns ns ns -41.95 -33.71
ENSAPLG00020014477 NA ns ns ns ns ns -9.93
ENSAPLG00020015261 NA ns ns ns ns -14.74 -12.14
ENSAPLG00020014557 SCARNA1 ns ns ns ns ns -15.68
ENSAPLG00020016540 NA ns ns ns ns -36.34 -29.96
ENSAPLG00020003062 NA ns ns ns ns ns -13.1
ENSAPLG00020007013 NA ns ns ns ns ns 14.69
ENSAPLG00020010278 RPL29 ns ns ns ns ns -10.61
ENSAPLG00020017598 LECT2 ns ns ns ns ns -9.95
ENSAPLG00020007861 NA ns ns ns ns ns -22.2

Gene functions

bind_rows(tmp1a, tmp2a, tmp3a, tmp4a, tmp5a, tmp6a) %>% 
  select(-comp, -adj.P.Val, -logFC) %>% 
  filter(ensembl_gene_id %in% dt.tib$gene) %>% 
  distinct(.) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
ensembl_gene_id gene_biotype hgnc_symbol description
ENSAPLG00020007325 protein_coding NA NA
ENSAPLG00020005151 protein_coding NA NA
ENSAPLG00020002099 protein_coding SUSD2 sushi domain containing 2 [Source:HGNC Symbol;Acc:HGNC:30667]
ENSAPLG00020012418 protein_coding LAMB3 laminin subunit beta 3 [Source:HGNC Symbol;Acc:HGNC:6490]
ENSAPLG00020001571 protein_coding NA acyl-coenzyme A amino acid N-acyltransferase 2 [Source:NCBI gene;Acc:101797844]
ENSAPLG00020001757 protein_coding NA NA
ENSAPLG00020001436 protein_coding CPO carboxypeptidase O [Source:HGNC Symbol;Acc:HGNC:21011]
ENSAPLG00020016143 protein_coding PCK1 phosphoenolpyruvate carboxykinase 1 [Source:NCBI gene;Acc:101793637]
ENSAPLG00020013723 protein_coding NA NA
ENSAPLG00020016534 protein_coding NA guanylin-like [Source:NCBI gene;Acc:101803958]
ENSAPLG00020015167 protein_coding MME membrane metalloendopeptidase [Source:HGNC Symbol;Acc:HGNC:7154]
ENSAPLG00020009971 protein_coding NA NA
ENSAPLG00020005323 protein_coding PLB1 phospholipase B1 [Source:HGNC Symbol;Acc:HGNC:30041]
ENSAPLG00020009050 protein_coding NA receptor accessory protein 6 [Source:HGNC Symbol;Acc:HGNC:30078]
ENSAPLG00020003693 protein_coding NA NA
ENSAPLG00020000462 protein_coding SULT1C3 sulfotransferase 1C1 [Source:NCBI gene;Acc:101797216]
ENSAPLG00020001772 protein_coding NA NA
ENSAPLG00020006634 protein_coding SLC10A2 solute carrier family 10 member 2 [Source:HGNC Symbol;Acc:HGNC:10906]
ENSAPLG00020016544 protein_coding ACE2 angiotensin I converting enzyme 2 [Source:HGNC Symbol;Acc:HGNC:13557]
ENSAPLG00020005311 protein_coding SLC3A1 solute carrier family 3 member 1 [Source:HGNC Symbol;Acc:HGNC:11025]
ENSAPLG00020005022 protein_coding ADGRG7 adhesion G protein-coupled receptor G7 [Source:HGNC Symbol;Acc:HGNC:19241]
ENSAPLG00020008901 protein_coding PEPD peptidase D [Source:HGNC Symbol;Acc:HGNC:8840]
ENSAPLG00020009556 protein_coding CDKN2A cyclin-dependent kinase 4 inhibitor B-like [Source:NCBI gene;Acc:113840168]
ENSAPLG00020008627 protein_coding NQO1 NAD(P)H quinone dehydrogenase 1 [Source:HGNC Symbol;Acc:HGNC:2874]
ENSAPLG00020004729 protein_coding HKDC1 hexokinase domain containing 1 [Source:HGNC Symbol;Acc:HGNC:23302]
ENSAPLG00020004842 protein_coding NA NA
ENSAPLG00020010989 protein_coding NA NA
ENSAPLG00020001209 protein_coding APOB apolipoprotein B [Source:HGNC Symbol;Acc:HGNC:603]
ENSAPLG00020003190 protein_coding SLC5A1 solute carrier family 5 member 1 [Source:HGNC Symbol;Acc:HGNC:11036]
ENSAPLG00020013029 protein_coding NA p53 apoptosis effector related to PMP22 [Source:NCBI gene;Acc:101797297]
ENSAPLG00020007419 protein_coding DPEP1 dipeptidase 1 [Source:NCBI gene;Acc:101797901]
ENSAPLG00020017883 protein_coding XPNPEP2 X-prolyl aminopeptidase 2 [Source:HGNC Symbol;Acc:HGNC:12823]
ENSAPLG00020013655 protein_coding ENTPD5 ectonucleoside triphosphate diphosphohydrolase 5 (inactive) [Source:HGNC Symbol;Acc:HGNC:3367]
ENSAPLG00020000801 protein_coding NA NA
ENSAPLG00020007676 protein_coding NA NA
ENSAPLG00020007551 protein_coding NA NA
ENSAPLG00020011928 protein_coding NA NA
ENSAPLG00020006270 protein_coding NA NA
ENSAPLG00020017864 protein_coding NA NA
ENSAPLG00020009266 protein_coding GDA guanine deaminase [Source:HGNC Symbol;Acc:HGNC:4212]
ENSAPLG00020003980 protein_coding NA histone H4 [Source:NCBI gene;Acc:110351447]
ENSAPLG00020014973 protein_coding AQP4 aquaporin 4 [Source:HGNC Symbol;Acc:HGNC:637]
ENSAPLG00020008932 protein_coding SLC5A12 solute carrier family 5 member 12 [Source:HGNC Symbol;Acc:HGNC:28750]
ENSAPLG00020008682 protein_coding ALDOB aldolase, fructose-bisphosphate B [Source:NCBI gene;Acc:101796090]
ENSAPLG00020000944 protein_coding TMEM37 transmembrane protein 37 [Source:NCBI gene;Acc:101805462]
ENSAPLG00020004413 protein_coding AQP8 aquaporin 8 [Source:HGNC Symbol;Acc:HGNC:642]
ENSAPLG00020015106 protein_coding CYP4F8 cytochrome P450 family 4 subfamily F member 8 [Source:HGNC Symbol;Acc:HGNC:2648]
ENSAPLG00020004241 protein_coding NA NA
ENSAPLG00020000579 protein_coding NA NA
ENSAPLG00020017868 protein_coding NA NA
ENSAPLG00020003684 protein_coding NA NA
ENSAPLG00020013926 protein_coding OLFM4 olfactomedin 4 [Source:HGNC Symbol;Acc:HGNC:17190]
ENSAPLG00020018096 protein_coding SLC2A5 solute carrier family 2, facilitated glucose transporter member 5 [Source:NCBI gene;Acc:101804741]
ENSAPLG00020000128 protein_coding SERPINB1 serpin family B member 1 [Source:NCBI gene;Acc:101799613]
ENSAPLG00020005067 pseudogene NA NA
ENSAPLG00020005597 protein_coding KCNJ15 potassium inwardly rectifying channel subfamily J member 15 [Source:NCBI gene;Acc:101795505]
ENSAPLG00020017448 protein_coding LGALS3 galectin 3 [Source:HGNC Symbol;Acc:HGNC:6563]
ENSAPLG00020008527 protein_coding F2RL1 F2R like trypsin receptor 1 [Source:HGNC Symbol;Acc:HGNC:3538]
ENSAPLG00020015547 rRNA NA NA
ENSAPLG00020005500 protein_coding NR0B2 nuclear receptor subfamily 0 group B member 2 [Source:NCBI gene;Acc:101799243]
ENSAPLG00020000836 protein_coding NA NA
ENSAPLG00020011820 lncRNA NA NA
ENSAPLG00020007487 protein_coding NA C-factor [Source:NCBI gene;Acc:101802025]
ENSAPLG00020013900 protein_coding NA NA
ENSAPLG00020017865 protein_coding NA NA
ENSAPLG00020007413 protein_coding NA NA
ENSAPLG00020008194 protein_coding NA NA
ENSAPLG00020011484 protein_coding NA NA
ENSAPLG00020016671 protein_coding NA Metallothionein [Source:UniProtKB/Swiss-Prot;Acc:P68494]
ENSAPLG00020006372 protein_coding CALB1 calbindin 1 [Source:NCBI gene;Acc:101802006]
ENSAPLG00020000985 protein_coding KY kyphoscoliosis peptidase [Source:HGNC Symbol;Acc:HGNC:26576]
ENSAPLG00020007294 protein_coding NA NA
ENSAPLG00020001254 protein_coding NA NA
ENSAPLG00020013906 protein_coding NA NA
ENSAPLG00020007723 protein_coding NA NA
ENSAPLG00020010330 protein_coding NA NA
ENSAPLG00020009562 protein_coding ACOT12 acyl-CoA thioesterase 12 [Source:HGNC Symbol;Acc:HGNC:24436]
ENSAPLG00020005630 pseudogene NA NA
ENSAPLG00020003187 protein_coding SLC44A2 solute carrier family 44 member 2 [Source:HGNC Symbol;Acc:HGNC:17292]
ENSAPLG00020016235 protein_coding SLC13A1 solute carrier family 13 member 1 [Source:HGNC Symbol;Acc:HGNC:10916]
ENSAPLG00020008862 protein_coding NR4A3 nuclear receptor subfamily 4 group A member 3 [Source:HGNC Symbol;Acc:HGNC:7982]
ENSAPLG00020013917 protein_coding NA NA
ENSAPLG00020016406 protein_coding PCSK9 proprotein convertase subtilisin/kexin type 9 [Source:HGNC Symbol;Acc:HGNC:20001]
ENSAPLG00020008172 protein_coding NA NA
ENSAPLG00020003114 protein_coding TKFC triokinase and FMN cyclase [Source:HGNC Symbol;Acc:HGNC:24552]
ENSAPLG00020003094 lncRNA NA NA
ENSAPLG00020004137 protein_coding NA NA
ENSAPLG00020004389 protein_coding DDX47 DEAD-box helicase 47 [Source:HGNC Symbol;Acc:HGNC:18682]
ENSAPLG00020003497 protein_coding KLF4 Kruppel like factor 4 [Source:HGNC Symbol;Acc:HGNC:6348]
ENSAPLG00020017346 protein_coding PIPOX pipecolic acid and sarcosine oxidase [Source:HGNC Symbol;Acc:HGNC:17804]
ENSAPLG00020008534 protein_coding SLCO4A1 solute carrier organic anion transporter family member 4A1 [Source:HGNC Symbol;Acc:HGNC:10953]
ENSAPLG00020000599 protein_coding NA NA
ENSAPLG00020003474 protein_coding DNAJC22 DnaJ heat shock protein family (Hsp40) member C22 [Source:HGNC Symbol;Acc:HGNC:25802]
ENSAPLG00020008855 protein_coding NA NA
ENSAPLG00020011196 protein_coding NA NA
ENSAPLG00020002652 protein_coding NA gamma-glutamyltransferase 1 [Source:NCBI gene;Acc:101799199]
ENSAPLG00020017229 protein_coding SLC6A4 solute carrier family 6 member 4 [Source:HGNC Symbol;Acc:HGNC:11050]
ENSAPLG00020004558 protein_coding NA NA
ENSAPLG00020007890 lncRNA NA NA
ENSAPLG00020015505 protein_coding NA NA
ENSAPLG00020013563 protein_coding SQLE squalene epoxidase [Source:HGNC Symbol;Acc:HGNC:11279]
ENSAPLG00020012578 protein_coding CREB3L3 cAMP responsive element binding protein 3 like 3 [Source:HGNC Symbol;Acc:HGNC:18855]
ENSAPLG00020002747 snoRNA NA NA
ENSAPLG00020007878 protein_coding B3GALT5 beta-1,3-galactosyltransferase 5 [Source:HGNC Symbol;Acc:HGNC:920]
ENSAPLG00020004812 protein_coding NPC1L1 NPC1 like intracellular cholesterol transporter 1 [Source:HGNC Symbol;Acc:HGNC:7898]
ENSAPLG00020004008 protein_coding NA histone H4 [Source:NCBI gene;Acc:110351447]
ENSAPLG00020004098 protein_coding NA NA
ENSAPLG00020008569 protein_coding MFSD4B major facilitator superfamily domain containing 4B [Source:HGNC Symbol;Acc:HGNC:21053]
ENSAPLG00020017400 protein_coding LTF transferrin [Source:NCBI gene;Acc:101795303]
ENSAPLG00020010722 protein_coding DUOX2 dual oxidase 2 [Source:HGNC Symbol;Acc:HGNC:13273]
ENSAPLG00020007929 protein_coding PCP4 Purkinje cell protein 4 [Source:HGNC Symbol;Acc:HGNC:8742]
ENSAPLG00020006588 protein_coding S100A13 S100 calcium binding protein A13 [Source:HGNC Symbol;Acc:HGNC:10490]
ENSAPLG00020000169 protein_coding INSIG1 insulin induced gene 1 [Source:HGNC Symbol;Acc:HGNC:6083]
ENSAPLG00020017133 protein_coding NA 60S ribosomal protein L36a [Source:NCBI gene;Acc:101801033]
ENSAPLG00020012336 snoRNA NA small nucleolar RNA, C/D box 94 [Source:HGNC Symbol;Acc:HGNC:32756]
ENSAPLG00020006537 protein_coding S100A12 S100 calcium binding protein A12 [Source:HGNC Symbol;Acc:HGNC:10489]
ENSAPLG00020015804 protein_coding APOA4 apolipoprotein A4 [Source:HGNC Symbol;Acc:HGNC:602]
ENSAPLG00020000876 protein_coding MSMO1 methylsterol monooxygenase 1 [Source:NCBI gene;Acc:101804996]
ENSAPLG00020005335 protein_coding NA NA
ENSAPLG00020003263 protein_coding NA NA
ENSAPLG00020000190 protein_coding NA NA
ENSAPLG00020014632 protein_coding SERPING1 serpin family G member 1 [Source:HGNC Symbol;Acc:HGNC:1228]
ENSAPLG00020014118 protein_coding MRPS23 mitochondrial ribosomal protein S23 [Source:HGNC Symbol;Acc:HGNC:14509]
ENSAPLG00020004059 protein_coding NA NA
ENSAPLG00020008356 protein_coding SLC2A2 solute carrier family 2 member 2 [Source:HGNC Symbol;Acc:HGNC:11006]
ENSAPLG00020017609 protein_coding NA solute carrier family 6 member 14 [Source:NCBI gene;Acc:101802830]
ENSAPLG00020007966 lncRNA NA NA
ENSAPLG00020014477 protein_coding NA NA
ENSAPLG00020015261 protein_coding NA podocalyxin like [Source:HGNC Symbol;Acc:HGNC:9171]
ENSAPLG00020014557 scaRNA SCARNA1 small Cajal body-specific RNA 1 [Source:HGNC Symbol;Acc:HGNC:32555]
ENSAPLG00020016540 lncRNA NA NA
ENSAPLG00020003062 lncRNA NA NA
ENSAPLG00020007013 protein_coding NA NA
ENSAPLG00020010278 protein_coding RPL29 ribosomal protein L29 [Source:HGNC Symbol;Acc:HGNC:10331]
ENSAPLG00020017598 protein_coding LECT2 leukocyte cell derived chemotaxin 2 [Source:HGNC Symbol;Acc:HGNC:6550]
ENSAPLG00020007861 protein_coding NA ribosomal protein L23 [Source:NCBI gene;Acc:101793494]

Pathway analysis

de.entrez <- annot %>% filter(ensembl_gene_id %in% dt.tib$gene) %>% na.omit(entrezgene_id)
entrez.universe <- annot %>% na.omit(entrezgene_id)

K <- kegga(de.entrez$entrezgene_id, universe = entrez.universe$entrezgene_id, species.KEGG = "apla") %>% 
  as_tibble(rownames = NA) %>% 
  rownames_to_column("PathwayID") %>% 
  filter(P.DE < 0.05) %>% 
  arrange(P.DE)

keggList <- getGeneKEGGLinks(species.KEGG = "apla") %>% 
  as_tibble() %>% 
  rename("entrezgene_id" = GeneID)

keggGenes = data.frame()
for (path in nrow(K)){
    procPath <- filter(keggList, entrezgene_id %in% de.entrez$entrezgene_id)
    df <- data.frame(procPath)
    keggGenes <- rbind(keggGenes, df)
}

inner_join(keggGenes, K) %>%
  as_tibble() %>% 
  mutate(entrezgene_id = as.numeric(entrezgene_id)) %>% 
  left_join(annot) %>% 
  select(Pathway, PathwayID, N, DE, P.DE, ensembl_gene_id, hgnc_symbol, entrezgene_id) %>% 
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Pathway PathwayID N DE P.DE ensembl_gene_id hgnc_symbol entrezgene_id
Glycolysis / Gluconeogenesis path:apla00010 14 2 0.0011323 ENSAPLG00020016143 PCK1 101793637
Glycolysis / Gluconeogenesis path:apla00010 14 2 0.0011323 ENSAPLG00020008682 ALDOB 101796090
Citrate cycle (TCA cycle) path:apla00020 9 1 0.0330012 ENSAPLG00020016143 PCK1 101793637
Pentose phosphate pathway path:apla00030 9 1 0.0330012 ENSAPLG00020008682 ALDOB 101796090
Fructose and mannose metabolism path:apla00051 10 1 0.0366053 ENSAPLG00020008682 ALDOB 101796090
Steroid biosynthesis path:apla00100 5 1 0.0184602 ENSAPLG00020000876 MSMO1 101804996
Pyruvate metabolism path:apla00620 8 1 0.0293847 ENSAPLG00020016143 PCK1 101793637
FoxO signaling pathway path:apla04068 31 2 0.0055831 ENSAPLG00020016143 PCK1 101793637
FoxO signaling pathway path:apla04068 31 2 0.0055831 ENSAPLG00020009556 CDKN2A 113840168
Ferroptosis path:apla04216 11 1 0.0401970 ENSAPLG00020017400 LTF 101795303