This is an analysis of differential expression at the gene level between supershedder groupings based on shedding rates measured on the day of sacrifice for infected mallard bursa samples.

## Load packages and data
library(limma)
library(edgeR)
library(Glimma)
library(gplots)
library(ggrepel)
library(RColorBrewer)
library(gridExtra)
library(kableExtra)
library(tidyverse)

## Load data
cnt <- read.table("G:/Shared drives/RNA Seq Supershedder Project/MALL DE manuscript/mallardLPAIV/extData/all.genes.results.csv", header = TRUE) %>% as_tibble(rownames = NA)
covars <- read_delim("G:/Shared drives/RNA Seq Supershedder Project/MALL DE manuscript/mallardLPAIV/extData/MALL70_SSgroup_RAW_11.5.20.csv", delim = ",")
annot <- read_delim("G:/Shared drives/RNA Seq Supershedder Project/MALL DE manuscript/mallardLPAIV/extData/annotationGenes103120.txt", delim = "\t")

Prep data for analysis

newNames <- names(cnt) %>%
  as_tibble() %>%
  separate(value, into = c("pt1", "pt2", "pt3")) %>%
  select(pt2, pt3) %>%
  mutate(pt2 = as.numeric(pt2),
         pt2 = formatC(pt2, width = 2, format = "d", flag = "0")) %>%
  mutate(newName = paste0("bursa_", pt2))

colnames(cnt) <- newNames$newName

cnt <- cnt %>% 
  select(order(colnames(cnt)))

covars <- covars %>%
  mutate(bird = as.numeric(bird)) %>%
  mutate(bird = formatC(bird, width = 2, format = "d", flag = "0")) %>%
  arrange(bird) %>%
  mutate(bird = as.factor(bird)) %>%
  mutate(group = str_remove(group, "-"))

## Remove birds with issues
cnt <- cnt %>% select(-bursa_01, -bursa_72)

covars <- covars %>% filter(bird != "01", bird != "72")

Prep data for analysis

Here we standardize expression levels across samples of varying depth by using the counts per million (CPM) and trimmed mean of M-values (TMM) methods. We also remove any genes that are expressed at few than 0.5 CPM in at least 25% of individuals.

#Convert to DGEList object
dge <- DGEList(counts=cnt)
dge$genes <- annot[match(rownames(dge$counts), annot$ensembl_gene_id),]

#CPM and log-CPM
cpm <- cpm(dge)
lcpm <- cpm(dge, log = TRUE)

#### Retain only genes expressed at >0.5 CPM in at least 25% of individuals
table(rowSums(dge$counts==0) == length(cnt[1,]))
## 
## FALSE  TRUE 
## 16979  1511
keep.exprs <- rowSums(cpm>0.5) >= length(cnt[1,])/4
sum(keep.exprs)
## [1] 13110
dge <- dge[keep.exprs,, keep.lib.sizes=FALSE]
table(rowSums(dge$counts==0) == length(cnt[1,]))
## 
## FALSE 
## 13110
dim(dge)
## [1] 13110    68

Subset data

Following normalization, we subset the data based on infection groups.

#Assign groups in DGElist
group <- recode(covars$group, C1 = "Ctl", C29 = "Ctl") %>% 
  as.factor()
dge$samples$group <- group

### Divide out I1 and I2
dge.I1 <- dge[,grep('\\bI1\\b', dge$samples$group)]
dge.I1 <- calcNormFactors(dge.I1, method = "TMM")

dge.I2 <- dge[,grep('\\bI2\\b', dge$samples$group)]
dge.I2 <- calcNormFactors(dge.I2, method = "TMM")

covars.I1 <- covars %>% filter(group == "I1")
covars.I2 <- covars %>% filter(group == "I2")

Differential expression analysis

Establish design & contrast matrices

## Establish design matrix: I1
design.I1 = model.matrix(~ 0 +
                        factor(covars.I1$SSgroup.virus.sac) +
                        covars.I1$age +
                        covars.I1$sex + 
                        covars.I1$Bursa_pool)

colnames(design.I1) <- c("HIGH", "LOW", "MODERATE",
                         "age", "sexM", "poolMSU2",
                         "poolMSU3", "poolMSU4", "poolMSU5")


contr.matrix.I1 = makeContrasts(
  LvM = LOW - MODERATE,
  MvH = MODERATE - HIGH,
  LvH = LOW - HIGH,
  levels = colnames(design.I1))

## Mean-variance trend plots
v.I1 <- voomWithQualityWeights(dge.I1, design.I1, plot = FALSE)

#Fitting models
vfit.I1 <- lmFit(v.I1, design.I1) 
vfit.I1 <- contrasts.fit(vfit.I1, contrasts = contr.matrix.I1)
tfit.I1 <- treat(vfit.I1, lfc = 0.5)

## Identify DE genes
dt.I1 <- decideTests(tfit.I1, p.value = 0.1, adjust.method = "fdr")
dt.tib.I1 <- as_tibble(dt.I1, rownames = NA) %>% 
  rownames_to_column("gene") %>% 
  mutate_at(vars(starts_with("L")), as.numeric) %>% 
  mutate_at(vars(starts_with("M")), as.numeric) %>% 
  rename(LvM.I1 = LvM,
         MvH.I1 = MvH,
         LvH.I1 = LvH) %>% 
  filter_at(vars(LvM.I1:LvH.I1), any_vars(. != 0))


## Establish design matrix: I2
design.I2 = model.matrix(
  ~ 0 +
    factor(covars.I2$SSgroup.virus.sac) +
    covars.I2$age +
    covars.I2$sex +
    covars.I2$Bursa_pool
)

colnames(design.I2) <- c(
  "HIGH",
  "LOW",
  "MODERATE",
  "age",
  "sexM",
  "poolMSU2",
  "poolMSU3",
  "poolMSU4",
  "poolMSU5"
)

contr.matrix.I2 = makeContrasts(
  LvM = LOW - MODERATE,
  MvH = MODERATE - HIGH,
  LvH = LOW - HIGH,
  levels = colnames(design.I2))

## Mean-variance trend plots
v.I2 <- voomWithQualityWeights(dge.I2, design.I2, plot = FALSE)

#Fitting models
vfit.I2 <- lmFit(v.I2, design.I2) 
vfit.I2 <- contrasts.fit(vfit.I2, contrasts = contr.matrix.I2)
tfit.I2 <- treat(vfit.I2, lfc = 0.5)

## Identify DE genes
dt.I2 <- decideTests(tfit.I2, p.value = 0.1, adjust.method = "fdr")
dt.tib.I2 <- as_tibble(dt.I2, rownames = NA) %>% 
  rownames_to_column("gene") %>% 
  mutate_at(vars(starts_with("L")), as.numeric) %>% 
  mutate_at(vars(starts_with("M")), as.numeric) %>% 
  rename(LvM.I2 = LvM,
         MvH.I2 = MvH,
         LvH.I2 = LvH) %>% 
  filter_at(vars(LvM.I2:LvH.I2), any_vars(. != 0))


## Bring it all together
dt.tib <- full_join(dt.tib.I1, dt.tib.I2) %>% 
  mutate_at(vars(starts_with("L")), as.numeric) %>% 
  mutate_at(vars(starts_with("M")), as.numeric) %>% 
  replace(is.na(.), 0)
## Joining, by = "gene"

Count DE genes

For a gene to be considered differentially expressed, we require a p-value of 0.1 with a false discovery rate correction and a log fold count difference of 1.

allResults <- rbind(
  as.data.frame(summary(dt.I1)),
  as.data.frame(summary(dt.I2))) %>% 
  filter(Var1 != "NotSig")

kable(allResults) %>%
  kable_styling("striped", full_width = F) %>%
  pack_rows("1 Day Post Infection", 1, 6) %>%
  pack_rows("2 Days Post Infection", 7, 12)
Var1 Var2 Freq
1 Day Post Infection
Down LvM 0
Up LvM 0
Down MvH 0
Up MvH 57
Down LvH 0
Up LvH 0
2 Days Post Infection
Down LvM 3
Up LvM 4
Down MvH 5
Up MvH 7
Down LvH 5
Up LvH 7

Volcano plot

tmp1 <- topTreat(tfit.I1, coef = 1, n = Inf)
results1 <- mutate(tmp1, sig=ifelse(tmp1$adj.P.Val<0.1, "Sig", "Not Sig"))
p1 <- ggplot(results1, aes(logFC, -log10(P.Value))) +
  geom_point(aes(col = sig)) +
  scale_color_manual(values=c("black", "red")) +
  ggtitle("Low vs. Moderate shedding, Day 1") +
  ylab("-Log10(p-value)") +
  xlab("Log(Fold count)") +
  theme_bw() +
  theme(legend.position = "none")

tmp2 <- topTreat(tfit.I1, coef = 2, n = Inf)
results2 <- mutate(tmp2, sig=ifelse(tmp2$adj.P.Val<0.1, "Sig", "Not Sig"))
p2 <- ggplot(results2, aes(logFC, -log10(P.Value))) +
  geom_point(aes(col = sig)) +
  scale_color_manual(values=c("black", "red")) +
  ggtitle("Moderate vs. High shedding, Day 1") +
  ylab("-Log10(p-value)") +
  xlab("Log(Fold count)") +
  theme_bw() +
  theme(legend.position = "none")

tmp3 <- topTreat(tfit.I1, coef = 3, n = Inf)
results3 <- mutate(tmp3, sig=ifelse(tmp3$adj.P.Val<0.1, "Sig", "Not Sig"))
p3 <- ggplot(results3, aes(logFC, -log10(P.Value))) +
  geom_point(aes(col = sig)) +
  scale_color_manual(values=c("black", "red")) +
  ggtitle("Low vs. High shedding, Day 1") +
  ylab("-Log10(p-value)") +
  xlab("Log(Fold count)") +
  theme_bw() +
  theme(legend.position = "none")

tmp4 <- topTreat(tfit.I2, coef = 1, n = Inf)
results4 <- mutate(tmp4, sig=ifelse(tmp4$adj.P.Val<0.1, "Sig", "Not Sig"))
p4 <- ggplot(results4, aes(logFC, -log10(P.Value))) +
  geom_point(aes(col = sig)) +
  scale_color_manual(values=c("black", "red")) +
  ggtitle("Low vs. Moderate shedding, Day 2") +
  ylab("-Log10(p-value)") +
  xlab("Log(Fold count)") +
  theme_bw() +
  theme(legend.position = "none")

tmp5 <- topTreat(tfit.I2, coef = 2, n = Inf)
results5 <- mutate(tmp5, sig=ifelse(tmp5$adj.P.Val<0.1, "Sig", "Not Sig"))
p5 <- ggplot(results5, aes(logFC, -log10(P.Value))) +
  geom_point(aes(col = sig)) +
  scale_color_manual(values=c("black", "red")) +
  ggtitle("Moderate vs. High shedding, Day 2") +
  ylab("-Log10(p-value)") +
  xlab("Log(Fold count)") +
  theme_bw() +
  theme(legend.position = "none")

tmp6 <- topTreat(tfit.I2, coef = 3, n = Inf)
results6 <- mutate(tmp6, sig=ifelse(tmp6$adj.P.Val<0.1, "Sig", "Not Sig"))
p6 <- ggplot(results6, aes(logFC, -log10(P.Value))) +
  geom_point(aes(col = sig)) +
  scale_color_manual(values=c("black", "red")) +
  ggtitle("Low vs. High shedding, Day 2") +
  ylab("-Log10(p-value)") +
  xlab("Log(Fold count)") +
  theme_bw() +
  theme(legend.position = "none")

grid.arrange(p1, p2, p3, p4, p5, p6, nrow = 2)

Heatmap - I1

# Get the gene names for DE genes
lcpm2 <- as_tibble(dge.I1$counts, rownames = NA) %>% 
  cpm(., log = TRUE)

newNames <- colnames(lcpm2) %>% 
  as_tibble() %>% 
  separate(value, into = c(NA, "bird")) %>% 
  bind_cols(covars.I1) %>% 
  unite("newNames", SSgroup.virus.sac, group, bird)

colOrder <- bind_cols(covars.I1) %>% 
  mutate(SSgroup.virus.sac = fct_relevel(SSgroup.virus.sac, "LOW", "MODERATE", "HIGH")) %>%
  arrange(SSgroup.virus.sac, group, bird) %>% 
  unite("newNames", SSgroup.virus.sac, group, bird)

colnames(lcpm2) <- newNames$newNames
lcpm2 <- lcpm2[, colOrder$newNames]

heatmap.annot <- annot %>%
  filter(ensembl_gene_id %in% dt.tib.I1$gene) %>% 
  mutate(hgnc_symbol = replace_na(hgnc_symbol, ".")) %>% 
  unite(annot.hm, c(ensembl_gene_id, hgnc_symbol), sep = "-", remove = FALSE) %>% 
  select(annot.hm, ensembl_gene_id)

deGeneCounts <- lcpm2 %>% 
  as_tibble() %>% 
  mutate(ensembl_gene_id = rownames(dge.I1$counts)) %>% 
  filter(ensembl_gene_id %in% dt.tib.I1$gene) %>% 
  left_join(heatmap.annot) %>% 
  column_to_rownames("annot.hm") %>%
  select(-ensembl_gene_id) %>% 
  as.matrix()

## Set up palette
mypalette <- brewer.pal(11,"RdYlBu")
morecols <- colorRampPalette(mypalette)

hc <- hclust(as.dist(1-cor(t(deGeneCounts))))

# Plot the heatmap
heatmap.2(deGeneCounts,
          Colv = FALSE,
          Rowv = as.dendrogram(hc),
          col = rev(morecols(50)),
          trace = "none",
          colsep = c(6, 12),
          dendrogram = "row",
          density.info = "none",
          key = TRUE,
          margins = c(10, 14),
          scale ="row")

Heatmap - I2

# Get the gene names for DE genes
lcpm2 <- as_tibble(dge.I2$counts, rownames = NA) %>% 
  cpm(., log = TRUE)

newNames <- colnames(lcpm2) %>% 
  as_tibble() %>% 
  separate(value, into = c(NA, "bird")) %>% 
  bind_cols(covars.I2) %>% 
  unite("newNames", SSgroup.virus.sac, group, bird)

colOrder <- bind_cols(covars.I2) %>% 
  mutate(SSgroup.virus.sac = fct_relevel(SSgroup.virus.sac, "LOW", "MODERATE", "HIGH")) %>%
  arrange(SSgroup.virus.sac, group, bird) %>% 
  unite("newNames", SSgroup.virus.sac, group, bird)

colnames(lcpm2) <- newNames$newNames
lcpm2 <- lcpm2[, colOrder$newNames]

heatmap.annot <- annot %>%
  filter(ensembl_gene_id %in% dt.tib.I2$gene) %>% 
  mutate(hgnc_symbol = replace_na(hgnc_symbol, ".")) %>% 
  unite(annot.hm, c(ensembl_gene_id, hgnc_symbol), sep = "-", remove = FALSE) %>% 
  select(annot.hm, ensembl_gene_id)

deGeneCounts <- lcpm2 %>% 
  as_tibble() %>% 
  mutate(ensembl_gene_id = rownames(dge.I2$counts)) %>% 
  filter(ensembl_gene_id %in% dt.tib.I2$gene) %>% 
  left_join(heatmap.annot) %>% 
  column_to_rownames("annot.hm") %>%
  select(-ensembl_gene_id) %>% 
  as.matrix()

## Set up palette
mypalette <- brewer.pal(11,"RdYlBu")
morecols <- colorRampPalette(mypalette)

hc <- hclust(as.dist(1-cor(t(deGeneCounts))))

# Plot the heatmap
heatmap.2(deGeneCounts,
          Colv = FALSE,
          Rowv = as.dendrogram(hc),
          col = rev(morecols(50)),
          trace = "none",
          colsep = c(4, 8),
          dendrogram = "row",
          density.info = "none",
          key = TRUE,
          margins = c(12, 12),
          scale ="row")

Boxplots for I1 differentially expressed genes

lcpm.DE <- lcpm %>%
  as_tibble(rownames = NA) %>% 
  rownames_to_column("identifier") %>% 
  filter(identifier %in% dt.tib.I1$gene) %>%
  pivot_longer(cols = contains("_"),
               names_to = "sample",
               values_to = "lcpm") %>%
  separate(sample, into = c("tissue", "bird")) %>%
  mutate(bird = as.factor(bird)) %>%
  left_join(covars, by = "bird") %>%
  filter(group == "C1" | group == "C14" | group == "I1") %>% 
  select(identifier, bird, lcpm, SSgroup.virus.sac, group) %>% 
  mutate(group = recode(group, C29 = "Control"),
         group = recode(group, C1 = "Control"),
         tmpID = ifelse(group == "Control", 'Control', 'Infected'))


aprioriPlotting <- function(target, ...) {
   annot.target <- annot %>%
    select(
      ensembl_gene_id,
      hgnc_symbol) %>%
    filter(ensembl_gene_id == target) %>% 
    mutate(hgnc_symbol = replace_na(hgnc_symbol, "."))

  plot <- lcpm.DE %>%
    filter(identifier == target) %>%
    mutate(SSgroup.virus.sac = fct_relevel(SSgroup.virus.sac, "LOW", "MODERATE", "HIGH")) %>%
    ggplot(aes(x = SSgroup.virus.sac, y = lcpm)) +
    facet_grid(. ~ tmpID, scales = "free", space = "free") +
    ylab("Log2(Counts per million)") +
    xlab("Shedding Group") +
    scale_fill_grey(start = 0.35, end = 1) + 
    geom_point(position = position_dodge(width=0.75), aes(group = group), show.legend = FALSE) +
    geom_boxplot(alpha = 0.5) +
    geom_label_repel(aes(label = bird, group = group, fill = NULL), position = position_dodge(width=0.75), show.legend=FALSE) +
    theme_classic() +
    labs(title = paste0("I1: ", target, " - ", annot.target[1,2])) +
    theme(legend.title = element_blank())
    
  print(plot)
}

for(target in sort(unique(dt.tib.I1$gene))) {
  aprioriPlotting(target)
}

Boxplots for I2 differentially expressed genes

lcpm.DE <- lcpm %>%
  as_tibble(rownames = NA) %>% 
  rownames_to_column("identifier") %>% 
  filter(identifier %in% dt.tib.I2$gene) %>%
  pivot_longer(cols = contains("_"),
               names_to = "sample",
               values_to = "lcpm") %>%
  separate(sample, into = c("tissue", "bird")) %>%
  mutate(bird = as.factor(bird)) %>%
  left_join(covars, by = "bird") %>%
  filter(group == "C1" | group == "C14" | group == "I2") %>% 
  select(identifier, bird, lcpm, SSgroup.virus.sac, group) %>% 
  mutate(group = recode(group, C29 = "Control"),
         group = recode(group, C1 = "Control"),
         tmpID = ifelse(group == "Control", 'Control', 'Infected'))


aprioriPlotting <- function(target, ...) {
   annot.target <- annot %>%
    select(
      ensembl_gene_id,
      hgnc_symbol) %>%
    filter(ensembl_gene_id == target) %>% 
    mutate(hgnc_symbol = replace_na(hgnc_symbol, "."))

  plot <- lcpm.DE %>%
    filter(identifier == target) %>%
    mutate(SSgroup.virus.sac = fct_relevel(SSgroup.virus.sac, "LOW", "MODERATE", "HIGH")) %>%
    ggplot(aes(x = SSgroup.virus.sac, y = lcpm)) +
    facet_grid(. ~ tmpID, scales = "free", space = "free") +
    ylab("Log2(Counts per million)") +
    xlab("Shedding Group") +
    scale_fill_grey(start = 0.35, end = 1) + 
    geom_point(position = position_dodge(width=0.75), aes(group = group), show.legend = FALSE) +
    geom_boxplot(alpha = 0.5) +
    geom_label_repel(aes(label = bird, group = group, fill = NULL), position = position_dodge(width=0.75), show.legend=FALSE) +
    theme_classic() +
    labs(title = paste0("I2: ", target, " - ", annot.target[1,2])) +
    theme(legend.title = element_blank())
    
  print(plot)
}

for(target in sort(unique(dt.tib.I2$gene))) {
  aprioriPlotting(target)
}

Annotations for differentially expressed genes

ns denotes non-significant genes for each comparison and numerical values are the log(fold count) difference

tmp1a <- tmp1 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "LvM.I1")
tmp2a <- tmp2 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "MvH.I1")
tmp3a <- tmp3 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "LvH.I1")
tmp4a <- tmp4 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "LvM.I2")
tmp5a <- tmp5 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "MvH.I2")
tmp6a <- tmp6 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "LvH.I2")

bind_rows(tmp1a, tmp2a, tmp3a, tmp4a, tmp5a, tmp6a) %>% 
  select(-description) %>% 
  filter(ensembl_gene_id %in% dt.tib$gene) %>% 
  mutate(logFoldCount = ifelse(adj.P.Val < 0.10, round(logFC, 2), "ns")) %>% 
  select(-adj.P.Val, -logFC, -gene_biotype) %>% 
  pivot_wider(names_from = comp, values_from = logFoldCount) %>% 
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
ensembl_gene_id hgnc_symbol LvM.I1 MvH.I1 LvH.I1 LvM.I2 MvH.I2 LvH.I2
ENSAPLG00020012805 NA ns 5.88 ns ns ns ns
ENSAPLG00020012792 NA ns 6.33 ns ns ns ns
ENSAPLG00020015339 NA ns 6.72 ns ns ns ns
ENSAPLG00020017609 NA ns 7.28 ns ns ns ns
ENSAPLG00020005067 NA ns 4.56 ns ns ns ns
ENSAPLG00020017000 MSLNL ns 4.8 ns ns ns ns
ENSAPLG00020006243 RARRES1 ns 4.75 ns ns ns ns
ENSAPLG00020010722 DUOX2 ns 4.75 ns ns ns ns
ENSAPLG00020016974 NOXO1 ns 6 ns ns ns ns
ENSAPLG00020002670 FAM3D ns 4.89 ns ns ns ns
ENSAPLG00020015459 ATP10B ns 4.7 ns ns ns ns
ENSAPLG00020013506 NA ns 6.55 ns ns ns 7.11
ENSAPLG00020011723 NA ns 4.51 ns ns ns ns
ENSAPLG00020000599 NA ns 5.08 ns ns ns ns
ENSAPLG00020006095 NA ns 4.04 ns ns ns ns
ENSAPLG00020003190 SLC5A1 ns 4.46 ns ns ns ns
ENSAPLG00020001770 AGR2 ns 5.28 ns ns ns ns
ENSAPLG00020003213 NA ns 6.78 ns ns ns ns
ENSAPLG00020006537 S100A12 ns 6.04 ns ns ns ns
ENSAPLG00020009922 NOX1 ns 4.52 ns ns ns ns
ENSAPLG00020002072 NA ns 4.91 ns ns ns ns
ENSAPLG00020017274 QSOX1 ns 2.96 ns ns ns ns
ENSAPLG00020000995 NA ns 8.55 ns ns ns ns
ENSAPLG00020011745 LYZ ns 3.46 ns ns 9.66 7.53
ENSAPLG00020016998 NA ns 2.69 ns ns ns ns
ENSAPLG00020010988 XDH ns 1.74 ns ns ns ns
ENSAPLG00020016372 LPO ns 4.56 ns ns ns ns
ENSAPLG00020005548 NA ns 7.43 ns ns ns ns
ENSAPLG00020006319 DIO2 ns 3.32 ns ns ns ns
ENSAPLG00020005338 ATP13A4 ns 4.3 ns ns ns ns
ENSAPLG00020015787 SLC34A2 ns 5.22 ns ns ns ns
ENSAPLG00020012965 NA ns ns ns ns 12.44 9.72
ENSAPLG00020014762 COL19A1 ns 4.17 ns ns ns ns
ENSAPLG00020005112 NA ns 3.98 ns ns ns ns
ENSAPLG00020011196 NA ns 5.73 ns ns ns ns
ENSAPLG00020002067 TMEM184A ns 4.89 ns ns ns ns
ENSAPLG00020016695 NA ns 2.2 ns ns ns ns
ENSAPLG00020005345 HSD11B2 ns 4.27 ns ns ns ns
ENSAPLG00020009946 HPGD ns 3.82 ns ns ns ns
ENSAPLG00020011564 NA ns 5.39 ns ns ns ns
ENSAPLG00020012541 SESN2 ns 3.73 ns ns ns ns
ENSAPLG00020015729 C4orf19 ns 4.08 ns ns ns ns
ENSAPLG00020005545 EXOC3L4 ns 3.52 ns ns ns ns
ENSAPLG00020015525 NA ns ns ns ns 25.08 20.17
ENSAPLG00020012091 NA ns 6.48 ns ns ns ns
ENSAPLG00020001655 CRISP3 ns 5.94 ns ns ns ns
ENSAPLG00020002852 NA ns 4.2 ns ns ns ns
ENSAPLG00020005852 NA ns 3.46 ns ns ns ns
ENSAPLG00020014622 NA ns 2.1 ns ns ns ns
ENSAPLG00020016310 CFTR ns 3.03 ns ns ns ns
ENSAPLG00020016962 NA ns ns ns -7.37 18.86 11.49
ENSAPLG00020005866 DUSP16 ns 2.21 ns ns ns ns
ENSAPLG00020000142 SLC9A4 ns 5.57 ns ns ns ns
ENSAPLG00020006621 NA ns 2.85 ns ns ns ns
ENSAPLG00020018298 PGAP6 ns 1.9 ns ns ns ns
ENSAPLG00020015010 LINGO3 ns 5.69 ns ns ns ns
ENSAPLG00020003094 NA ns ns ns 5.15 -16.49 -11.34
ENSAPLG00020015289 NA ns 4.4 ns ns ns ns
ENSAPLG00020013527 ADCY8 ns 3.68 ns ns ns ns
ENSAPLG00020007580 NA ns 3.46 ns ns ns ns
ENSAPLG00020015905 NA ns ns ns 5.15 -15.34 -10.18
ENSAPLG00020002125 ACTA1 ns ns ns ns 15.2 10.92
ENSAPLG00020008456 NA ns 1.73 ns ns ns ns
ENSAPLG00020003263 NA ns ns ns ns 12.82 10.72
ENSAPLG00020003115 NA ns ns ns 3.9 -10.73 ns
ENSAPLG00020015577 NA ns ns ns -5.5 ns ns
ENSAPLG00020012365 NA ns ns ns ns -31.17 -22.97
ENSAPLG00020011889 NA ns ns ns ns -23.71 -18.98
ENSAPLG00020005820 NA ns ns ns 5.62 ns ns
ENSAPLG00020003671 NA ns ns ns ns ns -5.87
ENSAPLG00020011926 NA ns ns ns -4 9.59 ns

Gene functions

bind_rows(tmp1a, tmp2a, tmp3a, tmp4a, tmp5a, tmp6a) %>% 
  select(-comp, -adj.P.Val, -logFC) %>% 
  filter(ensembl_gene_id %in% dt.tib$gene) %>% 
  distinct(.) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
ensembl_gene_id gene_biotype hgnc_symbol description
ENSAPLG00020012805 protein_coding NA NA
ENSAPLG00020012792 protein_coding NA NA
ENSAPLG00020015339 protein_coding NA NA
ENSAPLG00020017609 protein_coding NA solute carrier family 6 member 14 [Source:NCBI gene;Acc:101802830]
ENSAPLG00020005067 pseudogene NA NA
ENSAPLG00020017000 protein_coding MSLNL mesothelin like [Source:HGNC Symbol;Acc:HGNC:14170]
ENSAPLG00020006243 protein_coding RARRES1 retinoic acid receptor responder 1 [Source:HGNC Symbol;Acc:HGNC:9867]
ENSAPLG00020010722 protein_coding DUOX2 dual oxidase 2 [Source:HGNC Symbol;Acc:HGNC:13273]
ENSAPLG00020016974 protein_coding NOXO1 NADPH oxidase organizer 1 [Source:HGNC Symbol;Acc:HGNC:19404]
ENSAPLG00020002670 protein_coding FAM3D FAM3 metabolism regulating signaling molecule D [Source:HGNC Symbol;Acc:HGNC:18665]
ENSAPLG00020015459 protein_coding ATP10B ATPase phospholipid transporting 10B (putative) [Source:HGNC Symbol;Acc:HGNC:13543]
ENSAPLG00020013506 protein_coding NA beta-microseminoprotein [Source:NCBI gene;Acc:101795347]
ENSAPLG00020011723 protein_coding NA ovoinhibitor [Source:NCBI gene;Acc:101798824]
ENSAPLG00020000599 protein_coding NA NA
ENSAPLG00020006095 protein_coding NA NA
ENSAPLG00020003190 protein_coding SLC5A1 solute carrier family 5 member 1 [Source:HGNC Symbol;Acc:HGNC:11036]
ENSAPLG00020001770 protein_coding AGR2 anterior gradient 2, protein disulphide isomerase family member [Source:HGNC Symbol;Acc:HGNC:328]
ENSAPLG00020003213 protein_coding NA NA
ENSAPLG00020006537 protein_coding S100A12 S100 calcium binding protein A12 [Source:HGNC Symbol;Acc:HGNC:10489]
ENSAPLG00020009922 protein_coding NOX1 NADPH oxidase 1 [Source:NCBI gene;Acc:101798310]
ENSAPLG00020002072 protein_coding NA NA
ENSAPLG00020017274 protein_coding QSOX1 quiescin sulfhydryl oxidase 1 [Source:HGNC Symbol;Acc:HGNC:9756]
ENSAPLG00020000995 protein_coding NA NA
ENSAPLG00020011745 protein_coding LYZ lysozyme [Source:HGNC Symbol;Acc:HGNC:6740]
ENSAPLG00020016998 protein_coding NA NA
ENSAPLG00020010988 protein_coding XDH xanthine dehydrogenase [Source:HGNC Symbol;Acc:HGNC:12805]
ENSAPLG00020016372 protein_coding LPO lactoperoxidase [Source:HGNC Symbol;Acc:HGNC:6678]
ENSAPLG00020005548 protein_coding NA NA
ENSAPLG00020006319 protein_coding DIO2 iodothyronine deiodinase 2 [Source:HGNC Symbol;Acc:HGNC:2884]
ENSAPLG00020005338 protein_coding ATP13A4 probable cation-transporting ATPase 13A4 [Source:NCBI gene;Acc:101794117]
ENSAPLG00020015787 protein_coding SLC34A2 solute carrier family 34 member 2 [Source:NCBI gene;Acc:101803835]
ENSAPLG00020012965 protein_coding NA Anas platyrhynchos free fatty acid receptor 4 (FFAR4), mRNA. [Source:RefSeq mRNA;Acc:NM_001310831]
ENSAPLG00020014762 protein_coding COL19A1 collagen type XIX alpha 1 chain [Source:HGNC Symbol;Acc:HGNC:2196]
ENSAPLG00020005112 protein_coding NA NA
ENSAPLG00020011196 protein_coding NA NA
ENSAPLG00020002067 protein_coding TMEM184A transmembrane protein 184A [Source:HGNC Symbol;Acc:HGNC:28797]
ENSAPLG00020016695 protein_coding NA NA
ENSAPLG00020005345 protein_coding HSD11B2 hydroxysteroid 11-beta dehydrogenase 2 [Source:NCBI gene;Acc:101790751]
ENSAPLG00020009946 protein_coding HPGD 15-hydroxyprostaglandin dehydrogenase [Source:HGNC Symbol;Acc:HGNC:5154]
ENSAPLG00020011564 protein_coding NA NA
ENSAPLG00020012541 protein_coding SESN2 sestrin 2 [Source:HGNC Symbol;Acc:HGNC:20746]
ENSAPLG00020015729 protein_coding C4orf19 chromosome 4 C4orf19 homolog [Source:NCBI gene;Acc:101796779]
ENSAPLG00020005545 protein_coding EXOC3L4 exocyst complex component 3 like 4 [Source:HGNC Symbol;Acc:HGNC:20120]
ENSAPLG00020015525 protein_coding NA NA
ENSAPLG00020012091 protein_coding NA NA
ENSAPLG00020001655 protein_coding CRISP3 cysteine rich secretory protein 3 [Source:HGNC Symbol;Acc:HGNC:16904]
ENSAPLG00020002852 protein_coding NA NA
ENSAPLG00020005852 protein_coding NA NA
ENSAPLG00020014622 protein_coding NA NA
ENSAPLG00020016310 protein_coding CFTR CF transmembrane conductance regulator [Source:HGNC Symbol;Acc:HGNC:1884]
ENSAPLG00020016962 scaRNA NA small Cajal body-specific RNA 2 [Source:HGNC Symbol;Acc:HGNC:32558]
ENSAPLG00020005866 protein_coding DUSP16 dual specificity phosphatase 16 [Source:NCBI gene;Acc:101790899]
ENSAPLG00020000142 protein_coding SLC9A4 solute carrier family 9 member A4 [Source:HGNC Symbol;Acc:HGNC:11077]
ENSAPLG00020006621 protein_coding NA protein kinase C eta [Source:NCBI gene;Acc:101799041]
ENSAPLG00020018298 protein_coding PGAP6 post-glycosylphosphatidylinositol attachment to proteins 6 [Source:NCBI gene;Acc:101802392]
ENSAPLG00020015010 protein_coding LINGO3 leucine rich repeat and Ig domain containing 3 [Source:HGNC Symbol;Acc:HGNC:21206]
ENSAPLG00020003094 lncRNA NA NA
ENSAPLG00020015289 protein_coding NA NA
ENSAPLG00020013527 protein_coding ADCY8 adenylate cyclase 8 [Source:NCBI gene;Acc:101794043]
ENSAPLG00020007580 pseudogene NA NA
ENSAPLG00020015905 protein_coding NA NA
ENSAPLG00020002125 protein_coding ACTA1 actin alpha 1, skeletal muscle [Source:NCBI gene;Acc:101802755]
ENSAPLG00020008456 protein_coding NA tight junction protein 2 [Source:NCBI gene;Acc:101798664]
ENSAPLG00020003263 protein_coding NA NA
ENSAPLG00020003115 protein_coding NA NA
ENSAPLG00020015577 snRNA NA U1 spliceosomal RNA [Source:RFAM;Acc:RF00003]
ENSAPLG00020012365 protein_coding NA NA
ENSAPLG00020011889 snRNA NA U2 spliceosomal RNA [Source:RFAM;Acc:RF00004]
ENSAPLG00020005820 pseudogene NA NA
ENSAPLG00020003671 pseudogene NA NA
ENSAPLG00020011926 Y_RNA NA Y RNA [Source:RFAM;Acc:RF00019]

Pathway analysis

de.entrez <- annot %>% filter(ensembl_gene_id %in% dt.tib$gene) %>% na.omit(entrezgene_id)
entrez.universe <- annot %>% na.omit(entrezgene_id)

K <- kegga(de.entrez$entrezgene_id, universe = entrez.universe$entrezgene_id, species.KEGG = "apla") %>% 
  as_tibble(rownames = NA) %>% 
  rownames_to_column("PathwayID") %>% 
  filter(P.DE < 0.05) %>% 
  arrange(P.DE)

keggList <- getGeneKEGGLinks(species.KEGG = "apla") %>% 
  as_tibble() %>% 
  rename("entrezgene_id" = GeneID)

keggGenes = data.frame()
for (path in nrow(K)){
    procPath <- filter(keggList, entrezgene_id %in% de.entrez$entrezgene_id)
    df <- data.frame(procPath)
    keggGenes <- rbind(keggGenes, df)
}

inner_join(keggGenes, K) %>%
  as_tibble() %>% 
  mutate(entrezgene_id = as.numeric(entrezgene_id)) %>% 
  left_join(annot) %>% 
  select(Pathway, PathwayID, N, DE, P.DE, ensembl_gene_id, hgnc_symbol, entrezgene_id) %>% 
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Pathway PathwayID N DE P.DE ensembl_gene_id hgnc_symbol entrezgene_id
Steroid hormone biosynthesis path:apla00140 9 1 0.0229452 ENSAPLG00020005345 HSD11B2 101790751