This is an analysis of differential expression at the gene level between supershedder groupings based on shedding rates measured on the day of sacrifice for infected mallard bursa samples.
## Load packages and data
library(limma)
library(edgeR)
library(Glimma)
library(gplots)
library(ggrepel)
library(RColorBrewer)
library(gridExtra)
library(kableExtra)
library(tidyverse)
## Load data
cnt <- read.table("G:/Shared drives/RNA Seq Supershedder Project/MALL DE manuscript/mallardLPAIV/extData/all.genes.results.csv", header = TRUE) %>% as_tibble(rownames = NA)
covars <- read_delim("G:/Shared drives/RNA Seq Supershedder Project/MALL DE manuscript/mallardLPAIV/extData/MALL70_SSgroup_RAW_11.5.20.csv", delim = ",")
annot <- read_delim("G:/Shared drives/RNA Seq Supershedder Project/MALL DE manuscript/mallardLPAIV/extData/annotationGenes103120.txt", delim = "\t")
newNames <- names(cnt) %>%
as_tibble() %>%
separate(value, into = c("pt1", "pt2", "pt3")) %>%
select(pt2, pt3) %>%
mutate(pt2 = as.numeric(pt2),
pt2 = formatC(pt2, width = 2, format = "d", flag = "0")) %>%
mutate(newName = paste0("bursa_", pt2))
colnames(cnt) <- newNames$newName
cnt <- cnt %>%
select(order(colnames(cnt)))
covars <- covars %>%
mutate(bird = as.numeric(bird)) %>%
mutate(bird = formatC(bird, width = 2, format = "d", flag = "0")) %>%
arrange(bird) %>%
mutate(bird = as.factor(bird)) %>%
mutate(group = str_remove(group, "-"))
## Remove birds with issues
cnt <- cnt %>% select(-bursa_01, -bursa_72)
covars <- covars %>% filter(bird != "01", bird != "72")
Here we standardize expression levels across samples of varying depth by using the counts per million (CPM) and trimmed mean of M-values (TMM) methods. We also remove any genes that are expressed at few than 0.5 CPM in at least 25% of individuals.
#Convert to DGEList object
dge <- DGEList(counts=cnt)
dge$genes <- annot[match(rownames(dge$counts), annot$ensembl_gene_id),]
#CPM and log-CPM
cpm <- cpm(dge)
lcpm <- cpm(dge, log = TRUE)
#### Retain only genes expressed at >0.5 CPM in at least 25% of individuals
table(rowSums(dge$counts==0) == length(cnt[1,]))
##
## FALSE TRUE
## 16979 1511
keep.exprs <- rowSums(cpm>0.5) >= length(cnt[1,])/4
sum(keep.exprs)
## [1] 13110
dge <- dge[keep.exprs,, keep.lib.sizes=FALSE]
table(rowSums(dge$counts==0) == length(cnt[1,]))
##
## FALSE
## 13110
dim(dge)
## [1] 13110 68
Following normalization, we subset the data based on infection groups.
#Assign groups in DGElist
group <- recode(covars$group, C1 = "Ctl", C29 = "Ctl") %>%
as.factor()
dge$samples$group <- group
### Divide out I1 and I2
dge.I1 <- dge[,grep('\\bI1\\b', dge$samples$group)]
dge.I1 <- calcNormFactors(dge.I1, method = "TMM")
dge.I2 <- dge[,grep('\\bI2\\b', dge$samples$group)]
dge.I2 <- calcNormFactors(dge.I2, method = "TMM")
covars.I1 <- covars %>% filter(group == "I1")
covars.I2 <- covars %>% filter(group == "I2")
## Establish design matrix: I1
design.I1 = model.matrix(~ 0 +
factor(covars.I1$SSgroup.virus.sac) +
covars.I1$age +
covars.I1$sex +
covars.I1$Bursa_pool)
colnames(design.I1) <- c("HIGH", "LOW", "MODERATE",
"age", "sexM", "poolMSU2",
"poolMSU3", "poolMSU4", "poolMSU5")
contr.matrix.I1 = makeContrasts(
LvM = LOW - MODERATE,
MvH = MODERATE - HIGH,
LvH = LOW - HIGH,
levels = colnames(design.I1))
## Mean-variance trend plots
v.I1 <- voomWithQualityWeights(dge.I1, design.I1, plot = FALSE)
#Fitting models
vfit.I1 <- lmFit(v.I1, design.I1)
vfit.I1 <- contrasts.fit(vfit.I1, contrasts = contr.matrix.I1)
tfit.I1 <- treat(vfit.I1, lfc = 0.5)
## Identify DE genes
dt.I1 <- decideTests(tfit.I1, p.value = 0.1, adjust.method = "fdr")
dt.tib.I1 <- as_tibble(dt.I1, rownames = NA) %>%
rownames_to_column("gene") %>%
mutate_at(vars(starts_with("L")), as.numeric) %>%
mutate_at(vars(starts_with("M")), as.numeric) %>%
rename(LvM.I1 = LvM,
MvH.I1 = MvH,
LvH.I1 = LvH) %>%
filter_at(vars(LvM.I1:LvH.I1), any_vars(. != 0))
## Establish design matrix: I2
design.I2 = model.matrix(
~ 0 +
factor(covars.I2$SSgroup.virus.sac) +
covars.I2$age +
covars.I2$sex +
covars.I2$Bursa_pool
)
colnames(design.I2) <- c(
"HIGH",
"LOW",
"MODERATE",
"age",
"sexM",
"poolMSU2",
"poolMSU3",
"poolMSU4",
"poolMSU5"
)
contr.matrix.I2 = makeContrasts(
LvM = LOW - MODERATE,
MvH = MODERATE - HIGH,
LvH = LOW - HIGH,
levels = colnames(design.I2))
## Mean-variance trend plots
v.I2 <- voomWithQualityWeights(dge.I2, design.I2, plot = FALSE)
#Fitting models
vfit.I2 <- lmFit(v.I2, design.I2)
vfit.I2 <- contrasts.fit(vfit.I2, contrasts = contr.matrix.I2)
tfit.I2 <- treat(vfit.I2, lfc = 0.5)
## Identify DE genes
dt.I2 <- decideTests(tfit.I2, p.value = 0.1, adjust.method = "fdr")
dt.tib.I2 <- as_tibble(dt.I2, rownames = NA) %>%
rownames_to_column("gene") %>%
mutate_at(vars(starts_with("L")), as.numeric) %>%
mutate_at(vars(starts_with("M")), as.numeric) %>%
rename(LvM.I2 = LvM,
MvH.I2 = MvH,
LvH.I2 = LvH) %>%
filter_at(vars(LvM.I2:LvH.I2), any_vars(. != 0))
## Bring it all together
dt.tib <- full_join(dt.tib.I1, dt.tib.I2) %>%
mutate_at(vars(starts_with("L")), as.numeric) %>%
mutate_at(vars(starts_with("M")), as.numeric) %>%
replace(is.na(.), 0)
## Joining, by = "gene"
For a gene to be considered differentially expressed, we require a p-value of 0.1 with a false discovery rate correction and a log fold count difference of 1.
allResults <- rbind(
as.data.frame(summary(dt.I1)),
as.data.frame(summary(dt.I2))) %>%
filter(Var1 != "NotSig")
kable(allResults) %>%
kable_styling("striped", full_width = F) %>%
pack_rows("1 Day Post Infection", 1, 6) %>%
pack_rows("2 Days Post Infection", 7, 12)
Var1 | Var2 | Freq |
---|---|---|
1 Day Post Infection | ||
Down | LvM | 0 |
Up | LvM | 0 |
Down | MvH | 0 |
Up | MvH | 57 |
Down | LvH | 0 |
Up | LvH | 0 |
2 Days Post Infection | ||
Down | LvM | 3 |
Up | LvM | 4 |
Down | MvH | 5 |
Up | MvH | 7 |
Down | LvH | 5 |
Up | LvH | 7 |
tmp1 <- topTreat(tfit.I1, coef = 1, n = Inf)
results1 <- mutate(tmp1, sig=ifelse(tmp1$adj.P.Val<0.1, "Sig", "Not Sig"))
p1 <- ggplot(results1, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Low vs. Moderate shedding, Day 1") +
ylab("-Log10(p-value)") +
xlab("Log(Fold count)") +
theme_bw() +
theme(legend.position = "none")
tmp2 <- topTreat(tfit.I1, coef = 2, n = Inf)
results2 <- mutate(tmp2, sig=ifelse(tmp2$adj.P.Val<0.1, "Sig", "Not Sig"))
p2 <- ggplot(results2, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Moderate vs. High shedding, Day 1") +
ylab("-Log10(p-value)") +
xlab("Log(Fold count)") +
theme_bw() +
theme(legend.position = "none")
tmp3 <- topTreat(tfit.I1, coef = 3, n = Inf)
results3 <- mutate(tmp3, sig=ifelse(tmp3$adj.P.Val<0.1, "Sig", "Not Sig"))
p3 <- ggplot(results3, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Low vs. High shedding, Day 1") +
ylab("-Log10(p-value)") +
xlab("Log(Fold count)") +
theme_bw() +
theme(legend.position = "none")
tmp4 <- topTreat(tfit.I2, coef = 1, n = Inf)
results4 <- mutate(tmp4, sig=ifelse(tmp4$adj.P.Val<0.1, "Sig", "Not Sig"))
p4 <- ggplot(results4, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Low vs. Moderate shedding, Day 2") +
ylab("-Log10(p-value)") +
xlab("Log(Fold count)") +
theme_bw() +
theme(legend.position = "none")
tmp5 <- topTreat(tfit.I2, coef = 2, n = Inf)
results5 <- mutate(tmp5, sig=ifelse(tmp5$adj.P.Val<0.1, "Sig", "Not Sig"))
p5 <- ggplot(results5, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Moderate vs. High shedding, Day 2") +
ylab("-Log10(p-value)") +
xlab("Log(Fold count)") +
theme_bw() +
theme(legend.position = "none")
tmp6 <- topTreat(tfit.I2, coef = 3, n = Inf)
results6 <- mutate(tmp6, sig=ifelse(tmp6$adj.P.Val<0.1, "Sig", "Not Sig"))
p6 <- ggplot(results6, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Low vs. High shedding, Day 2") +
ylab("-Log10(p-value)") +
xlab("Log(Fold count)") +
theme_bw() +
theme(legend.position = "none")
grid.arrange(p1, p2, p3, p4, p5, p6, nrow = 2)
# Get the gene names for DE genes
lcpm2 <- as_tibble(dge.I1$counts, rownames = NA) %>%
cpm(., log = TRUE)
newNames <- colnames(lcpm2) %>%
as_tibble() %>%
separate(value, into = c(NA, "bird")) %>%
bind_cols(covars.I1) %>%
unite("newNames", SSgroup.virus.sac, group, bird)
colOrder <- bind_cols(covars.I1) %>%
mutate(SSgroup.virus.sac = fct_relevel(SSgroup.virus.sac, "LOW", "MODERATE", "HIGH")) %>%
arrange(SSgroup.virus.sac, group, bird) %>%
unite("newNames", SSgroup.virus.sac, group, bird)
colnames(lcpm2) <- newNames$newNames
lcpm2 <- lcpm2[, colOrder$newNames]
heatmap.annot <- annot %>%
filter(ensembl_gene_id %in% dt.tib.I1$gene) %>%
mutate(hgnc_symbol = replace_na(hgnc_symbol, ".")) %>%
unite(annot.hm, c(ensembl_gene_id, hgnc_symbol), sep = "-", remove = FALSE) %>%
select(annot.hm, ensembl_gene_id)
deGeneCounts <- lcpm2 %>%
as_tibble() %>%
mutate(ensembl_gene_id = rownames(dge.I1$counts)) %>%
filter(ensembl_gene_id %in% dt.tib.I1$gene) %>%
left_join(heatmap.annot) %>%
column_to_rownames("annot.hm") %>%
select(-ensembl_gene_id) %>%
as.matrix()
## Set up palette
mypalette <- brewer.pal(11,"RdYlBu")
morecols <- colorRampPalette(mypalette)
hc <- hclust(as.dist(1-cor(t(deGeneCounts))))
# Plot the heatmap
heatmap.2(deGeneCounts,
Colv = FALSE,
Rowv = as.dendrogram(hc),
col = rev(morecols(50)),
trace = "none",
colsep = c(6, 12),
dendrogram = "row",
density.info = "none",
key = TRUE,
margins = c(10, 14),
scale ="row")
# Get the gene names for DE genes
lcpm2 <- as_tibble(dge.I2$counts, rownames = NA) %>%
cpm(., log = TRUE)
newNames <- colnames(lcpm2) %>%
as_tibble() %>%
separate(value, into = c(NA, "bird")) %>%
bind_cols(covars.I2) %>%
unite("newNames", SSgroup.virus.sac, group, bird)
colOrder <- bind_cols(covars.I2) %>%
mutate(SSgroup.virus.sac = fct_relevel(SSgroup.virus.sac, "LOW", "MODERATE", "HIGH")) %>%
arrange(SSgroup.virus.sac, group, bird) %>%
unite("newNames", SSgroup.virus.sac, group, bird)
colnames(lcpm2) <- newNames$newNames
lcpm2 <- lcpm2[, colOrder$newNames]
heatmap.annot <- annot %>%
filter(ensembl_gene_id %in% dt.tib.I2$gene) %>%
mutate(hgnc_symbol = replace_na(hgnc_symbol, ".")) %>%
unite(annot.hm, c(ensembl_gene_id, hgnc_symbol), sep = "-", remove = FALSE) %>%
select(annot.hm, ensembl_gene_id)
deGeneCounts <- lcpm2 %>%
as_tibble() %>%
mutate(ensembl_gene_id = rownames(dge.I2$counts)) %>%
filter(ensembl_gene_id %in% dt.tib.I2$gene) %>%
left_join(heatmap.annot) %>%
column_to_rownames("annot.hm") %>%
select(-ensembl_gene_id) %>%
as.matrix()
## Set up palette
mypalette <- brewer.pal(11,"RdYlBu")
morecols <- colorRampPalette(mypalette)
hc <- hclust(as.dist(1-cor(t(deGeneCounts))))
# Plot the heatmap
heatmap.2(deGeneCounts,
Colv = FALSE,
Rowv = as.dendrogram(hc),
col = rev(morecols(50)),
trace = "none",
colsep = c(4, 8),
dendrogram = "row",
density.info = "none",
key = TRUE,
margins = c(12, 12),
scale ="row")
lcpm.DE <- lcpm %>%
as_tibble(rownames = NA) %>%
rownames_to_column("identifier") %>%
filter(identifier %in% dt.tib.I1$gene) %>%
pivot_longer(cols = contains("_"),
names_to = "sample",
values_to = "lcpm") %>%
separate(sample, into = c("tissue", "bird")) %>%
mutate(bird = as.factor(bird)) %>%
left_join(covars, by = "bird") %>%
filter(group == "C1" | group == "C14" | group == "I1") %>%
select(identifier, bird, lcpm, SSgroup.virus.sac, group) %>%
mutate(group = recode(group, C29 = "Control"),
group = recode(group, C1 = "Control"),
tmpID = ifelse(group == "Control", 'Control', 'Infected'))
aprioriPlotting <- function(target, ...) {
annot.target <- annot %>%
select(
ensembl_gene_id,
hgnc_symbol) %>%
filter(ensembl_gene_id == target) %>%
mutate(hgnc_symbol = replace_na(hgnc_symbol, "."))
plot <- lcpm.DE %>%
filter(identifier == target) %>%
mutate(SSgroup.virus.sac = fct_relevel(SSgroup.virus.sac, "LOW", "MODERATE", "HIGH")) %>%
ggplot(aes(x = SSgroup.virus.sac, y = lcpm)) +
facet_grid(. ~ tmpID, scales = "free", space = "free") +
ylab("Log2(Counts per million)") +
xlab("Shedding Group") +
scale_fill_grey(start = 0.35, end = 1) +
geom_point(position = position_dodge(width=0.75), aes(group = group), show.legend = FALSE) +
geom_boxplot(alpha = 0.5) +
geom_label_repel(aes(label = bird, group = group, fill = NULL), position = position_dodge(width=0.75), show.legend=FALSE) +
theme_classic() +
labs(title = paste0("I1: ", target, " - ", annot.target[1,2])) +
theme(legend.title = element_blank())
print(plot)
}
for(target in sort(unique(dt.tib.I1$gene))) {
aprioriPlotting(target)
}
lcpm.DE <- lcpm %>%
as_tibble(rownames = NA) %>%
rownames_to_column("identifier") %>%
filter(identifier %in% dt.tib.I2$gene) %>%
pivot_longer(cols = contains("_"),
names_to = "sample",
values_to = "lcpm") %>%
separate(sample, into = c("tissue", "bird")) %>%
mutate(bird = as.factor(bird)) %>%
left_join(covars, by = "bird") %>%
filter(group == "C1" | group == "C14" | group == "I2") %>%
select(identifier, bird, lcpm, SSgroup.virus.sac, group) %>%
mutate(group = recode(group, C29 = "Control"),
group = recode(group, C1 = "Control"),
tmpID = ifelse(group == "Control", 'Control', 'Infected'))
aprioriPlotting <- function(target, ...) {
annot.target <- annot %>%
select(
ensembl_gene_id,
hgnc_symbol) %>%
filter(ensembl_gene_id == target) %>%
mutate(hgnc_symbol = replace_na(hgnc_symbol, "."))
plot <- lcpm.DE %>%
filter(identifier == target) %>%
mutate(SSgroup.virus.sac = fct_relevel(SSgroup.virus.sac, "LOW", "MODERATE", "HIGH")) %>%
ggplot(aes(x = SSgroup.virus.sac, y = lcpm)) +
facet_grid(. ~ tmpID, scales = "free", space = "free") +
ylab("Log2(Counts per million)") +
xlab("Shedding Group") +
scale_fill_grey(start = 0.35, end = 1) +
geom_point(position = position_dodge(width=0.75), aes(group = group), show.legend = FALSE) +
geom_boxplot(alpha = 0.5) +
geom_label_repel(aes(label = bird, group = group, fill = NULL), position = position_dodge(width=0.75), show.legend=FALSE) +
theme_classic() +
labs(title = paste0("I2: ", target, " - ", annot.target[1,2])) +
theme(legend.title = element_blank())
print(plot)
}
for(target in sort(unique(dt.tib.I2$gene))) {
aprioriPlotting(target)
}
ns denotes non-significant genes for each comparison and numerical values are the log(fold count) difference
tmp1a <- tmp1 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "LvM.I1")
tmp2a <- tmp2 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "MvH.I1")
tmp3a <- tmp3 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "LvH.I1")
tmp4a <- tmp4 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "LvM.I2")
tmp5a <- tmp5 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "MvH.I2")
tmp6a <- tmp6 %>% select(ensembl_gene_id, adj.P.Val, gene_biotype, logFC, hgnc_symbol, description) %>% mutate(comp = "LvH.I2")
bind_rows(tmp1a, tmp2a, tmp3a, tmp4a, tmp5a, tmp6a) %>%
select(-description) %>%
filter(ensembl_gene_id %in% dt.tib$gene) %>%
mutate(logFoldCount = ifelse(adj.P.Val < 0.10, round(logFC, 2), "ns")) %>%
select(-adj.P.Val, -logFC, -gene_biotype) %>%
pivot_wider(names_from = comp, values_from = logFoldCount) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
ensembl_gene_id | hgnc_symbol | LvM.I1 | MvH.I1 | LvH.I1 | LvM.I2 | MvH.I2 | LvH.I2 |
---|---|---|---|---|---|---|---|
ENSAPLG00020012805 | NA | ns | 5.88 | ns | ns | ns | ns |
ENSAPLG00020012792 | NA | ns | 6.33 | ns | ns | ns | ns |
ENSAPLG00020015339 | NA | ns | 6.72 | ns | ns | ns | ns |
ENSAPLG00020017609 | NA | ns | 7.28 | ns | ns | ns | ns |
ENSAPLG00020005067 | NA | ns | 4.56 | ns | ns | ns | ns |
ENSAPLG00020017000 | MSLNL | ns | 4.8 | ns | ns | ns | ns |
ENSAPLG00020006243 | RARRES1 | ns | 4.75 | ns | ns | ns | ns |
ENSAPLG00020010722 | DUOX2 | ns | 4.75 | ns | ns | ns | ns |
ENSAPLG00020016974 | NOXO1 | ns | 6 | ns | ns | ns | ns |
ENSAPLG00020002670 | FAM3D | ns | 4.89 | ns | ns | ns | ns |
ENSAPLG00020015459 | ATP10B | ns | 4.7 | ns | ns | ns | ns |
ENSAPLG00020013506 | NA | ns | 6.55 | ns | ns | ns | 7.11 |
ENSAPLG00020011723 | NA | ns | 4.51 | ns | ns | ns | ns |
ENSAPLG00020000599 | NA | ns | 5.08 | ns | ns | ns | ns |
ENSAPLG00020006095 | NA | ns | 4.04 | ns | ns | ns | ns |
ENSAPLG00020003190 | SLC5A1 | ns | 4.46 | ns | ns | ns | ns |
ENSAPLG00020001770 | AGR2 | ns | 5.28 | ns | ns | ns | ns |
ENSAPLG00020003213 | NA | ns | 6.78 | ns | ns | ns | ns |
ENSAPLG00020006537 | S100A12 | ns | 6.04 | ns | ns | ns | ns |
ENSAPLG00020009922 | NOX1 | ns | 4.52 | ns | ns | ns | ns |
ENSAPLG00020002072 | NA | ns | 4.91 | ns | ns | ns | ns |
ENSAPLG00020017274 | QSOX1 | ns | 2.96 | ns | ns | ns | ns |
ENSAPLG00020000995 | NA | ns | 8.55 | ns | ns | ns | ns |
ENSAPLG00020011745 | LYZ | ns | 3.46 | ns | ns | 9.66 | 7.53 |
ENSAPLG00020016998 | NA | ns | 2.69 | ns | ns | ns | ns |
ENSAPLG00020010988 | XDH | ns | 1.74 | ns | ns | ns | ns |
ENSAPLG00020016372 | LPO | ns | 4.56 | ns | ns | ns | ns |
ENSAPLG00020005548 | NA | ns | 7.43 | ns | ns | ns | ns |
ENSAPLG00020006319 | DIO2 | ns | 3.32 | ns | ns | ns | ns |
ENSAPLG00020005338 | ATP13A4 | ns | 4.3 | ns | ns | ns | ns |
ENSAPLG00020015787 | SLC34A2 | ns | 5.22 | ns | ns | ns | ns |
ENSAPLG00020012965 | NA | ns | ns | ns | ns | 12.44 | 9.72 |
ENSAPLG00020014762 | COL19A1 | ns | 4.17 | ns | ns | ns | ns |
ENSAPLG00020005112 | NA | ns | 3.98 | ns | ns | ns | ns |
ENSAPLG00020011196 | NA | ns | 5.73 | ns | ns | ns | ns |
ENSAPLG00020002067 | TMEM184A | ns | 4.89 | ns | ns | ns | ns |
ENSAPLG00020016695 | NA | ns | 2.2 | ns | ns | ns | ns |
ENSAPLG00020005345 | HSD11B2 | ns | 4.27 | ns | ns | ns | ns |
ENSAPLG00020009946 | HPGD | ns | 3.82 | ns | ns | ns | ns |
ENSAPLG00020011564 | NA | ns | 5.39 | ns | ns | ns | ns |
ENSAPLG00020012541 | SESN2 | ns | 3.73 | ns | ns | ns | ns |
ENSAPLG00020015729 | C4orf19 | ns | 4.08 | ns | ns | ns | ns |
ENSAPLG00020005545 | EXOC3L4 | ns | 3.52 | ns | ns | ns | ns |
ENSAPLG00020015525 | NA | ns | ns | ns | ns | 25.08 | 20.17 |
ENSAPLG00020012091 | NA | ns | 6.48 | ns | ns | ns | ns |
ENSAPLG00020001655 | CRISP3 | ns | 5.94 | ns | ns | ns | ns |
ENSAPLG00020002852 | NA | ns | 4.2 | ns | ns | ns | ns |
ENSAPLG00020005852 | NA | ns | 3.46 | ns | ns | ns | ns |
ENSAPLG00020014622 | NA | ns | 2.1 | ns | ns | ns | ns |
ENSAPLG00020016310 | CFTR | ns | 3.03 | ns | ns | ns | ns |
ENSAPLG00020016962 | NA | ns | ns | ns | -7.37 | 18.86 | 11.49 |
ENSAPLG00020005866 | DUSP16 | ns | 2.21 | ns | ns | ns | ns |
ENSAPLG00020000142 | SLC9A4 | ns | 5.57 | ns | ns | ns | ns |
ENSAPLG00020006621 | NA | ns | 2.85 | ns | ns | ns | ns |
ENSAPLG00020018298 | PGAP6 | ns | 1.9 | ns | ns | ns | ns |
ENSAPLG00020015010 | LINGO3 | ns | 5.69 | ns | ns | ns | ns |
ENSAPLG00020003094 | NA | ns | ns | ns | 5.15 | -16.49 | -11.34 |
ENSAPLG00020015289 | NA | ns | 4.4 | ns | ns | ns | ns |
ENSAPLG00020013527 | ADCY8 | ns | 3.68 | ns | ns | ns | ns |
ENSAPLG00020007580 | NA | ns | 3.46 | ns | ns | ns | ns |
ENSAPLG00020015905 | NA | ns | ns | ns | 5.15 | -15.34 | -10.18 |
ENSAPLG00020002125 | ACTA1 | ns | ns | ns | ns | 15.2 | 10.92 |
ENSAPLG00020008456 | NA | ns | 1.73 | ns | ns | ns | ns |
ENSAPLG00020003263 | NA | ns | ns | ns | ns | 12.82 | 10.72 |
ENSAPLG00020003115 | NA | ns | ns | ns | 3.9 | -10.73 | ns |
ENSAPLG00020015577 | NA | ns | ns | ns | -5.5 | ns | ns |
ENSAPLG00020012365 | NA | ns | ns | ns | ns | -31.17 | -22.97 |
ENSAPLG00020011889 | NA | ns | ns | ns | ns | -23.71 | -18.98 |
ENSAPLG00020005820 | NA | ns | ns | ns | 5.62 | ns | ns |
ENSAPLG00020003671 | NA | ns | ns | ns | ns | ns | -5.87 |
ENSAPLG00020011926 | NA | ns | ns | ns | -4 | 9.59 | ns |
bind_rows(tmp1a, tmp2a, tmp3a, tmp4a, tmp5a, tmp6a) %>%
select(-comp, -adj.P.Val, -logFC) %>%
filter(ensembl_gene_id %in% dt.tib$gene) %>%
distinct(.) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
ensembl_gene_id | gene_biotype | hgnc_symbol | description |
---|---|---|---|
ENSAPLG00020012805 | protein_coding | NA | NA |
ENSAPLG00020012792 | protein_coding | NA | NA |
ENSAPLG00020015339 | protein_coding | NA | NA |
ENSAPLG00020017609 | protein_coding | NA | solute carrier family 6 member 14 [Source:NCBI gene;Acc:101802830] |
ENSAPLG00020005067 | pseudogene | NA | NA |
ENSAPLG00020017000 | protein_coding | MSLNL | mesothelin like [Source:HGNC Symbol;Acc:HGNC:14170] |
ENSAPLG00020006243 | protein_coding | RARRES1 | retinoic acid receptor responder 1 [Source:HGNC Symbol;Acc:HGNC:9867] |
ENSAPLG00020010722 | protein_coding | DUOX2 | dual oxidase 2 [Source:HGNC Symbol;Acc:HGNC:13273] |
ENSAPLG00020016974 | protein_coding | NOXO1 | NADPH oxidase organizer 1 [Source:HGNC Symbol;Acc:HGNC:19404] |
ENSAPLG00020002670 | protein_coding | FAM3D | FAM3 metabolism regulating signaling molecule D [Source:HGNC Symbol;Acc:HGNC:18665] |
ENSAPLG00020015459 | protein_coding | ATP10B | ATPase phospholipid transporting 10B (putative) [Source:HGNC Symbol;Acc:HGNC:13543] |
ENSAPLG00020013506 | protein_coding | NA | beta-microseminoprotein [Source:NCBI gene;Acc:101795347] |
ENSAPLG00020011723 | protein_coding | NA | ovoinhibitor [Source:NCBI gene;Acc:101798824] |
ENSAPLG00020000599 | protein_coding | NA | NA |
ENSAPLG00020006095 | protein_coding | NA | NA |
ENSAPLG00020003190 | protein_coding | SLC5A1 | solute carrier family 5 member 1 [Source:HGNC Symbol;Acc:HGNC:11036] |
ENSAPLG00020001770 | protein_coding | AGR2 | anterior gradient 2, protein disulphide isomerase family member [Source:HGNC Symbol;Acc:HGNC:328] |
ENSAPLG00020003213 | protein_coding | NA | NA |
ENSAPLG00020006537 | protein_coding | S100A12 | S100 calcium binding protein A12 [Source:HGNC Symbol;Acc:HGNC:10489] |
ENSAPLG00020009922 | protein_coding | NOX1 | NADPH oxidase 1 [Source:NCBI gene;Acc:101798310] |
ENSAPLG00020002072 | protein_coding | NA | NA |
ENSAPLG00020017274 | protein_coding | QSOX1 | quiescin sulfhydryl oxidase 1 [Source:HGNC Symbol;Acc:HGNC:9756] |
ENSAPLG00020000995 | protein_coding | NA | NA |
ENSAPLG00020011745 | protein_coding | LYZ | lysozyme [Source:HGNC Symbol;Acc:HGNC:6740] |
ENSAPLG00020016998 | protein_coding | NA | NA |
ENSAPLG00020010988 | protein_coding | XDH | xanthine dehydrogenase [Source:HGNC Symbol;Acc:HGNC:12805] |
ENSAPLG00020016372 | protein_coding | LPO | lactoperoxidase [Source:HGNC Symbol;Acc:HGNC:6678] |
ENSAPLG00020005548 | protein_coding | NA | NA |
ENSAPLG00020006319 | protein_coding | DIO2 | iodothyronine deiodinase 2 [Source:HGNC Symbol;Acc:HGNC:2884] |
ENSAPLG00020005338 | protein_coding | ATP13A4 | probable cation-transporting ATPase 13A4 [Source:NCBI gene;Acc:101794117] |
ENSAPLG00020015787 | protein_coding | SLC34A2 | solute carrier family 34 member 2 [Source:NCBI gene;Acc:101803835] |
ENSAPLG00020012965 | protein_coding | NA | Anas platyrhynchos free fatty acid receptor 4 (FFAR4), mRNA. [Source:RefSeq mRNA;Acc:NM_001310831] |
ENSAPLG00020014762 | protein_coding | COL19A1 | collagen type XIX alpha 1 chain [Source:HGNC Symbol;Acc:HGNC:2196] |
ENSAPLG00020005112 | protein_coding | NA | NA |
ENSAPLG00020011196 | protein_coding | NA | NA |
ENSAPLG00020002067 | protein_coding | TMEM184A | transmembrane protein 184A [Source:HGNC Symbol;Acc:HGNC:28797] |
ENSAPLG00020016695 | protein_coding | NA | NA |
ENSAPLG00020005345 | protein_coding | HSD11B2 | hydroxysteroid 11-beta dehydrogenase 2 [Source:NCBI gene;Acc:101790751] |
ENSAPLG00020009946 | protein_coding | HPGD | 15-hydroxyprostaglandin dehydrogenase [Source:HGNC Symbol;Acc:HGNC:5154] |
ENSAPLG00020011564 | protein_coding | NA | NA |
ENSAPLG00020012541 | protein_coding | SESN2 | sestrin 2 [Source:HGNC Symbol;Acc:HGNC:20746] |
ENSAPLG00020015729 | protein_coding | C4orf19 | chromosome 4 C4orf19 homolog [Source:NCBI gene;Acc:101796779] |
ENSAPLG00020005545 | protein_coding | EXOC3L4 | exocyst complex component 3 like 4 [Source:HGNC Symbol;Acc:HGNC:20120] |
ENSAPLG00020015525 | protein_coding | NA | NA |
ENSAPLG00020012091 | protein_coding | NA | NA |
ENSAPLG00020001655 | protein_coding | CRISP3 | cysteine rich secretory protein 3 [Source:HGNC Symbol;Acc:HGNC:16904] |
ENSAPLG00020002852 | protein_coding | NA | NA |
ENSAPLG00020005852 | protein_coding | NA | NA |
ENSAPLG00020014622 | protein_coding | NA | NA |
ENSAPLG00020016310 | protein_coding | CFTR | CF transmembrane conductance regulator [Source:HGNC Symbol;Acc:HGNC:1884] |
ENSAPLG00020016962 | scaRNA | NA | small Cajal body-specific RNA 2 [Source:HGNC Symbol;Acc:HGNC:32558] |
ENSAPLG00020005866 | protein_coding | DUSP16 | dual specificity phosphatase 16 [Source:NCBI gene;Acc:101790899] |
ENSAPLG00020000142 | protein_coding | SLC9A4 | solute carrier family 9 member A4 [Source:HGNC Symbol;Acc:HGNC:11077] |
ENSAPLG00020006621 | protein_coding | NA | protein kinase C eta [Source:NCBI gene;Acc:101799041] |
ENSAPLG00020018298 | protein_coding | PGAP6 | post-glycosylphosphatidylinositol attachment to proteins 6 [Source:NCBI gene;Acc:101802392] |
ENSAPLG00020015010 | protein_coding | LINGO3 | leucine rich repeat and Ig domain containing 3 [Source:HGNC Symbol;Acc:HGNC:21206] |
ENSAPLG00020003094 | lncRNA | NA | NA |
ENSAPLG00020015289 | protein_coding | NA | NA |
ENSAPLG00020013527 | protein_coding | ADCY8 | adenylate cyclase 8 [Source:NCBI gene;Acc:101794043] |
ENSAPLG00020007580 | pseudogene | NA | NA |
ENSAPLG00020015905 | protein_coding | NA | NA |
ENSAPLG00020002125 | protein_coding | ACTA1 | actin alpha 1, skeletal muscle [Source:NCBI gene;Acc:101802755] |
ENSAPLG00020008456 | protein_coding | NA | tight junction protein 2 [Source:NCBI gene;Acc:101798664] |
ENSAPLG00020003263 | protein_coding | NA | NA |
ENSAPLG00020003115 | protein_coding | NA | NA |
ENSAPLG00020015577 | snRNA | NA | U1 spliceosomal RNA [Source:RFAM;Acc:RF00003] |
ENSAPLG00020012365 | protein_coding | NA | NA |
ENSAPLG00020011889 | snRNA | NA | U2 spliceosomal RNA [Source:RFAM;Acc:RF00004] |
ENSAPLG00020005820 | pseudogene | NA | NA |
ENSAPLG00020003671 | pseudogene | NA | NA |
ENSAPLG00020011926 | Y_RNA | NA | Y RNA [Source:RFAM;Acc:RF00019] |
de.entrez <- annot %>% filter(ensembl_gene_id %in% dt.tib$gene) %>% na.omit(entrezgene_id)
entrez.universe <- annot %>% na.omit(entrezgene_id)
K <- kegga(de.entrez$entrezgene_id, universe = entrez.universe$entrezgene_id, species.KEGG = "apla") %>%
as_tibble(rownames = NA) %>%
rownames_to_column("PathwayID") %>%
filter(P.DE < 0.05) %>%
arrange(P.DE)
keggList <- getGeneKEGGLinks(species.KEGG = "apla") %>%
as_tibble() %>%
rename("entrezgene_id" = GeneID)
keggGenes = data.frame()
for (path in nrow(K)){
procPath <- filter(keggList, entrezgene_id %in% de.entrez$entrezgene_id)
df <- data.frame(procPath)
keggGenes <- rbind(keggGenes, df)
}
inner_join(keggGenes, K) %>%
as_tibble() %>%
mutate(entrezgene_id = as.numeric(entrezgene_id)) %>%
left_join(annot) %>%
select(Pathway, PathwayID, N, DE, P.DE, ensembl_gene_id, hgnc_symbol, entrezgene_id) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Pathway | PathwayID | N | DE | P.DE | ensembl_gene_id | hgnc_symbol | entrezgene_id |
---|---|---|---|---|---|---|---|
Steroid hormone biosynthesis | path:apla00140 | 9 | 1 | 0.0229452 | ENSAPLG00020005345 | HSD11B2 | 101790751 |