Establish design & contrast matrices
## Establish design matrix
design = model.matrix(~ 0 +
group +
age +
sex +
pool)
colnames(design) <- gsub("group", "", colnames(design))
contr.matrix = makeContrasts(
CtlvI1 = Ctl - I1,
CtlvI2 = Ctl - I2,
CtlvI5 = Ctl - I5,
CtlvI15 = Ctl - I15,
CtlvI29 = Ctl - I29,
I1vI2 = I1 - I2,
I1vI5 = I1 - I5,
I1vI15 = I1 - I15,
I1vI29 = I1 - I29,
I2vI5 = I2 - I5,
I2vI15 = I2 - I15,
I2vI29 = I2 - I29,
I5vI15 = I5 - I15,
I5vI29 = I5 - I29,
I15vI29 = I15 - I29,
levels = colnames(design))
## Mean-variance trend and sample weight plots
v <- voomWithQualityWeights(dge, design, plot = TRUE)

## Fitting the model
vfit <- lmFit(v, design)
vfit <- contrasts.fit(vfit, contrasts = contr.matrix)
tfit <- treat(vfit, lfc = 0.5)
plotSA(tfit)

Volcano plot
tmp1 <- topTreat(tfit, coef = 1, n = Inf)
results1 <- mutate(tmp1, sig=ifelse(tmp1$adj.P.Val<0.1, "Sig", "Not Sig"))
p1 <- ggplot(results1, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Control vs. Infected, Day 1") +
ylab("-Log10(p-value)") +
xlab("Log2(Fold change)") +
theme_bw() +
theme(legend.position = "none")
tmp2 <- topTreat(tfit, coef = 2, n = Inf)
results2 <- mutate(tmp2, sig=ifelse(tmp2$adj.P.Val<0.1, "Sig", "Not Sig"))
p2 <- ggplot(results2, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Control vs. Infected, Day 2") +
ylab("-Log10(p-value)") +
xlab("Log2(Fold change)") +
theme_bw() +
theme(legend.position = "none")
tmp3 <- topTreat(tfit, coef = 3, n = Inf)
results3 <- mutate(tmp3, sig=ifelse(tmp3$adj.P.Val<0.1, "Sig", "Not Sig"))
p3 <- ggplot(results3, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Control vs. Infected, Day 5") +
ylab("-Log10(p-value)") +
xlab("Log2(Fold change)") +
theme_bw() +
theme(legend.position = "none")
tmp4 <- topTreat(tfit, coef = 4, n = Inf)
results4 <- mutate(tmp4, sig=ifelse(tmp4$adj.P.Val<0.1, "Sig", "Not Sig"))
p4 <- ggplot(results4, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Control vs. Infected, Day 15") +
ylab("-Log10(p-value)") +
xlab("Log2(Fold change)") +
theme_bw() +
theme(legend.position = "none")
tmp5 <- topTreat(tfit, coef = 5, n = Inf)
results5 <- mutate(tmp5, sig=ifelse(tmp5$adj.P.Val<0.1, "Sig", "Not Sig"))
p5 <- ggplot(results5, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Control vs. Infected, Day 29") +
ylab("-Log10(p-value)") +
xlab("Log2(Fold change)") +
theme_bw() +
theme(legend.position = "none")
tmp6 <- topTreat(tfit, coef = 6, n = Inf)
results6 <- mutate(tmp6, sig=ifelse(tmp6$adj.P.Val<0.1, "Sig", "Not Sig"))
p6 <- ggplot(results6, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Infected, Day 1 vs. Infected, Day 2") +
ylab("-Log10(p-value)") +
xlab("Log2(Fold change)") +
theme_bw() +
theme(legend.position = "none")
tmp7 <- topTreat(tfit, coef = 7, n = Inf)
results7 <- mutate(tmp7, sig=ifelse(tmp7$adj.P.Val<0.1, "Sig", "Not Sig"))
p7 <- ggplot(results7, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Infected, Day 1 vs. Infected, Day 5") +
ylab("-Log10(p-value)") +
xlab("Log2(Fold change)") +
theme_bw() +
theme(legend.position = "none")
tmp8 <- topTreat(tfit, coef = 8, n = Inf)
results8 <- mutate(tmp8, sig=ifelse(tmp8$adj.P.Val<0.1, "Sig", "Not Sig"))
p8 <- ggplot(results8, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Infected, Day 1 vs. Infected, Day 15") +
ylab("-Log10(p-value)") +
xlab("Log2(Fold change)") +
theme_bw() +
theme(legend.position = "none")
tmp9 <- topTreat(tfit, coef = 9, n = Inf)
results9 <- mutate(tmp9, sig=ifelse(tmp9$adj.P.Val<0.1, "Sig", "Not Sig"))
p9 <- ggplot(results9, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Infected, Day 1 vs. Infected, Day 29") +
ylab("-Log10(p-value)") +
xlab("Log2(Fold change)") +
theme_bw() +
theme(legend.position = "none")
tmp10 <- topTreat(tfit, coef = 10, n = Inf)
results10 <- mutate(tmp10, sig=ifelse(tmp10$adj.P.Val<0.1, "Sig", "Not Sig"))
p10 <- ggplot(results10, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Infected, Day 2 vs. Infected, Day 5") +
ylab("-Log10(p-value)") +
xlab("Log2(Fold change)") +
theme_bw() +
theme(legend.position = "none")
tmp11 <- topTreat(tfit, coef = 11, n = Inf)
results11 <- mutate(tmp11, sig=ifelse(tmp11$adj.P.Val<0.1, "Sig", "Not Sig"))
p11 <- ggplot(results11, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Infected, Day 2 vs. Infected, Day 15") +
ylab("-Log10(p-value)") +
xlab("Log2(Fold change)") +
theme_bw() +
theme(legend.position = "none")
tmp12 <- topTreat(tfit, coef = 12, n = Inf)
results12 <- mutate(tmp12, sig=ifelse(tmp12$adj.P.Val<0.1, "Sig", "Not Sig"))
p12 <- ggplot(results12, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Infected, Day 2 vs. Infected, Day 29") +
ylab("-Log10(p-value)") +
xlab("Log2(Fold change)") +
theme_bw() +
theme(legend.position = "none")
tmp13 <- topTreat(tfit, coef = 13, n = Inf)
results13 <- mutate(tmp13, sig=ifelse(tmp13$adj.P.Val<0.1, "Sig", "Not Sig"))
p13 <- ggplot(results13, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Infected, Day 5 vs. Infected, Day 15") +
ylab("-Log10(p-value)") +
xlab("Log2(Fold change)") +
theme_bw() +
theme(legend.position = "none")
tmp14 <- topTreat(tfit, coef = 14, n = Inf)
results14 <- mutate(tmp14, sig=ifelse(tmp14$adj.P.Val<0.1, "Sig", "Not Sig"))
p14 <- ggplot(results14, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Infected, Day 5 vs. Infected, Day 29") +
ylab("-Log10(p-value)") +
xlab("Log2(Fold change)") +
theme_bw() +
theme(legend.position = "none")
tmp15 <- topTreat(tfit, coef = 15, n = Inf)
results15 <- mutate(tmp15, sig=ifelse(tmp15$adj.P.Val<0.1, "Sig", "Not Sig"))
p15 <- ggplot(results15, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Infected, Day 15 vs. Infected, Day 29") +
ylab("-Log10(p-value)") +
xlab("Log2(Fold change)") +
theme_bw() +
theme(legend.position = "none")
grid.arrange(p1, p2, p3, p4, p5, p6, p7,
p8, p9, p10, p11, p12, p13, p14, p15, nrow = 8)

Heatmap
# Get the gene names for DE genes
lcpm2 <- as_tibble(dge$counts, rownames = NA) %>%
cpm(., log = TRUE)
newNames <- colnames(lcpm2) %>%
as_tibble() %>%
separate(value, into = c(NA, "bird")) %>%
bind_cols(covars) %>%
mutate(group = ifelse(group == "C1", "Ctl", group),
group = ifelse(group == "C29", "Ctl", group),
group = as.factor(group)) %>%
unite("newNames", group, bird)
colOrder <- covars %>%
mutate(group = ifelse(group == "C1", "Ctl", group),
group = ifelse(group == "C29", "Ctl", group),
group = as.factor(group)) %>%
mutate(group = fct_relevel(group, "Ctl", "I1", "I2", "I5", "I15", "I29")) %>%
arrange(group, bird) %>%
unite("newNames", group, bird)
colnames(lcpm2) <- newNames$newNames
lcpm2 <- lcpm2[, colOrder$newNames]
heatmap.annot <- annot %>%
filter(ensembl_transcript_id %in% dt.tib$gene) %>%
mutate(hgnc_symbol = replace_na(hgnc_symbol, ".")) %>%
unite(annot.hm, c(ensembl_transcript_id, hgnc_symbol), sep = "-", remove = FALSE) %>%
select(annot.hm, ensembl_transcript_id) %>%
distinct(annot.hm, .keep_all = TRUE)
deGeneCounts <- lcpm2 %>%
as_tibble() %>%
mutate(ensembl_transcript_id = rownames(dge$counts)) %>%
filter(ensembl_transcript_id %in% dt.tib$gene) %>%
left_join(heatmap.annot) %>%
column_to_rownames("annot.hm") %>%
select(-ensembl_transcript_id) %>%
as.matrix()
## Set up palette
mypalette <- brewer.pal(11,"RdYlBu")
morecols <- colorRampPalette(mypalette)
hc <- hclust(as.dist(1-cor(t(deGeneCounts))))
# Plot the heatmap
heatmap.2(deGeneCounts,
Colv = FALSE,
Rowv = as.dendrogram(hc),
col = rev(morecols(50)),
trace = "none",
colsep = c(8, 23, 33, 48, 58),
dendrogram = "row",
density.info = "none",
key = TRUE,
margins = c(3, 20),
scale ="row")

Boxplots for differentially expressed genes
lcpm.DE <- lcpm %>%
as_tibble(rownames = NA) %>%
rownames_to_column("identifier") %>%
filter(identifier %in% dt.tib$gene) %>%
pivot_longer(cols = contains("_"),
names_to = "sample",
values_to = "lcpm") %>%
separate(sample, into = c("tissue", "bird")) %>%
mutate(bird = as.factor(bird)) %>%
left_join(covars, by = "bird") %>%
select(identifier, bird, lcpm, group) %>%
mutate(group = recode(group, C29 = "Control"),
group = recode(group, C1 = "Control"),
tmpID = ifelse(group == "Control", 'Control', 'Infected'))
aprioriPlotting <- function(target, ...) {
annot.target <- annot %>%
select(
ensembl_transcript_id,
hgnc_symbol) %>%
filter(ensembl_transcript_id == target) %>%
mutate(hgnc_symbol = replace_na(hgnc_symbol, "."))
plot <- lcpm.DE %>%
filter(identifier == target) %>%
mutate(group = fct_relevel(group, "Control", "I1", "I2", "I5", "I15", "I29")) %>%
ggplot(aes(x = group, y = lcpm)) +
facet_grid(. ~ tmpID, scales = "free", space = "free") +
ylab("Log2(Counts per million)") +
xlab("Group") +
geom_point(position = position_dodge(width=0.75), show.legend = FALSE) +
geom_boxplot(alpha = 0.5) +
geom_label_repel(aes(label = bird, fill = NULL), show.legend=FALSE) +
theme_classic() +
labs(title= paste0(target, " - ", annot.target[1,2])) +
theme(legend.title = element_blank())
print(plot)
}
for(target in sort(unique(lcpm.DE$identifier))) {
aprioriPlotting(target)
}






Annotations for differentially expressed genes
ns denotes non-significant genes for each comparison and numerical values are the Log2(Fold change) difference
ensembl_transcript_id
|
hgnc_symbol
|
CtlvI1
|
CtlvI2
|
CtlvI5
|
CtlvI15
|
CtlvI29
|
I1vI2
|
I1vI5
|
I1vI15
|
I1vI29
|
I2vI5
|
I2vI15
|
I2vI29
|
I5vI15
|
I5vI29
|
I15vI29
|
ENSAPLT00020022320
|
EDN2
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
1.65
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ENSAPLT00020026833
|
HSPA8
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
-3.5
|
ns
|
ns
|
ns
|
ns
|
ENSAPLT00020026836
|
HSPA8
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
-1.58
|
ns
|
ns
|
ns
|
ENSAPLT00020010104
|
NA
|
ns
|
4.13
|
ns
|
ns
|
ns
|
4.73
|
ns
|
ns
|
ns
|
-4.54
|
ns
|
ns
|
ns
|
ns
|
ns
|
ENSAPLT00020025810
|
NFATC3
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
-4.86
|
ns
|
ns
|
ENSAPLT00020008328
|
STAU1
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
ns
|
2.86
|
ns
|
ns
|
Gene functions
bind_rows(tmp1a, tmp2a, tmp3a, tmp4a, tmp5a, tmp6a, tmp7a, tmp8a, tmp9a, tmp10a, tmp11a, tmp12a, tmp13a, tmp14a, tmp15a) %>%
select(-comp, -adj.P.Val, -logFC) %>%
filter(ensembl_transcript_id %in% dt.tib$gene) %>%
distinct(.) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
ensembl_transcript_id
|
transcript_biotype
|
hgnc_symbol
|
description
|
ENSAPLT00020022320
|
protein_coding
|
EDN2
|
endothelin 2 [Source:NCBI gene;Acc:101795578]
|
ENSAPLT00020026833
|
protein_coding
|
HSPA8
|
heat shock protein family A (Hsp70) member 8 [Source:NCBI gene;Acc:101801738]
|
ENSAPLT00020026836
|
protein_coding
|
HSPA8
|
heat shock protein family A (Hsp70) member 8 [Source:NCBI gene;Acc:101801738]
|
ENSAPLT00020010104
|
protein_coding
|
NA
|
pantetheinase [Source:NCBI gene;Acc:101798228]
|
ENSAPLT00020025810
|
protein_coding
|
NFATC3
|
nuclear factor of activated T cells 3 [Source:HGNC Symbol;Acc:HGNC:7777]
|
ENSAPLT00020008328
|
protein_coding
|
STAU1
|
staufen double-stranded RNA binding protein 1 [Source:NCBI gene;Acc:101803828]
|
Pathway analysis
de.entrez <- annot %>% filter(ensembl_transcript_id %in% dt.tib$gene) %>% na.omit(entrezgene_id)
entrez.universe <- annot %>% na.omit(entrezgene_id)
K <- kegga(de.entrez$entrezgene_id, universe = entrez.universe$entrezgene_id, species.KEGG = "apla") %>%
as_tibble(rownames = NA) %>%
rownames_to_column("PathwayID") %>%
filter(P.DE < 0.05) %>%
arrange(P.DE)
keggList <- getGeneKEGGLinks(species.KEGG = "apla") %>%
as_tibble() %>%
rename("entrezgene_id" = GeneID)
keggGenes = data.frame()
for (path in nrow(K)){
procPath <- filter(keggList, entrezgene_id %in% de.entrez$entrezgene_id)
df <- data.frame(procPath)
keggGenes <- rbind(keggGenes, df)
}
inner_join(keggGenes, K) %>%
as_tibble() %>%
mutate(entrezgene_id = as.numeric(entrezgene_id)) %>%
left_join(annot) %>%
select(Pathway, PathwayID, N, DE, P.DE, ensembl_transcript_id, hgnc_symbol, entrezgene_id) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Pathway
|
PathwayID
|
N
|
DE
|
P.DE
|
ensembl_transcript_id
|
hgnc_symbol
|
entrezgene_id
|
Spliceosome
|
path:apla03040
|
32
|
1
|
0.0272017
|
ENSAPLT00020026833
|
HSPA8
|
101801738
|
Spliceosome
|
path:apla03040
|
32
|
1
|
0.0272017
|
ENSAPLT00020026836
|
HSPA8
|
101801738
|
Protein processing in endoplasmic reticulum
|
path:apla04141
|
35
|
1
|
0.0297262
|
ENSAPLT00020026833
|
HSPA8
|
101801738
|
Protein processing in endoplasmic reticulum
|
path:apla04141
|
35
|
1
|
0.0297262
|
ENSAPLT00020026836
|
HSPA8
|
101801738
|
Vascular smooth muscle contraction
|
path:apla04270
|
30
|
1
|
0.0255162
|
ENSAPLT00020022320
|
EDN2
|
101795578
|