Date generated: 2022-06-10
These are the R packages used to generate this report.
library(stringr)
library(readr)
library(dplyr)
library(tidyr)
library(rlang)
library(ggtree)
library(plotly)
library(ggplot2)
library(viridis)
library(formattable)
library(kableExtra)
# Names of the packages attached to the current session.
packages <- .packages()
#' Look up the version of one package
#'
#' @param x Name of a single package, as a character string.
#' @return A one-row data frame with the columns `Package` and `Version`
#'   and reset (automatic) row names.
clean_pkg_version <- function(x) {
  pkg_row <- data.frame(
    Package = x,
    Version = getNamespaceVersion(x)
  )
  # Reset the row names so the row is not labelled by the names carried by
  # the inputs.
  rownames(pkg_row) <- NULL
  pkg_row
}
# Build one version row per attached package, stack the rows into a single
# data frame and show it as a styled table.
packages %>%
  lapply(clean_pkg_version) %>%
  bind_rows() %>%
  kbl() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "responsive"),
    full_width = TRUE
  )
Package | Version |
---|---|
kableExtra | 1.3.4 |
formattable | 0.2.1 |
viridis | 0.6.2 |
viridisLite | 0.4.0 |
plotly | 4.10.0 |
ggplot2 | 3.3.5 |
ggtree | 3.2.1 |
rlang | 1.0.1 |
tidyr | 1.2.0 |
dplyr | 1.0.8 |
readr | 2.1.2 |
stringr | 1.4.0 |
stats | 4.1.2 |
graphics | 4.1.2 |
grDevices | 4.1.2 |
utils | 4.1.2 |
datasets | 4.1.2 |
methods | 4.1.2 |
base | 4.1.2 |
These are the tools used to run the analysis within the core genome workflow of ALPPACA.
# Tool names and their version strings, one row per tool.
tool_data <- local({
  # Written as name = version pairs so each tool sits next to its version.
  versions <- c(
    "Snippy" = "4.6.0",
    "Gubbins" = "3.2",
    "MaskRC" = "0.5",
    "Snp-dists" = "0.8.2",
    "Snp-sites" = "2.5.1",
    "IQTree" = "2.1.4"
  )
  data.frame(
    Tool = names(versions),
    Version = unname(versions)
  )
})
# Show the tool versions as a full-width table with the "striped", "hover"
# and "responsive" bootstrap options.
kable_styling(
  kbl(tool_data),
  bootstrap_options = c("striped", "hover", "responsive"),
  full_width = TRUE
)
Tool | Version |
---|---|
Snippy | 4.6.0 |
Gubbins | 3.2 |
MaskRC | 0.5 |
Snp-dists | 0.8.2 |
Snp-sites | 2.5.1 |
IQTree | 2.1.4 |
# Read the tab-separated Snippy report named in the report parameters.
snippy_data <- read_delim(
  params$snippy_report,
  delim = "\t",
  show_col_types = FALSE
)

# A large scipen penalty makes R print numbers in fixed rather than
# scientific notation.
options(scipen = 999)

# Distinct values found in the LENGTH column of the report.
alignment_length <- unique(snippy_data$LENGTH)
# Stacked bar chart: one bar per ID, split by every report column other than
# LENGTH and ID.
snippy_data %>%
  select(-LENGTH) %>%
  pivot_longer(cols = -ID, names_to = "names", values_to = "value") %>%
  ggplot(mapping = aes(x = ID, y = value, fill = names)) +
  geom_col() +
  theme_bw() +
  # Rotate the ID labels so they are written vertically under each bar.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3)) +
  labs(x = NULL, y = "Nucleotides", fill = NULL)
# Read the tab-separated SNP distance matrix and reshape it to long format:
# one row per (isol1, isol2) pair with the distance in `value`.
snpdist_data <- params$snpdist_report %>%
  read_delim(delim = "\t", show_col_types = FALSE) %>%
  # The first column holds the row isolate; give it a fixed name.
  rename(isol1 = 1) %>%
  pivot_longer(cols = -isol1, names_to = "isol2", values_to = "value")
# Summary statistics of the pairwise SNP distances, shown as a one-row table.
snpdist_data %>%
  # Leave out the comparison of each isolate with itself.
  filter(isol1 != isol2) %>%
  mutate(
    Mean = round(mean(value), digits = 0),
    Median = median(value),
    Range = paste0(min(value), " - ", max(value))
  ) %>%
  select(Mean, Median, Range) %>%
  # Every row carries the same summary values, so the first one is enough.
  slice_head(n = 1) %>%
  kbl() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "responsive"),
    full_width = TRUE
  )
Mean | Median | Range |
---|---|---|
22056 | 23479 | 361 - 25205 |
# Heatmap of the pairwise SNP distances with square tiles.
ggplot(
  data = snpdist_data,
  mapping = aes(x = isol1, y = isol2, fill = value)
) +
  geom_tile() +
  # Reversed viridis "D" scale for the distance values.
  scale_fill_viridis(option = "D", direction = -1) +
  labs(fill = "SNP distance") +
  coord_fixed() +
  theme_bw() +
  theme(
    axis.title = element_blank(),
    axis.text = element_text(size = 4),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3),
    panel.grid = element_blank()
  )
Figure 1: SNP distance matrix of all included isolates. A lighter color represents a smaller SNP distance.
# Read IQTree data ----

#' Extract the value(s) that follow a label in the IQTree report
#'
#' @param report_lines Character vector holding the lines of the report.
#' @param label Literal text of the label, including its trailing colon.
#' @return Character vector with one element per line that contains `label`,
#'   with the first occurrence of the label and the space after it removed.
#'   Zero-length when no line contains the label.
extract_iqtree_field <- function(report_lines, label) {
  # fixed = TRUE: the labels are literal text, not regular expressions.
  hits <- report_lines[grepl(label, report_lines, fixed = TRUE)]
  sub(paste0(label, " "), "", hits, fixed = TRUE)
}

iqtree_data <- readLines(params$phylo_data)

aln_info <- extract_iqtree_field(iqtree_data, "Input data:")

# Keep only the text between "= " and " of all sites)" on the constant-sites
# line. The closing parenthesis is escaped so that it is matched as a literal
# character instead of relying on how an unmatched ")" is interpreted.
const_sites <- sub(
  ".+= (.+) of all sites\\)",
  "\\1",
  iqtree_data[grepl("Number of constant sites:", iqtree_data, fixed = TRUE)]
)

parsimony_sites <- extract_iqtree_field(
  iqtree_data,
  "Number of parsimony informative sites:"
)

iqtree_model <- extract_iqtree_field(iqtree_data, "Model of substitution:")

iqtree_cpu <- extract_iqtree_field(iqtree_data, "Total CPU time used:")

iqtree_wallclock <- extract_iqtree_field(
  iqtree_data,
  "Total wall-clock time used:"
)
IQTree was run on an alignment composed of 7 sequences with 5412463 nucleotide sites, of which 98.8341% were constant sites. The number of parsimony-informative sites was 26967. IQTree detected the evolutionary model GTR+F+I. IQTree used 139.864832 seconds (0h:2m:19s) of CPU time and 51.66601668 seconds (0h:0m:51s) of wall-clock time.
Here, the phylogenetic tree generated by IQTree is plotted. The figure is interactive, and you can zoom in and out and pan as you see fit.
# Read the tree file named in the report parameters.
tree <- read.tree(params$phylo_tree)

# Static tree with every node's label drawn as text, plus a scale bar.
tree_plot <- ggtree(tree) +
  geom_text(mapping = aes(label = label), size = 2, hjust = 0) +
  geom_treescale()

# Convert to an interactive plotly figure and place the text to the right of
# its anchor point.
plotly::style(ggplotly(tree_plot), textposition = "right")
Figure 2: Maximum likelihood phylogenetic tree generated by IQTree. Bootstrap values are presented for each node. The tree is unrooted.