Run information

Date generated: 2022-06-10

R Packages used

These are the R packages used to generate this report.

library(stringr)
library(readr)
library(dplyr)
library(tidyr)
library(rlang)
library(ggtree)
library(plotly)
library(ggplot2)
library(viridis)
library(formattable)
library(kableExtra)

# Names of the packages currently attached to this R session.
packages <- .packages()

# Build a one-row data frame pairing a package name with the version of its
# loaded namespace.
#
# x: name of a loaded package/namespace (a single string).
# Returns a data.frame with the columns "Package" and "Version" and default
# (integer) row names.
clean_pkg_version <- function(x) {
  # getNamespaceVersion() returns a named vector; strip the name so it cannot
  # leak into the row names of the result.
  pkg_version <- unname(getNamespaceVersion(x))

  data.frame(
    "Package" = x,
    "Version" = pkg_version,
    row.names = NULL
  )
}

# Collect one name/version row per attached package, stack the rows into a
# single table and render it as a styled HTML table.
packages %>%
  lapply(clean_pkg_version) %>%
  bind_rows() %>%
  kbl() %>%
  kable_styling(
    full_width = TRUE,
    bootstrap_options = c("striped", "hover", "responsive")
  )
Package Version
kableExtra 1.3.4
formattable 0.2.1
viridis 0.6.2
viridisLite 0.4.0
plotly 4.10.0
ggplot2 3.3.5
ggtree 3.2.1
rlang 1.0.1
tidyr 1.2.0
dplyr 1.0.8
readr 2.1.2
stringr 1.4.0
stats 4.1.2
graphics 4.1.2
grDevices 4.1.2
utils 4.1.2
datasets 4.1.2
methods 4.1.2
base 4.1.2


Tool versions

These are the tools used to run the analysis within the core genome workflow of ALPPACA.

# Table of the tools listed in this report and their version strings.
# Each tool is written next to its version so the two cannot drift apart;
# local() keeps the intermediate mapping out of the global environment.
tool_data <- local({
  versions <- c(
    "Snippy"    = "4.6.0",
    "Gubbins"   = "3.2",
    "MaskRC"    = "0.5",
    "Snp-dists" = "0.8.2",
    "Snp-sites" = "2.5.1",
    "IQTree"    = "2.1.4"
  )

  data.frame(
    Tool = names(versions),
    Version = unname(versions)
  )
})

# Render the tool/version table as a striped, hover-enabled, responsive,
# full-width HTML table.
kable_styling(
  kbl(tool_data),
  bootstrap_options = c("striped", "hover", "responsive"),
  full_width = TRUE
)
Tool Version
Snippy 4.6.0
Gubbins 3.2
MaskRC 0.5
Snp-dists 0.8.2
Snp-sites 2.5.1
IQTree 2.1.4



Tool information

Snippy

# Heavily penalise scientific notation so large counts are written out in
# full when numbers are converted to text.
options(scipen = 999)

# Tab-separated Snippy report; the code in this section uses its ID and
# LENGTH columns.
snippy_data <- read_delim(
  params$snippy_report,
  delim = "\t",
  show_col_types = FALSE
)

# Distinct value(s) found in the LENGTH column.
# NOTE(review): presumably a single value shared by every sample — confirm.
alignment_length <- unique(snippy_data$LENGTH)

# Stacked bar chart with one bar per ID. LENGTH is dropped first; every other
# non-ID column becomes one fill category after reshaping to long format.
snippy_data %>%
  select(-LENGTH) %>%
  pivot_longer(
    cols = -ID,
    names_to = "names",
    values_to = "value"
  ) %>%
  ggplot(mapping = aes(x = ID, y = value, fill = names)) +
  geom_col() +
  theme_bw() +
  # Rotate the x-axis labels so long sample IDs do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3)) +
  labs(x = NULL, y = "Nucleotides", fill = NULL)

Figure 1: Summary statistics of the alignment generated by Snippy from mapped reads. The total alignment length was 5472672 nucleotides.


SNPdist

Table 3: SNP distance summary statistics for all isolates.
# Read the tab-separated SNP distance report and reshape it to long format:
# the first column is renamed to isol1, every other column name becomes a
# value of isol2, and the cell contents go into value.
snpdist_data <- read_delim(
  params$snpdist_report,
  delim = "\t",
  show_col_types = FALSE
) %>%
  rename(isol1 = 1) %>%
  pivot_longer(
    cols = -isol1,
    values_to = "value",
    names_to = "isol2"
  )


# One-row summary (mean, median, range) of the pairwise SNP distances.
# Rows comparing an isolate with itself are excluded first.
# summarise() collapses the data straight to a single row, rather than
# computing the same aggregates on every row and then keeping only the first.
snpdist_data %>%
  filter(isol1 != isol2) %>%
  summarise(
    Mean = round(mean(value), 0),
    Median = median(value),
    Range = paste0(min(value), " - ", max(value))
  ) %>%
  kbl() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "responsive"),
    full_width = TRUE
  )
Mean Median Range
22056 23479 361 - 25205
# Heatmap of the SNP distance for every isol1/isol2 combination.
ggplot(snpdist_data, mapping = aes(x = isol1, y = isol2, fill = value)) +
  geom_tile() +
  # direction = -1 reverses the viridis palette.
  scale_fill_viridis(option = "D", direction = -1) +
  labs(fill = "SNP distance") +
  # Fixed aspect ratio keeps the tiles square.
  coord_fixed() +
  theme_bw() +
  theme(
    axis.title = element_blank(),
    panel.grid = element_blank(),
    axis.text = element_text(size = 4),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3)
  )
**Figure 1**: SNP distance matrix of all included isolates. A lighter color represents a smaller SNP distance.

Figure 1: SNP distance matrix of all included isolates. A lighter color represents a smaller SNP distance.


IQTree

# Pull the value(s) of one labelled field out of an IQTree report.
#
# lines:       character vector holding the report, one element per line.
# label:       literal text identifying the line(s) of interest.
# pattern:     regular expression handed to sub(); by default the label
#              followed by one space, so that the label prefix is stripped.
# replacement: replacement string handed to sub().
# Returns a character vector with one element per matching line
# (character(0) when no line contains the label).
extract_iqtree_field <- function(lines,
                                 label,
                                 pattern = paste0(label, " "),
                                 replacement = "") {
  matching_lines <- grep(label, lines, value = TRUE, fixed = TRUE)
  sub(pattern, replacement, matching_lines)
}

# Read IQTree data
iqtree_data <- readLines(params$phylo_data)

aln_info <- extract_iqtree_field(iqtree_data, "Input data:")

# Keep only the percentage inside "(= <pct> of all sites)". The closing
# parenthesis is escaped so the pattern is a well-formed regular expression
# and does not depend on the engine tolerating an unmatched ")".
const_sites <- extract_iqtree_field(
  iqtree_data,
  "Number of constant sites:",
  pattern = ".+= (.+) of all sites\\)",
  replacement = "\\1"
)

parsimony_sites <- extract_iqtree_field(
  iqtree_data,
  "Number of parsimony informative sites:"
)

iqtree_model <- extract_iqtree_field(iqtree_data, "Model of substitution:")

iqtree_cpu <- extract_iqtree_field(iqtree_data, "Total CPU time used:")

iqtree_wallclock <- extract_iqtree_field(
  iqtree_data,
  "Total wall-clock time used:"
)

IQTree was run on an alignment composed of 7 sequences with 5412463 nucleotide sites, of which 98.8341% were constant sites. The number of parsimony-informative sites was 26967. The substitution model reported by IQTree was GTR+F+I. The run used 139.864832 seconds (0h:2m:19s) of CPU time and 51.66601668 seconds (0h:0m:51s) of wall-clock time.



Phylogenetic tree

Here, the phylogenetic tree generated by IQTree is plotted. The figure is interactive, and you can zoom in and out and pan as you see fit.

# Load the tree file produced by the phylogeny step.
tree <- read.tree(params$phylo_tree)

# Static tree: each node's label is drawn as small, left-aligned text, and a
# scale bar is added.
tree_plot <- ggtree(tree) +
  geom_text(
    mapping = aes(label = label),
    size = 2,
    hjust = 0
  ) +
  geom_treescale()

# Convert to an interactive plotly figure and set the text position of its
# traces to "right".
plotly::style(ggplotly(tree_plot), textposition = "right")

Figure 2: Maximum likelihood phylogenetic tree generated by IQTree. Bootstrap values are presented for each node. The tree is unrooted.