Published March 2, 2021
| Version v1
Dataset
Open
Dataset summary and supplemental figures for article: "No evidence known viruses play a role in the pathogenesis of onchocerciasis-associated epilepsy. An explorative metagenomic case control study"
Description
Summary output of the Hecatomb pipeline for this dataset. The two files read by the R code below (`big_table.tsv` and `metadata.tsv`) are all you need to reproduce the figures in the manuscript.
# in R
# read data and metadata
# Both files are tab-separated. read.table() by default treats both " and '
# as quote characters and '#' as the start of a comment, so a field that
# contains an apostrophe or a '#' would merge or truncate rows. Restrict
# quoting to double quotes and switch comment handling off.
data <- read.table("big_table.tsv", sep = "\t", header = TRUE,
                   quote = "\"", comment.char = "")
meta <- read.table("metadata.tsv", sep = "\t", header = TRUE,
                   quote = "\"", comment.char = "")
# combine: join the metadata onto the table rows by sample identifier
# (column `sample` in the table, `sample_id` in the metadata)
data <- merge(data, meta, by.x = "sample", by.y = "sample_id")
# patients #7 and #14 were excluded
data <- data[!data$individual %in% c(7, 14), ]
# filter (ignore non virus, ignore poor evalue, ignore nt-only hits)
keep <- data$Kingdom == "Viruses" &
  data$ali_evalue < 1e-20 &
  data$nt_aa == "aa"
# which() discards rows where the condition is NA; indexing with the raw
# logical vector would return an all-NA row for each of them instead.
data <- data[which(keep), ]
# Figure 1
# Scatter plot of alignment length (x) against `ali_perc` (y), one panel per
# value of `Family`. Points are coloured by `source` and sized by `count`.
library(ggplot2)
ggplot(data) +
  # alpha = 0.1 makes points strongly transparent, so overlapping points
  # build up into darker regions instead of hiding one another
  geom_point(
    aes(x = ali_len, y = ali_perc, color = source, size = count),
    alpha = 0.1
  ) +
  facet_wrap(~Family) +
  scale_color_brewer(palette = "Dark2") +
  theme_bw() +
  theme(
    # no gap between neighbouring panels
    panel.spacing = unit(0, "mm"),
    # facet title strips: white fill with a black outline
    strip.background = element_rect(color = "black", fill = "white")
  ) +
  scale_x_continuous(name = "Alignment length") +
  scale_y_continuous(name = "Alignment identity")
# etc.