Load libraries and read in data

Parse data by site & alignment type

# Isolate each location
(Beijing    <- data[data$site == "Beijing", ])
##                                  tree    site country type month     cases
## 1                              IQTREE Beijing   China CoV2     1  4.167989
## 2                              IQTREE Beijing   China CoV2     2  6.754129
## 3                              IQTREE Beijing   China CoV2     3  8.833776
....
(Tampere    <- data[data$site == "Tampere", ])
##                                  tree    site country type month     cases
## 13                             IQTREE Tampere Finland CoV2     1 10.988458
## 14                             IQTREE Tampere Finland CoV2     2 25.203085
## 15                             IQTREE Tampere Finland CoV2     3 14.314795
....
(Guangzhou  <- data[data$site == "Guangzhou", ])
##                                  tree      site country type month      cases
## 25                             IQTREE Guangzhou   China CoV2     1 14.3159109
## 26                             IQTREE Guangzhou   China CoV2     2 17.9851222
## 27                             IQTREE Guangzhou   China CoV2     3 10.0976023
....
(HongKong   <- data[data$site == "Hong Kong", ])
##                                  tree      site country type month     cases
## 37                             IQTREE Hong Kong   China CoV2     1  9.913466
## 38                             IQTREE Hong Kong   China CoV2     2  6.467976
## 39                             IQTREE Hong Kong   China CoV2     3  5.751280
....
(Sarlahi    <- data[data$site == "Sarlahi", ])
##                                  tree    site country type month      cases
## 49                             IQTREE Sarlahi   Nepal CoV2     1 16.4850927
## 50                             IQTREE Sarlahi   Nepal CoV2     2 20.7173315
## 51                             IQTREE Sarlahi   Nepal CoV2     3  8.5622148
....
(Amsterdam  <- data[data$site == "Amsterdam", ])
##                                  tree      site     country type month
## 61                             IQTREE Amsterdam Netherlands CoV2     1
## 62                             IQTREE Amsterdam Netherlands CoV2     2
## 63                             IQTREE Amsterdam Netherlands CoV2     3
....
(Norway     <- data[data$site == "Sor Trondelag", ])
##                                  tree          site country type month
## 73                             IQTREE Sor Trondelag  Norway CoV2     1
## 74                             IQTREE Sor Trondelag  Norway CoV2     2
## 75                             IQTREE Sor Trondelag  Norway CoV2     3
....
(NY         <- data[data$site == "New York", ])
##                                  tree     site country type month     cases
## 85                             IQTREE New York     USA CoV2     1 19.345595
## 86                             IQTREE New York     USA CoV2     2  8.277717
## 87                             IQTREE New York     USA CoV2     3  6.705154
....
(Japan      <- data[data$site == "Yamagata", ])
##                                  tree     site country type month     cases
## 145                            IQTREE Yamagata   Japan CoV2     1 17.505239
## 146                            IQTREE Yamagata   Japan CoV2     2 23.919595
## 147                            IQTREE Yamagata   Japan CoV2     3 18.287013
....
(Colorado   <- data[data$site == "Colorado", ])
##                                  tree     site country type month     cases
## 121                            IQTREE Colorado     USA CoV2    12  6.038022
## 122                            IQTREE Colorado     USA CoV2     2  5.358763
## 123                            IQTREE Colorado     USA CoV2     3 12.220972
....
(Gothenburg <- data[data$site == "Gothenburg", ])
##                                  tree       site country type month     cases
## 133                            IQTREE Gothenburg  Sweden CoV2     1 18.501600
## 134                            IQTREE Gothenburg  Sweden CoV2     2 19.477681
## 135                            IQTREE Gothenburg  Sweden CoV2     3 11.427418
....
(Stockholm  <- data[data$site == "Stockholm", ])
##                                  tree      site country type month      cases
## 97                             IQTREE Stockholm  Sweden CoV2     1 17.1945701
## 98                             IQTREE Stockholm  Sweden CoV2     2 17.6776837
## 99                             IQTREE Stockholm  Sweden CoV2     3 15.8657099
....
(Thailand   <- data[data$site == "Nakhon Si Thammarat", ])
##                                  tree                site  country type month
## 157                            IQTREE Nakhon Si Thammarat Thailand CoV2     1
## 158                            IQTREE Nakhon Si Thammarat Thailand CoV2     2
## 159                            IQTREE Nakhon Si Thammarat Thailand CoV2     3
....
(SKorea     <- data[data$site == "South Korea", ])
##                                  tree        site     country type month
## 109                            IQTREE South Korea South Korea CoV2     1
## 110                            IQTREE South Korea South Korea CoV2     2
## 111                            IQTREE South Korea South Korea CoV2     3
....
# Group NA + Europe & Asia locales together
ENA  <- rbind(NY, Colorado, Stockholm, Gothenburg, Amsterdam, Tampere, Norway)
Asia <- rbind(Beijing, Guangzhou, HongKong, Sarlahi, Japan, Thailand, SKorea)

# split further by alignment type
(ENAnonrecom   <- ENA[ENA$nonrecombinant == "yes", ])
##                                  tree          site     country type month
## 253            IQTREE_non_recombinant      New York         USA CoV2     1
## 254            IQTREE_non_recombinant      New York         USA CoV2     2
## 255            IQTREE_non_recombinant      New York         USA CoV2     3
....
(ENArecom      <- ENA[ENA$nonrecombinant == "no", ])
##                  tree          site     country type month      cases
## 85             IQTREE      New York         USA CoV2     1 19.3455952
## 86             IQTREE      New York         USA CoV2     2  8.2777173
## 87             IQTREE      New York         USA CoV2     3  6.7051536
....
(Asianonrecom  <- Asia[Asia$nonrecombinant == "yes", ])
##                                  tree                site     country type
## 169            IQTREE_non_recombinant             Beijing       China CoV2
## 170            IQTREE_non_recombinant             Beijing       China CoV2
## 171            IQTREE_non_recombinant             Beijing       China CoV2
....
(Asiarecom     <- Asia[Asia$nonrecombinant == "no", ])
##                  tree                site     country type month     cases
## 1              IQTREE             Beijing       China CoV2     1  4.167989
## 2              IQTREE             Beijing       China CoV2     2  6.754129
## 3              IQTREE             Beijing       China CoV2     3  8.833776
....
# Pull out the monthly average for each site for plotting bar charts
ENA_nonaverage  <- aggregate(ENAnonrecom$cases, list(ENAnonrecom$month, ENAnonrecom$site), mean)
ENA_average     <- aggregate(ENArecom$cases, list(ENArecom$month, ENArecom$site), mean)
Asia_nonaverage <- aggregate(Asianonrecom$cases, list(Asianonrecom$month, Asianonrecom$site), mean)
Asia_average    <- aggregate(Asiarecom$cases, list(Asiarecom$month, Asiarecom$site), mean)

# Rename columns
ENA_nonaverage <- setnames(ENA_nonaverage, old = c('Group.1','Group.2', 'x'), new = c('month','site', 'avg.cases'))
ENA_average <- setnames(ENA_average, old = c('Group.1','Group.2', 'x'), new = c('month','site', 'avg.cases'))
Asia_nonaverage <- setnames(Asia_nonaverage, old = c('Group.1','Group.2', 'x'), new = c('month','site', 'avg.cases'))
Asia_average <- setnames(Asia_average, old = c('Group.1','Group.2', 'x'), new = c('month','site', 'avg.cases'))

Plot monthly SARS-CoV-2 proportions by site & facet by tree type

Beijing

#pdf("Beijing_CoV.pdf")
ggplot() + geom_col(data = Beijing, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Proportion of Sars-Cov-2 cases by tree", x = "Month", y = "Percent") + 
  facet_wrap(~ tree)

#dev.off()

Tampere, Finland

#pdf("Tampere_CoV.pdf")
ggplot() + geom_col(data = Tampere, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Proportion of Sars-Cov-2 cases by tree", x = "Month", y = "Percent") + 
  facet_wrap(~ tree)

#dev.off()

Guangzhou, China

#pdf("Guangzhou_CoV.pdf")
ggplot() + geom_col(data = Guangzhou, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Proportion of Sars-Cov-2 cases by tree", x = "Month", y = "Percent") + 
  facet_wrap(~ tree)

#dev.off()

Hong Kong

#pdf("HongKong_CoV.pdf")
ggplot() + geom_col(data = HongKong, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Proportion of Sars-Cov-2 cases by tree", x = "Month", y = "Percent") + 
  facet_wrap(~ tree)

#dev.off()

Sarlahi, Nepal

#pdf("Sarlahi_CoV.pdf")
ggplot() + geom_col(data = Sarlahi, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Proportion of Sars-Cov-2 cases by tree, Sarlahi", x = "Month", y = "Percent") + 
  facet_wrap(~ tree)

#dev.off()

Amsterdam

#pdf("Amsterdam_CoV.pdf")
ggplot() + geom_col(data = Amsterdam, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Proportion of Sars-Cov-2 cases by tree, Amsterdam", x = "Month", y = "Percent") + 
  facet_wrap(~ tree)

#dev.off()

Trondelag, Norway

#pdf("Norway_CoV.pdf")
ggplot() + geom_col(data = Norway, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Proportion of Sars-Cov-2 cases by tree, Norway", x = "Month", y = "Percent") + 
  facet_wrap(~ tree) 

#dev.off()

Thailand

#pdf("Thailand_CoV.pdf")
ggplot() + geom_col(data = Thailand, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Proportion of Sars-Cov-2 cases by tree, Thailand", x = "Month", y = "Percent") + 
  facet_wrap(~ tree)

#dev.off()

Colorado

#pdf("Colorado_CoV.pdf")
ggplot() + geom_col(data = Colorado, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Proportion of Sars-Cov-2 cases by tree, Colorado", x = "Month", y = "Percent") + 
  facet_wrap(~ tree)

#dev.off()

New York

#pdf("NY_CoV.pdf")
ggplot() + geom_col(data = NY, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Proportion of Sars-Cov-2 cases by tree, New York", x = "Month", y = "Percent") + 
  facet_wrap(~ tree)

#dev.off()

Japan

#pdf("Japan_CoV.pdf")
ggplot() + geom_col(data = Japan, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Proportion of Sars-Cov-2 cases by tree, Yamagata", x = "Month", y = "Percent") + 
  facet_wrap(~ tree)

#dev.off()

Gothenburg, Sweden

#pdf("Gothenburg_CoV.pdf")
ggplot() + geom_col(data = Gothenburg, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Proportion of Sars-Cov-2 cases by tree, Gothenburg", x = "Month", y = "Percent") + 
  facet_wrap(~ tree)

#dev.off()

Stockholm, Sweden

#pdf("Stockholm_CoV.pdf")
ggplot() + geom_col(data = Stockholm, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Proportion of Sars-Cov-2 cases by tree, Stockholm", x = "Month", y = "Percent") + 
  facet_wrap(~ tree)

#dev.off()

Nationwide, South Korea

#pdf("SKorea_CoV.pdf")
ggplot() + geom_col(data = SKorea, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Proportion of Sars-Cov-2 cases by tree, South Korea", x = "Month", y = "Percent") + 
  facet_wrap(~ tree)

#dev.off()

Check sensitivity to method of time tree inference and use of only viral sequences deemed non-recombining

Europe+North America, nonrecombinant

# Plot points
#pdf("ENA_nonrecomb_beeswarm.pdf")
ggplot(ENAnonrecom, aes(month, cases, color=tree)) +
    geom_beeswarm(cex=1, size=.25) +
    facet_wrap(site~.) +
    ggtitle("ENA-nonrecom") +
    xlab("Month") +
    ylab("Parameter estimate") + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

#dev.off()

# Plot averages
#pdf("ENA_nonaverage.pdf")
ggplot(ENA_nonaverage, aes(month, avg.cases)) + 
    geom_bar(stat="identity") +
    facet_wrap(site~.) + 
    ylim(0,25)

#dev.off()

Europe+North America, recombinant

# Plot points 
#pdf("ENA_recomb_beeswarm.pdf")
ggplot(ENArecom, aes(month, cases, color=tree)) +
    geom_beeswarm(cex=1, size=.25) +
    facet_wrap(site~.) +
    ggtitle("ENA-recom") +
    xlab("Month") +
    ylab("Parameter estimate") + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

#dev.off()

# Plot averages
#pdf("ENA_average.pdf")
ggplot(ENA_average, aes(month, avg.cases)) + 
    geom_bar(stat="identity") +
    facet_wrap(site~.) + 
    ylim(0,25)

#dev.off()

Asia, nonrecombinant

# Plot points
#pdf("Asia_nonrecomb_beeswarm.pdf")
ggplot(Asianonrecom, aes(month, cases, color=tree)) +
    geom_beeswarm(cex=1, size=.25) +
    facet_wrap(site~.) +
    ggtitle("Asia-nonrecom") +
    xlab("Month") +
    ylab("Parameter estimate") + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

#dev.off()

# Plot averages
#pdf("Asia_nonaverageTest.pdf")
ggplot(Asia_nonaverage, aes(month, avg.cases)) + 
    geom_bar(stat="identity") +
    facet_wrap(site~.) + 
    ylim(0,40)

#dev.off()

Asia, recombinant

# Plot points
#pdf("Asia_recomb_beeswarm.pdf")
ggplot(Asiarecom, aes(month, cases, color=tree)) +
    geom_beeswarm(cex=1, size=.25) +
    facet_wrap(site~.) +
    ggtitle("Asia-recom") +
    xlab("Month") +
    ylab("Parameter estimate") + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

#dev.off()

# Plot averages
#pdf("Asia_average.pdf")
ggplot(Asia_average, aes(month, avg.cases)) + 
    geom_bar(stat="identity") +
    facet_wrap(site~.) + 
    ylim(0,30)

#dev.off()

Now let’s plot the endemic coronaviruses at each site

setwd('C:/Users/april/OneDrive/Desktop/Covid/phylopars_updated_7-22-21') # File path will need to be updated
df <- read.csv("CoV2_Imputations_IQTREE_time_tree.csv", stringsAsFactors = TRUE) # read data
df$month <- as.factor(df$month)

(cov229E  <- df[df$type == "229E", ])
##                    site     country type month     cases
## 1             Amsterdam Netherlands 229E     1 12.500000
## 2             Amsterdam Netherlands 229E     2 10.500000
## 3             Amsterdam Netherlands 229E     3 10.500000
....
(covOC43  <- df[df$type == "OC43", ])
##                    site     country type month      cases
## 37            Amsterdam Netherlands OC43     1 10.8000000
## 38            Amsterdam Netherlands OC43     2 11.4000000
## 39            Amsterdam Netherlands OC43     3 10.5000000
....
(covNL63  <- df[df$type == "NL63", ])
##                    site     country type month      cases
## 25            Amsterdam Netherlands NL63     1 10.0000000
## 26            Amsterdam Netherlands NL63     2 13.8000000
## 27            Amsterdam Netherlands NL63     3 13.8000000
....
(covHKU1  <- df[df$type == "HKU1", ])
##                    site     country type month      cases
## 13            Amsterdam Netherlands HKU1     1  8.6000000
## 14            Amsterdam Netherlands HKU1     2 11.7000000
## 15            Amsterdam Netherlands HKU1     3 11.7000000
....

229E

#pdf("229E_all.pdf")
ggplot() + geom_col(data = cov229E, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Relative monthly cases by coronavirus- 229E", x = "Month", y = "Percent") + 
  facet_wrap(~ site)+
  ylim(0,40)

#dev.off()

OC43

#pdf("OC43_all.pdf")
ggplot() + geom_col(data = covOC43, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Relative monthly cases by coronavirus- OC43", x = "Month", y = "Percent") + 
  facet_wrap(~ site)+
  ylim(0,40)

#dev.off()

NL63

#pdf("NL63_all.pdf")
ggplot() + geom_col(data = covNL63, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Relative monthly cases by coronavirus- NL63", x = "Month", y = "Percent") + 
  facet_wrap(~ site)+
  ylim(0,40)

#dev.off()

HKU1

#pdf("HKu1_all.pdf")
ggplot() + geom_col(data = covHKU1, aes(x = month, y = cases), position = "dodge") + 
  labs(title= "Relative monthly cases by coronavirus- HKU1", x = "Month", y = "Percent") + 
  facet_wrap(~ site)+
  ylim(0,40)

#dev.off()