# install.packages(c("rjson", "tidyverse", "lubridate"))
library(rjson) # for reading api
library(tidyverse) # for visualisations
library(lubridate) # for converting character strings to dates
Ontario COVID-19 Vaccine Data https://data.ontario.ca/dataset/covid-19-vaccine-data-in-ontario/resource/8a89caa9-511c-4568-af89-7f2174b4378c?view_id=9e42f55b-723f-46dd-b0d9-643670e01fed published under an Open Government Licence – Ontario version 1.0 (https://www.ontario.ca/page/open-government-licence-ontario)
Statistics Canada. Table 17-10-0005-01 Population estimates on July 1st, by age and sex. https://doi.org/10.25318/1710000501-eng
# CKAN DataStore endpoint for the Ontario COVID-19 vaccine data resource.
# datastore_search returns only the first 100 rows unless `limit` is given,
# which would silently truncate the daily series; 32000 is CKAN's default
# upper bound for `limit`.
vaxxdata <- paste0(
  "https://data.ontario.ca/api/3/action/datastore_search",
  "?resource_id=8a89caa9-511c-4568-af89-7f2174b4378c",
  "&limit=32000"
)
# Statistics Canada table 17-10-0005-01 as CSV. The query string (pid,
# startDate/endDate and selectedMembers) selects the rows that are downloaded.
ONpop2020 <-
  read.csv(
    "https://www150.statcan.gc.ca/t1/tbl1/en/dtl!downloadDbLoadingData-nonTraduit.action?pid=1710000501&latestN=0&startDate=20200101&endDate=20200101&csvLocale=en&selectedMembers=%5B%5B7%5D%2C%5B1%5D%2C%5B1%2C7%2C13%2C19%2C25%2C31%2C37%2C43%2C49%2C55%2C61%2C67%2C73%2C79%2C85%2C86%2C87%2C88%2C89%2C105%2C126%2C132%2C138%5D%5D"
  )
Slice the Ontario population estimates to obtain the population aged 15 and over (total population minus the 0-4, 5-9 and 10-14 age groups).
# Reduce the population table to the age-group label and its value.
ONpop2020 <- ONpop2020[, c("Age.group", "VALUE")]
# Drop rows 1 and 23, which the original author identifies as the total
# population and median age rows.
# NOTE(review): this relies on the row order of the download - confirm.
ONpop2020 <- ONpop2020[-c(1, 23), ]
# Population across all remaining age groups, as a single number.
ONpop2020all <- sum(ONpop2020$VALUE)
# Population in the first three remaining rows (the 0-4, 5-9 and 10-14 age
# groups according to the original comments).
ONpop2020kids <- sum(head(ONpop2020$VALUE, 3))
# Adult (15+) estimate: everyone minus the three youngest age groups.
ONpop2020adults <- ONpop2020all - ONpop2020kids
Transform vaccination data into a data frame.
# Fetch the API response and keep only the list of daily records.
vaxx <- fromJSON(file = vaxxdata)$result$records
# Bind the records side by side (one column per record), then transpose so
# that each record becomes a row and each field a column.
vaxx <- do.call(cbind, vaxx) %>%
  as.data.frame() %>%
  t() %>%
  as.data.frame()
Clean the vaccination data by removing commas from the count fields and renaming the "_id" column to "id".
# Strip the commas from every count field so the values can later be parsed
# as numbers.
for (comma_column in c(
  "previous_day_doses_administered",
  "total_doses_administered",
  "total_doses_in_fully_vaccinated_individuals",
  "total_individuals_fully_vaccinated"
)) {
  vaxx[[comma_column]] <- gsub(",", "", vaxx[[comma_column]])
}
# Rename the "_id" column to "id".
names(vaxx)[names(vaxx) == "_id"] <- "id"
Convert strings in vaccination data into numbers.
# Parse the count fields as numbers. Values that cannot be parsed become NA
# and are then recorded as zero. The zero-fill is applied to these count
# columns only, so a report_date that fails to parse stays missing instead of
# being overwritten with 0.
for (count_column in c(
  "previous_day_doses_administered",
  "total_doses_administered",
  "total_doses_in_fully_vaccinated_individuals",
  "total_individuals_fully_vaccinated"
)) {
  vaxx[[count_column]] <- as.numeric(vaxx[[count_column]])
  vaxx[[count_column]][is.na(vaxx[[count_column]])] <- 0
}
# Parse the report date strings into date-times.
vaxx$report_date <- ymd_hms(vaxx$report_date)
The province did not administer any vaccines between 2020-12-25 and 2020-12-29, and it omits those days from the data set. Add the missing Christmas 2020 rows.
# Rows for the five report dates 2020-12-25 to 2020-12-29, with the cumulative
# total held at 10756 and no fully vaccinated individuals.
# NOTE(review): the first row records 10756 doses for the previous day, equal
# to the cumulative total - confirm this matches the published figures.
christmas2020data <- data.frame(
  id = rep(0, 5),
  report_date = ymd_hms(
    c(
      "2020-12-25T00:00:00",
      "2020-12-26T00:00:00",
      "2020-12-27T00:00:00",
      "2020-12-28T00:00:00",
      "2020-12-29T00:00:00"
    )
  ),
  previous_day_doses_administered = c(10756, rep(0, 4)),
  total_doses_administered = rep(10756, 5),
  total_doses_in_fully_vaccinated_individuals = rep(0, 5),
  total_individuals_fully_vaccinated = rep(0, 5)
)
# Append the extra rows to the API data, then put everything in report-date
# order.
vaxx <- rbind(vaxx, christmas2020data)
vaxx <- vaxx[order(vaxx$report_date), ]
Calculate new columns for the percentage of adults who are partly vaccinated or fully vaccinated, using the adult population estimate.
# Express each cumulative count as a percentage of the adult population
# estimate.
# NOTE(review): percentagepart is derived from total doses administered, not
# from a count of individuals, so doses given to fully vaccinated people are
# included - confirm this is the intended meaning of "partly vaccinated".
vaxx[["percentagepart"]] <-
  vaxx[["total_doses_administered"]] / ONpop2020adults * 100
vaxx[["percentagefully"]] <-
  vaxx[["total_individuals_fully_vaccinated"]] / ONpop2020adults * 100
Unlist the vaccination data for visualisation.
# Flatten any list columns into plain vectors.
vaxx <- as.data.frame(lapply(vaxx, unlist))
# One two-column table per series, both using the column names report_date
# and percentage.
vizpart <- vaxx[, c("report_date", "percentagepart")]
names(vizpart) <- c("report_date", "percentage")
vizfull <- vaxx[, c("report_date", "percentagefully")]
names(vizfull) <- c("report_date", "percentage")
Create line graph for vaccination progress data as a percentage of adult population.
# Two lines on one panel, distinguished by line type: the fully vaccinated
# series from vizfull and the partly vaccinated series from vizpart. The
# y axis is fixed to the 0-100 range and the legend sits below the plot.
ggplot(data = vizfull, mapping = aes(x = report_date, y = percentage)) +
  geom_line(mapping = aes(linetype = "Fully")) +
  geom_line(mapping = aes(linetype = "Partly"), data = vizpart) +
  theme(legend.position = "bottom") +
  scale_y_continuous(limits = c(0, 100)) +
  labs(
    title = "Ontario COVID-19 vaccination",
    subtitle = "Percentage of adult population (15+)",
    caption = "Source: Ontario COVID-19 vaccine data and StatsCan Ontario population estimate 2020",
    x = "Reporting day",
    y = "Percentage",
    linetype = "Vaccinated"
  )
Create a line and bar chart to show daily amounts of vaccine doses and cumulative total of vaccine doses administered.
# Keep the report date plus the daily and cumulative dose counts.
dosesadministered <- vaxx[
  ,
  c(
    "report_date",
    "previous_day_doses_administered",
    "total_doses_administered"
  )
]
# The counts describe the day before the report, so the administration date is
# the report date minus 24 hours (86400 seconds).
dosesadministered$administered_date <- dosesadministered$report_date - 86400
# The report date itself is no longer needed.
dosesadministered$report_date <- NULL
# Short column names, in column order: daily doses, cumulative doses, date.
names(dosesadministered) <- c("doses", "totaldoses", "administered")
# Store the administration date as a Date rather than a date-time.
dosesadministered$administered <- as.Date(dosesadministered$administered)
# Visualisation: daily doses as bars and cumulative doses as a blue line on one
# panel with two y axes.
# The left axis is in cumulative-dose units. Daily doses are far smaller, so
# the bars are drawn at daily_scale times their value, and the right axis
# divides by the same factor so that it reads true daily doses. The factor
# keeps the bar-to-line proportion of the earlier 0.05 line scaling (1 / 0.05).
daily_scale <- 20
ggplot(data = dosesadministered, mapping = aes(x = administered)) +
  labs(
    title = "Ontario COVID-19 vaccination",
    subtitle = "Daily doses of vaccine and cumulative doses administered",
    caption = "Source: Ontario COVID-19 vaccine data"
  ) +
  geom_col(mapping = aes(y = doses * daily_scale)) +
  geom_line(
    mapping = aes(y = totaldoses),
    size = 1,
    color = "blue"
  ) +
  scale_x_date(name = "Day") +
  scale_y_continuous(
    name = "Cumulative total doses",
    # Must stay the exact inverse of the scaling applied to the bars.
    sec.axis = sec_axis(~ . / daily_scale, name = "Daily doses")
  ) +
  # Axis title colours match the layer each axis measures.
  theme(
    axis.title.y = element_text(color = "blue"),
    axis.title.y.right = element_text(color = "black")
  )
# Print the citation for R itself.
citation()
##
## To cite R in publications use:
##
## R Core Team (2018). R: A language and environment for statistical
## computing. R Foundation for Statistical Computing, Vienna, Austria.
## URL https://www.R-project.org/.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {R: A Language and Environment for Statistical Computing},
## author = {{R Core Team}},
## organization = {R Foundation for Statistical Computing},
## address = {Vienna, Austria},
## year = {2018},
## url = {https://www.R-project.org/},
## }
##
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.
# Print the citation for the rjson package, used to read the API response.
citation("rjson")
##
## To cite package 'rjson' in publications use:
##
## Alex Couture-Beil (2018). rjson: JSON for R. R package version
## 0.2.20. https://CRAN.R-project.org/package=rjson
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {rjson: JSON for R},
## author = {Alex Couture-Beil},
## year = {2018},
## note = {R package version 0.2.20},
## url = {https://CRAN.R-project.org/package=rjson},
## }
##
## ATTENTION: This citation information has been auto-generated from the
## package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
# Print the citation for the tidyverse, used for the visualisations.
citation("tidyverse")
##
## Wickham et al., (2019). Welcome to the tidyverse. Journal of Open
## Source Software, 4(43), 1686, https://doi.org/10.21105/joss.01686
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {Welcome to the {tidyverse}},
## author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D'Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani},
## year = {2019},
## journal = {Journal of Open Source Software},
## volume = {4},
## number = {43},
## pages = {1686},
## doi = {10.21105/joss.01686},
## }
# Print the citation for lubridate, used to parse the report dates.
citation("lubridate")
##
## To cite lubridate in publications use:
##
## Garrett Grolemund, Hadley Wickham (2011). Dates and Times Made Easy
## with lubridate. Journal of Statistical Software, 40(3), 1-25. URL
## https://www.jstatsoft.org/v40/i03/.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {Dates and Times Made Easy with {lubridate}},
## author = {Garrett Grolemund and Hadley Wickham},
## journal = {Journal of Statistical Software},
## year = {2011},
## volume = {40},
## number = {3},
## pages = {1--25},
## url = {https://www.jstatsoft.org/v40/i03/},
## }