suppressPackageStartupMessages({
library(tidyverse)
library(ggpubr)
library(lubridate)
library(knitr)
source("scripts/plots/utils/utils.R")
source("scripts/data_preproc/preproc_functions.R")
})
options(digits=3)
knitr::opts_chunk$set(cache=TRUE)
knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())
filename <- "results/region-sparse-lockdown-latest.Rdata"
loaded_data <- new.env()
load(filename, envir = loaded_data)
stanfit <- loaded_data$out
stan_data <- loaded_data$stan_data
suppressMessages(
dataset <-
read_csv(
"results/merge_spf_opencovid_agences_regionales_sante/mergedDataReadyToAnalyze.csv"
) %>%
prepare_french_region_dataset(keep.all = F) %>%
group_by(region, region_id) %>%
group_modify(function (region_data, keys) {
message(keys$region %>% str_to_upper())
message(" + PADDING : ")
region_data <- pad_region_data(region_data)
message(" + SLICING : ")
region_data <- slice_from_onset(region_data)
return(region_data)
})
)
## Load french region population data
ifr_pop_data <-
read.csv("data/region_wise_IFR.csv", sep=";") %>%
prepare_regions_pop_ifr()
LOCKDOWN_END <- ymd("2020-05-11")
LOCKDOWN <- ymd("2020-03-17")
stanfit <- stanfit %>% add_sample_deaths_negbinom()
regional_data <- dataset %>%
group_map(
make_plot_data_region, stanfit,
quantile_names=c(0.025, 0.975, 0.25, 0.75, "median")
) %>%
bind_rows() %>%
filter(variable_name != "rt_csum") %>%
as_tibble()
national_data <- regional_data %>%
filter(!str_detect(variable_name, "^rt")) %>%
group_by(variable_name, date) %>%
summarise_at(vars(`0.025`, `0.975`, `0.25`, `0.75`, median, mean, obs_deaths_csum), sum) %>%
ungroup()
avg_national_Rt <- function(dataset, Rt_output, ifr_pop_data, dates){
dataset %>%
# pivot Rt samples to long, add region data
group_map(function(region_data, keys){
region_id <- keys$region_id
reg_Rt <- Rt_output[,,region_id]
t(reg_Rt) %>% as.data.frame() %>%
rownames_to_column("day_id") %>%
pivot_longer(cols=-day_id, names_to="sample", values_to="Rt") %>%
mutate(
day_id = as.integer(day_id),
region_id=region_id,
date = min(region_data$date) + day_id - 1,
sample = str_remove(sample, "V") %>% as.integer()
) %>%
inner_join(ifr_pop_data, by="region_id")
}) %>%
bind_rows() %>%
as_tibble() %>%
filter(date %in% dates) %>%
# compute mean Rt by date and sample over all regions
group_by(date, sample) %>%
summarise(mean_Rt = weighted.mean(Rt, pop)) %>%
pivot_wider(names_from = date, values_from = mean_Rt) %>%
select(-sample) %>%
as.matrix() %>%
# extract quantiles from generated "samples"
extract_mcmc_quantiles("Rt_mean", quantile_names=c(0.025, 0.975, 0.25, 0.75, "median")) %>%
mutate(date = dates) %>%
select(date, everything(), -day_id)
}
Rt <- stanfit$Rt_adj
DATES <- c(
ymd("2020-03-16"),
ymd("2020-03-17"),
ymd("2020-05-11")
# today()
)
dataset %>% avg_national_Rt(Rt, ifr_pop_data, DATES)
regional_data %>%
filter(date == LOCKDOWN_END) %>%
select(region, variable_name, everything(), -day_id, -date)
pop_total <- sum(ifr_pop_data$pop)
fr_cases <- national_data %>%
filter(variable_name == "predicted_cases_csum", date == LOCKDOWN_END)
percent_cases <- fr_cases %>%
mutate_if(is.numeric, ~. / pop_total) %>%
mutate(variable_name = "total_cases_percent")
rbind(fr_cases, percent_cases) %>% select(-date)
national_data %>%
filter(variable_name == "predicted_deaths_csum") %>%
mutate(
error = median - obs_deaths_csum,
error_rel = error/obs_deaths_csum*100
) %>%
filter(month(date) == 4) %>%
filter(error %in% c(min(error), max(error)))
dataset %>% select(-t) %>%
group_by(region, region_id, day_id, date, deaths) %>%
group_modify(function(region_data, keys){
data.frame(
predicted = stanfit$predicted_deaths[, keys$day_id, keys$region_id],
sample_id = 1:dim(stanfit$predicted_deaths)[1]
)
}) %>%
mutate(squared_error = (deaths - predicted)^2) %>%
group_by(sample_id) %>%
summarise(squared_err = sum(squared_error)) %>%
pull(squared_err) %>%
quantile(c(0.025, 0.25, 0.5, 0.75, 0.975))
## 2.5% 25% 50% 75% 97.5%
## 209180 254995 282104 312676 373623
regional_data %>%
filter(variable_name == "predicted_deaths") %>%
mutate(squared_error = (deaths - median)^2 ) %>%
pull(squared_error) %>%
sum(na.rm = T)
## [1] 125568