suppressPackageStartupMessages({
library(tidyverse)
library(ggpubr)
library(lubridate)
library(knitr)
library(data.table)
source("scripts/plots/utils/utils.R")
source("scripts/data_preproc/preproc_functions.R")
source("scripts/mixture_analysis/functions.R")
})
options(digits=4)
knitr::opts_chunk$set(cache=TRUE)
knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())
suppressMessages(
dataset <-
read_csv(
"results/merge_spf_opencovid_agences_regionales_sante/mergedDataReadyToAnalyze.csv"
) %>%
prepare_french_region_dataset(keep.all = F) %>%
group_by(region, region_id) %>%
group_modify(function (region_data, keys) {
message(keys$region %>% str_to_upper())
message(" + PADDING : ")
region_data <- pad_region_data(region_data)
message(" + SLICING : ")
region_data <- slice_from_onset(region_data)
return(region_data)
}) %>%
select(-t)
)
## Load french region population data
ifr_pop_data <-
read.csv("data/region_wise_IFR.csv", sep=";") %>%
prepare_regions_pop_ifr()
LOCKDOWN_END <- ymd("2020-05-11")
LOCKDOWN <- ymd("2020-03-17")
extract_prefix_fit <- function(filename, dataset){
loaded_data <- new.env()
load(filename, envir = loaded_data)
stanfit <- loaded_data$out %>% add_sample_deaths_negbinom()
dataset %>% filter(date %>% between(LOCKDOWN_END - 7, LOCKDOWN_END)) %>%
group_by_all() %>%
group_modify(function(region_data, keys){
data.frame(
deaths_pred = stanfit$predicted_deaths[, keys$day_id, keys$region_id],
sample_id = 1:dim(stanfit$predicted_deaths)[1]
)
})
}
squared_errors_quantiles <- function(fit_dataset){
fit_dataset %>%
mutate(squared_error = (deaths - deaths_pred)^2) %>%
group_by(sample_id) %>%
summarise(squared_err = sum(squared_error)) %>%
pull(squared_err) %>%
quantile(c(0.025, 0.25, 0.5, 0.75, 0.975))
}
"results/region-sparse-lockdown-skipweek1.Rdata" %>%
extract_prefix_fit(dataset) %>%
squared_errors_quantiles()
## `summarise()` ungrouping output (override with `.groups` argument)
## 2.5% 25% 50% 75% 97.5%
## 7145 10062 12284 15527 25591
obs_deaths_dataset <- dataset %>%
ungroup() %>%
filter(date %>% between(LOCKDOWN_END-7, LOCKDOWN_END)) %>%
select(region_id, date, deaths)
mixture %>% filter(date %>% between(LOCKDOWN_END-7, LOCKDOWN_END)) %>%
select(-deaths) %>%
left_join(obs_deaths_dataset) %>%
mutate(squared_error=(deaths - deaths_pred)^2) %>%
group_by(sample_id) %>%
summarise(squared_err = sum(squared_error, na.rm=T)) %>%
pull(squared_err) %>%
quantile(c(0.025, 0.25, 0.5, 0.75, 0.975))
## Joining, by = c("region_id", "date")
## `summarise()` ungrouping output (override with `.groups` argument)
## 2.5% 25% 50% 75% 97.5%
## 7488 10460 13174 18410 34739
"results/region-sparse-lockdown-skipweek2.Rdata" %>%
extract_prefix_fit(dataset) %>%
squared_errors_quantiles()
## `summarise()` ungrouping output (override with `.groups` argument)
## 2.5% 25% 50% 75% 97.5%
## 8091 11711 14742 19413 36754
"results/region-sparse-lockdown-skipweek4.Rdata" %>%
extract_prefix_fit(dataset) %>%
squared_errors_quantiles()
## `summarise()` ungrouping output (override with `.groups` argument)
## 2.5% 25% 50% 75% 97.5%
## 11426 15098 17800 21264 27847