papros
library(papros)
library(magrittr)
library(tidyr)
Now we can download the weather data, which are interesting for the prediction of the pathogen powdery mildew. The weather data are separated in historical data and recent data. The historical data reach longer in the past and are no longer updated in contradiction to the recent data.
if(!file.exists("./data/rdsfiles/hist_weath_sh.rds")){
hist_weath_sh <- download_statewide_hourly_station_data(state = "Schleswig-Holstein",
time = "historical",
coord = TRUE)
saveRDS(hist_weath_sh, "./data/rdsfiles/hist_weath_sh.rds")
}else{
hist_weath_sh <- readRDS("./data/rdsfiles/hist_weath_sh.rds")
}
if(!file.exists("./data/rdsfiles/rec_weath_sh.rds")){
rec_weath_sh <- download_statewide_hourly_station_data(state = "Schleswig-Holstein",
time = "recent",
coord = TRUE)
saveRDS(rec_weath_sh, "./data/rdsfiles/rec_weath_sh.rds")
}else{
rec_weath_sh <- readRDS("./data/rdsfiles/rec_weath_sh.rds")
}
weath_sh <- rbind(hist_weath_sh, rec_weath_sh) %>%
dplyr::distinct()
Since we only have infestation data from 1996 on (sorry I can not supply these to you) we select the weather data from the first october of 1995 on. Why from this date? Because we use the function large_ctu
to calculate the Cumulative Thermal Unit, which represents the growth of wheat for our example and we assume that the wheat has been sown on this date.
if(!file.exists("./data/rdsfiles/weath_sh.rds")){
weath_sh %<>% dplyr::filter(DateTime > 1995093023) %>%
dplyr::mutate(Date = as.Date(substr(DateTime,1,8),"%Y%m%d")) %>%
dplyr::mutate(CTU = large_ctu(dataset = .,
temp_column = "Temperature",
date_column = "Date",
start_date = "10-01",
location_column = "ID")) %>%
dplyr::select(-Date)
saveRDS(weath_sh, "./data/rdsfiles/weath_sh.rds")
}else{
weath_sh <- readRDS("./data/rdsfiles/weath_sh.rds")
}
head(weath_sh)
## ID DateTime Temperature Humidity Precipitation WindSpeed WindDirection
## 1 52 1995100100 NA NA NA 1.0 270
## 2 52 1995100101 NA NA NA 1.5 270
## 3 52 1995100102 NA NA NA 1.5 270
## 4 52 1995100103 NA NA NA 2.0 280
## 5 52 1995100104 NA NA NA 2.0 280
## 6 52 1995100105 NA NA NA 1.5 280
## lat lon CTU
## 1 53.6623 10.199 NA
## 2 53.6623 10.199 NA
## 3 53.6623 10.199 NA
## 4 53.6623 10.199 NA
## 5 53.6623 10.199 NA
## 6 53.6623 10.199 NA
In addition to the hourly weahter data we downloaded before, we add some multi-annual climate data, since we assume, that this also influences the spread of the pathogen.
library(raster)
if(!file.exists("./data/rdsfiles/coraster.rds")){
germ <- raster::getData('GADM', country='DEU', level=1)
sh <- germ[which(germ$NAME_1=="Schleswig-Holstein"),]
coraster <- raster::stack(download_dwd_raster(parameter = "air_temperature_mean", period = "1961-1990", month = 17, crop=sh),
download_dwd_raster(parameter = "precipitation", period = "1961-1990", month = 17, crop=sh),
download_dwd_raster(parameter = "water_balance", period = "1961-1990", month = 17, crop=sh))
coraster <- raster::projectRaster(coraster,crs=CRS("+init=epsg:25832"))
saveRDS(coraster, "./data/rdsfiles/coraster.rds")
}else{
coraster <- readRDS("./data/rdsfiles/coraster.rds")
}
library(mapview)
mapview(coraster[[1]])+mapview(coraster[[2]])+mapview(coraster[[3]])
Now we include the infestation data and create a SpatialPointsDataFrame
of them.
library(sp)
inf_dat <- read.csv2("./data/raw_data/spatial/infestation_data_2.csv") %>%
dplyr::mutate(Date = as.Date(as.character(Date,"%Y-%m-%d")))
head(inf_dat)
## Location x y Date Infection_PM Infection_BR
## 1 Barlt 501463.9 5985602 1995-05-22 0.0 0
## 2 Barlt 501463.9 5985602 1995-04-24 0.0 0
## 3 Barlt 501463.9 5985602 1995-05-01 0.0 0
## 4 Barlt 501463.9 5985602 1995-05-08 0.0 0
## 5 Barlt 501463.9 5985602 1995-05-15 0.0 0
## 6 Barlt 501463.9 5985602 1996-06-10 63.3 0
inf_dat_sp <- SpatialPointsDataFrame(coords = inf_dat[,c("x","y")],
data = inf_dat,
proj4string = CRS(("+init=epsg:25832")))
# Remove duplicated locations for interpolation
inf_dat_sp_single <- inf_dat_sp[!duplicated(inf_dat_sp$Location),-c(5,6)]
# Add Covariable raster data
inf_dat_sp_single <- raster::extract(coraster,inf_dat_sp_single,sp=TRUE)
inf_dat_sp_single
## class : SpatialPointsDataFrame
## features : 12
## extent : 491597.4, 643556.4, 5957153, 6054210 (xmin, xmax, ymin, ymax)
## coord. ref. : +init=epsg:25832 +proj=utm +zone=32 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
## variables : 7
## names : Location, x, y, Date, air_temp_mean_1961.1990_17, precipitation_1961.1990_17, water_balance_1961.1990_17
## min values : Barlt, 491597.4, 5957153, 1995-04-24, 81.00000, 575.8416, -4.074289
## max values : Sönke-Nissen-Koog, 643556.4, 6054210, 1996-05-06, 85.88474, 826.9701, 256.541322
For the prediction of the infestations we need to know, how the weather was at the locations we now about. Therefore we need to interoplate the weahter data. In this example we tell the function aggregate_interpolate_points
to interpolate all wehater parameters trying Ordinary Kriging and if this should not work, trying Inverse Distance Weighting. Of course, for only one interoplation we would manually create and fit a nice variogram, but remeber that we have hourly weather data of 20 years! Therfore we use automated fitting algorithms.Also in this step included is the aggregation and reduction of the dataset. The function reduce_input
only selects weahter data of the dates, relevant for our infestation data. But why do we use this specific function? Because we assume a temporal difference between the observation of the infestation (the incubation period) and the time span (infection period) during which the weather was important for the infestation. Therefore these parameters are included in the functions as well.
if(!file.exists("./data/rdsfiles/weath_sh_pm_int.rds")){
# How many interpolations would be carried out?
length(unique(weath_sh$DateTime))
# Still quite large, better to reduce the dataset to the times relevant for the infestation by powdery mildew:
weath_sh_pm <- reduce_input(dataframe = weath_sh,
DateTime = "DateTime",
infection = 2,
incubation = 8,
event_dates = inf_dat$Date)
length(unique(weath_sh_pm$DateTime))
weath_sh_pm_int <- aggregate_interpolate_points(dataframe = weath_sh_pm,
coords = c("lon","lat"),
DateTime = "DateTime",
infection=2,
incubation=8,
aim_variable =c("Temperature","Humidity","Precipitation","WindSpeed","CTU"),
outputfile=inf_dat_sp_single,
co_variables = FALSE,
procedure = c("ok","idw"),
epsg = 4326,
trans_epsg = 25832)
saveRDS(weath_sh_pm_int, "./data/rdsfiles/weath_sh_pm_int.rds")
}else{
weath_sh_pm_int <- readRDS("./data/rdsfiles/weath_sh_pm_int.rds")
}
After interpolation we combine the interpolated and aggregated weather data and the infestation data.
if(!file.exists("./data/rdsfiles/pm_dieas_weath.rds")){
pm_dieas_weath <- do.call("rbind",Map(function(x){x@data},x=weath_sh_pm_int)) %>%
dplyr::mutate(Date = as.Date(Date,"%Y%m%d")) %>%
dplyr::inner_join(inf_dat, by=c("x","y","Date"))
saveRDS(pm_dieas_weath, "./data/rdsfiles/pm_dieas_weath.rds")
}else{
pm_dieas_weath <- readRDS("./data/rdsfiles/pm_dieas_weath.rds")
}
head(pm_dieas_weath)
## x y Location.x Date air_temp_mean_1961.1990_17
## 1 501463.9 5985602 Barlt 1996-05-06 82.02993
## 2 568733.7 6032253 Birkenmoor 1996-05-06 83.02643
## 3 534391.2 5962184 Elskop 1996-05-06 84.05234
## 4 643556.4 6037650 Fehmarn 1996-05-06 83.91584
## 5 606426.2 6017214 Futterkamp 1996-05-06 84.96794
## 6 602845.9 5957153 Kastorf 1996-05-06 82.94788
## precipitation_1961.1990_17 water_balance_1961.1990_17 x.1 y.1
## 1 826.9701 253.942861 501463.9 5985602
## 2 745.6341 212.458408 568733.7 6032253
## 3 784.9020 224.867630 534391.2 5962184
## 4 575.8416 -4.074289 643556.4 6037650
## 5 649.4213 114.492959 606426.2 6017214
## 6 703.5854 128.726482 602845.9 5957153
## Temperature_mean Temperature_min Temperature_max Humidity_mean
## 1 6.855308 2.342857 11.65714 77.97669
## 2 6.855308 2.342858 11.65714 77.97669
## 3 6.855308 2.342857 11.65714 77.97669
## 4 6.855308 2.342857 11.65714 77.97669
## 5 6.855308 2.342857 11.65714 77.97669
## 6 7.033559 2.255044 11.92703 78.03158
## Humidity_min Humidity_max Precipitation_mean Precipitation_min
## 1 55.85714 95.71429 0.005483067 0
## 2 55.85714 95.71429 0.012549603 0
## 3 55.85714 95.71429 0.012549603 0
## 4 55.85714 95.71429 0.018502804 0
## 5 55.85714 95.71429 0.012549603 0
## 6 55.64967 95.74199 0.012549603 0
## Precipitation_max WindSpeed_mean WindSpeed_min WindSpeed_max CTU_mean
## 1 0.1582485 3.389896 0.39219105 6.069603 574.0330
## 2 0.3285714 3.435478 1.24026391 5.926431 685.0949
## 3 0.3285714 2.461352 0.01984686 4.983534 545.5942
## 4 0.7411306 5.156062 1.62160576 9.425685 576.2264
## 5 0.3285714 3.555251 1.14021058 5.355196 569.1313
## 6 0.3285714 1.991932 0.60541794 3.692307 469.4606
## CTU_min CTU_max Location.y Infection_PM Infection_BR
## 1 541.3953 699.5505 Barlt 3.3 0
## 2 587.2715 699.5505 Birkenmoor 76.7 0
## 3 507.3262 693.2088 Elskop 0.0 0
## 4 544.1525 699.5505 Fehmarn 73.3 0
## 5 535.3114 699.5505 Futterkamp 80.0 0
## 6 440.3442 587.2715 Kastorf 50.0 0
Then we let the machine (here the Random Forest algorithm) learn under which circumstances dangerous events (defined by us as disease incidence larger than 70) occured. By now I did not include the enhancement methods I created during my thesis, but this will happen soon.
pm_dieas_weath %<>% na.omit() %>%
dplyr::mutate(Danger = Infection_PM) %>%
dplyr::mutate(Danger = as.factor(ifelse(Danger<70,0,1)))
rf_pred_pm <- machine_learner(dataframe=pm_dieas_weath,
aim_variable="Danger",
co_variables=c("Temperature_mean","Temperature_min","Temperature_max",
"Humidity_mean","Humidity_min","Humidity_max",
"Precipitation_mean","Precipitation_min","Precipitation_max",
"WindSpeed_mean","WindSpeed_min","WindSpeed_max","air_temp_mean_1961.1990_17",
"precipitation_1961.1990_17","water_balance_1961.1990_17","CTU_mean"),
method = "RF")
To create spatio-temporal predictions we interpolate the weather data of some days not only for some points but for the whole area of Schleswig-Holstein and use the machine_predictor
to predict the model generated using the machine_learner
on the raster stacks.
# Lets try a spatial prediction for some days
rec_weath_sh <- readRDS("./data/rdsfiles/rec_weath_sh.rds")
rec_weath_sh %<>% dplyr::mutate(Date = as.Date(substr(DateTime,1,8),"%Y%m%d")) %>%
dplyr::mutate(CTU = large_ctu(dataset = .,
temp_column = "Temperature",
date_column = "Date",
start_date = "10-01",
location_column = "ID")) %>%
dplyr::select(-Date) %>%
reduce_input(dataframe = .,
DateTime = "DateTime",
infection = 2,
incubation = 8,
event_dates = seq(from = as.Date("2018-06-01","%Y-%m-%d"),
to = as.Date("2018-06-15","%Y-%m-%d"),
by = 1))
if(!dir.exists("./data/rdsfiles/pred_weath_sh_pm_int")){
pred_weath_sh_pm_int <- aggregate_interpolate_points(dataframe = rec_weath_sh,
coords = c("lon","lat"),
DateTime = "DateTime",
infection=2,
incubation=8,
aim_variable =c("Temperature","Humidity","Precipitation","WindSpeed","CTU"),
outputfile=coraster[[1]],
co_variables = FALSE,
procedure = c("ked","ok","idw"),
epsg = 4326,
trans_epsg = 25832)
store_rasterstacklist(rstacklist = pred_weath_sh_pm_int,
pathfolder = "./data/rdsfiles/pred_weath_sh_pm_int")
}else{
pred_weath_sh_pm_int <- read_rasterstacklist(pathfolder = "./data/rdsfiles/pred_weath_sh_pm_int")
}
pm_predicted_stack <- machine_predictor(rstack = pred_weath_sh_pm_int,
mmodel = rf_pred_pm$RF,
additionalRaster = coraster,
type = "prob",
index = 2)
plot(pm_predicted_stack)
The videoplot_rasterstack
allows us to use the animation package (which itself uses ffmpeg) to creat a nice video of our predictions.
videoplot_rasterstack(rstack = pm_predicted_stack,
ffmpeg_path = paste0(getwd(),"/data/ffmpeg-20150911-git-f58e011-win64-static/bin/ffmpeg.exe"),
storefile = "./plots/powdery_mildew_pred.mp4",
col=colorRampPalette(c("#1a9850","#66bd63","#a6d96a","#d9ef8b","#fee08b","#fdae61","#f46d43","#d73027"))(8))
library(shinyLP)
iframe(width = "415", height = "415",
url_link = "./plots/powdery_mildew_pred.mp4")
The machine_predictor_lineplot
allows location specific plots for points and polygons:
library(sf)
library(ggplot2)
plz <- read_sf("./data/raw_data/spatial/plz5sh-stellig.gpkg") %>%
as(.,"Spatial")
se <- 155
plot(plz)
plot(plz[se,],add=TRUE,col="red")
machine_predictor_lineplot(rstack = pm_predicted_stack,
location = plz[se,],
yname ="Probability of endangering events",
ylim=c(0,1),
rollingaverage = 1,
threshold = 0.5)