Step generation for fitting water buffalo step selection models

Author

Scott Forrest

Published

September 4, 2024

Here we are generating random steps for fitting step selection models, and plotting the outputs of the step generation. We are using a Gamma distribution for the step lengths and a von Mises distribution for the turning angles.

1 Setup packages and import data

1.1 Setup packages

Load packages

library(tidyverse)
packages <- c("amt", "lubridate", "terra", "beepr", "tictoc")
walk(packages, require, character.only = T)

1.2 Import data and clean

Import data

buffalo <- read_csv("data/buffalo.csv")

# remove individuals that have poor data quality or less than about 3 months of data. 
# The "2014.GPS_COMPACT copy.csv" string is a duplicate of ID 2024, so we exclude it
buffalo <- buffalo %>% filter(!node %in% c("2014.GPS_COMPACT copy.csv", 
                                           2029, 2043, 2265, 2284, 2346))

buffalo <- buffalo %>%  
  group_by(node) %>% 
  arrange(DateTime, .by_group = T) %>% 
  distinct(DateTime, .keep_all = T) %>% 
  arrange(node) %>% 
  mutate(ID = node)

buffalo_clean <- buffalo[, c(12, 2, 4, 3)]
colnames(buffalo_clean) <- c("id", "time", "lon", "lat")
attr(buffalo_clean$time, "tzone") <- "Australia/Queensland"
head(buffalo_clean)

Import data

tz(buffalo_clean$time)

[1] "Australia/Queensland"

Import data

buffalo_ids <- unique(buffalo_clean$id)

2 Create random steps for fitting step selection models

2.1 Setup trajectory

Use the amt package to create a trajectory object from the cleaned data.

Create a trajectory object

buffalo_all <- buffalo_clean %>% mk_track(id = id,
                                           lon,
                                           lat, 
                                           time, 
                                           all_cols = T,
                                           crs = 4326) %>% 
  transform_coords(crs_to = 3112, crs_from = 4326) # Transformation to GDA94 / 
# Geoscience Australia Lambert (https://epsg.io/3112)

# plot the data spatially coloured by time
buffalo_all %>%
  ggplot(aes(x = x_, y = y_, colour = t_)) +
  geom_point(alpha = 0.25, size = 1) + # colour = "black",
  coord_fixed() +
  scale_colour_viridis_c() +
  theme_classic()

2.2 Prepare environmental covariates

To determine required extent of raster to crop environmental variables to (with a buffer to accommodate the random steps)

Create a template raster for cropping environmental covariates

min(buffalo_all$x_)

[1] 5338.446

Create a template raster for cropping environmental covariates

max(buffalo_all$x_)

[1] 52902.61

Create a template raster for cropping environmental covariates

min(buffalo_all$y_)

[1] -1457923

Create a template raster for cropping environmental covariates

max(buffalo_all$y_)

[1] -1411516

Create a template raster for cropping environmental covariates

template_raster_cropped <- terra::rast(xmin = 0, 
                               xmax = 60000, 
                               ymin = -1463000, 
                               ymax = -1406000, 
                               resolution = 25, 
                               crs = "epsg:3112")

template_wgs84 <- project(template_raster_cropped, "epsg:4326")
terra::ext(template_wgs84)

SpatExtent : 134, 134.541776667329, -12.5424362272288, -12.0361968908013 (xmin, xmax, ymin, ymax)

2.2.1 Reading in the environmental covariates

Import environmental covariates

ndvi_projected <- rast("mapping/cropped rasters/ndvi_GEE_projected_watermask20230207.tif")
time(ndvi_projected)

 [1] "2018-01-01" "2018-02-01" "2018-03-01" "2018-04-01" "2018-05-01"
 [6] "2018-06-01" "2018-07-01" "2018-08-01" "2018-09-01" "2018-10-01"
[11] "2018-11-01" "2018-12-01" "2019-01-01" "2019-02-01" "2019-03-01"
[16] "2019-04-01" "2019-05-01" "2019-06-01" "2019-07-01" "2019-08-01"
[21] "2019-09-01" "2019-10-01" "2019-11-01" "2019-12-01"

Import environmental covariates

slope <- rast("mapping/cropped rasters/slope_raster.tif")
veg_herby <- rast("mapping/cropped rasters/veg_herby.tif")
canopy_cover <- rast("mapping/cropped rasters/canopy_cover.tif")

# change the names (these will become the column names when extracting 
# covariate values at the used and random steps)
names(ndvi_projected) <- rep("ndvi", terra::nlyr(ndvi_projected))
names(slope) <- "slope"
names(veg_herby) <- "veg_herby"
names(canopy_cover) <- "canopy_cover"

# plot the rasters
plot(ndvi_projected)

Import environmental covariates

plot(slope)

Import environmental covariates

plot(veg_herby)

Import environmental covariates

plot(canopy_cover)

2.3 Creating a step object (by bursts)

Create steps by burst

# nest the data by individual
buffalo_all_nested <- buffalo_all %>% arrange(id) %>% nest(data = -"id")

buffalo_all_nested_steps_by_burst <- buffalo_all_nested %>%
  mutate(steps = map(data, function(x)
    x %>% track_resample(rate = hours(1), tolerance = minutes(10)) %>%
      # to filter out bursts with less than 3 locations
      # amt::filter_min_n_burst(min_n = 3) %>% 
      steps_by_burst()))

# unnest the data after creating 'steps' objects
buffalo_all_steps_by_burst <- buffalo_all_nested_steps_by_burst %>% 
  amt::select(id, steps) %>% 
  amt::unnest(cols = steps)

buffalo_all_steps_by_burst <- buffalo_all_steps_by_burst %>% 
  mutate(t2_rounded = round_date(t2_, "hour"), # round the time to the nearest hour
         hour_t2 = ifelse(hour(t2_rounded) == 0, 24, hour(t2_rounded))) # change the 0 hour to 24

head(buffalo_all_steps_by_burst, 10)

2.4 Fitting step length and turning angle distributions

Fitting exponential and von Mises distributions to the steps of ALL individuals (only one Gamma and one von Mises distribution for the whole population). This should be done when fitting a hierarchical model to update the ‘population’ parameters, but also makes it straightforward to update after model fitting to each individual separately.

Fit step length and turning angle distributions

# fitting step length and turning angle distributions to all locations
gamma_dist <- fit_distr(buffalo_all_steps_by_burst$sl_, "gamma")
vonmises_dist <- fit_distr(buffalo_all_steps_by_burst$ta_, "vonmises")

# checking parameters - which can then be saved to update movement parameters 
# after fitting the step selection model
gamma_dist$params$shape

[1] 0.438167

Fit step length and turning angle distributions

gamma_dist$params$scale

[1] 534.3508

Fit step length and turning angle distributions

vonmises_dist$params$kappa

[1] 0.1848126

Fit step length and turning angle distributions

vonmises_dist$params$mu

Circular Data: 
Type = angles 
Units = radians 
Template = none 
Modulo = asis 
Zero = 0 
Rotation = counter 
[1] 0

For some reason, the random_steps function does not work when using the bursted_steps_xyt class. I’m not sure why (the error is Error in bursts[[i]] : subscript out of bounds), but it works when that class label is removed, and appears to sample random steps correctly. For taxa that have irregular fixes and many bursts, this may be worth exploring in more detail.

Create random steps

class(buffalo_all_steps_by_burst)

[1] "bursted_steps_xyt" "steps_xyt"         "steps_xy"         
[4] "tbl_df"            "tbl"               "data.frame"

Create random steps

class(buffalo_all_steps_by_burst) <- class(buffalo_all_steps_by_burst)[-1]
class(buffalo_all_steps_by_burst)

[1] "steps_xyt"  "steps_xy"   "tbl_df"     "tbl"        "data.frame"

Create random steps

tic()
buffalo_parametric_popn_GvM <- buffalo_all_steps_by_burst %>% 
  random_steps(n_control = 10,
               sl_distr = gamma_dist,
               ta_distr = vonmises_dist) %>% 
  mutate(y = as.numeric(case_))
toc()

97.31 sec elapsed

2.5 Plotting the random step distributions

Spatially plot the used and random steps, with the used steps in red.

Plot the random steps

buffalo_parametric_popn_GvM %>% ggplot() +
  geom_point(data = . %>% filter(y == 0), aes(x = x2_, y = y2_), 
             colour = "black", size = 0.1, alpha = 0.1) +
  geom_point(data = . %>% filter(y == 1), aes(x = x2_, y = y2_), 
             colour = "red", size = 0.1, alpha = 0.1) +
  coord_equal() +
  theme_bw()

Gamma distribution of the step lengths

Plot the step length density

buffalo_parametric_popn_GvM %>% ggplot() +
  geom_density(data = . %>% filter(y == 1), aes(x = sl_, fill = factor(id)), 
               alpha = 0.25) +
  geom_density(data = . %>% filter(y == 0), aes(x = sl_), colour = "red") +
  scale_y_continuous("Density") +
  scale_x_continuous("Step length (m)") +
  scale_fill_viridis_d(direction = -1) +
  theme_classic() +
  theme(legend.position = "none")

Plot the step length density

# ggsave(paste0("outputs/plots/manuscript_figs/step_length_density_", Sys.Date(), ".png"), 
#        width=150, height=90, units="mm", dpi = 600)

When log-transforming the step length, it is clear that there is a bimodal pattern to the step lengths, which is not captured by the Gamma distribution of the random steps.

Plot the log of the step length density

buffalo_parametric_popn_GvM %>% ggplot() +
  geom_density(data = . %>% filter(y == 1), aes(x = log(sl_), fill = factor(id)), 
               alpha = 0.25) +
  geom_density(data = . %>% filter(y == 0), aes(x = log(sl_)), colour = "red") +
  scale_y_continuous("Density") +
  scale_x_continuous("Log of step length (m)") +
  scale_fill_viridis_d(direction = -1) +
  theme_classic() +
  theme(legend.position = "none")

Plot the log of the step length density

# ggsave(paste0("outputs/plots/manuscript_figs/step_length_density_log_", Sys.Date(), ".png"), 
#        width=150, height=90, units="mm", dpi = 600)

Plot the von Mises distribution of the turning angles

Plot the turning angle density

buffalo_parametric_popn_GvM %>% ggplot() +
  geom_density(data = . %>% filter(y == 1), aes(x = ta_, fill = factor(id)), 
               alpha = 0.25) +
  geom_density(data = . %>% filter(y == 0), aes(x = ta_), colour = "red", 
               alpha = 0.01) +
  scale_y_continuous("Density") +
  scale_x_continuous("Turning angle (rad)") +
  scale_fill_viridis_d(direction = -1) +
  theme_classic() +
  theme(legend.position = "none")

Warning: Removed 8053 rows containing non-finite outside the scale range
(`stat_density()`).

Plot the turning angle density

# ggsave(paste0("outputs/plots/manuscript_figs/turning angle_density_", Sys.Date(), ".png"), 
#        width=150, height=90, units="mm", dpi = 600)

2.6 Sample values of the environmental covariates at the end of the steps.

Extract covariates at the end of the steps

buffalo_parametric_popn_covs <- buffalo_parametric_popn_GvM %>% 
  
  extract_covariates_var_time(ndvi_projected,
                              where = "end",
                              when = "any",
                              max_time = days(15),
                              name_covar = "ndvi_temporal") %>% 
  extract_covariates(veg_herby,
                     where = "end") %>%
  extract_covariates(canopy_cover,
                     where = "end") %>%
  extract_covariates(slope,
                     where = "end") %>% 
  
  mutate(y = as.numeric(case_),
         cos_ta_ = cos(ta_),
         log_sl_ = log(sl_))

# write the output to a csv file - GvM indicates Gamma - von Mises for step length and turning angle distributions
write_csv(buffalo_parametric_popn_covs, 
          paste0("outputs/buffalo_parametric_popn_covs_GvM_10rs_", Sys.Date(), ".csv"))

beep(sound = 2)

---
title: "Step generation for fitting water buffalo step selection models"
author: "Scott Forrest"
date: "`r Sys.Date()`"
execute: 
  cache: false
toc: true
number-sections: true
format: 
  html:
    self-contained: true
    code-fold: show 
    code-tools: true
    df-print: paged
    code-line-numbers: true
    code-overflow: scroll
  pdf:
    geometry: 
      - top=30mm
      - left=30mm
editor:
  source
---

Here we are generating random steps for fitting step selection models, and plotting the outputs of the step generation. We are using a Gamma distribution for the step lengths and a von Mises distribution for the turning angles. 

```{r}
#| label: img_params
#| code-summary: "Setting image parameters"
#| include: false

knitr::opts_chunk$set(echo = TRUE,
                      dev = 'png',
                      dpi = 300)

```

# Setup packages and import data

## Setup packages

```{r}
#| label: packages
#| code-summary: "Load packages"
#| message: false
#| warning: false

library(tidyverse)
packages <- c("amt", "lubridate", "terra", "beepr", "tictoc")
walk(packages, require, character.only = T)

```

## Import data and clean

```{r}
#| label: import_data
#| code-summary: "Import data"
#| message: false
#| warning: false

buffalo <- read_csv("data/buffalo.csv")

# remove individuals that have poor data quality or less than about 3 months of data. 
# The "2014.GPS_COMPACT copy.csv" string is a duplicate of ID 2024, so we exclude it
buffalo <- buffalo %>% filter(!node %in% c("2014.GPS_COMPACT copy.csv", 
                                           2029, 2043, 2265, 2284, 2346))

buffalo <- buffalo %>%  
  group_by(node) %>% 
  arrange(DateTime, .by_group = T) %>% 
  distinct(DateTime, .keep_all = T) %>% 
  arrange(node) %>% 
  mutate(ID = node)

buffalo_clean <- buffalo[, c(12, 2, 4, 3)]
colnames(buffalo_clean) <- c("id", "time", "lon", "lat")
attr(buffalo_clean$time, "tzone") <- "Australia/Queensland"
head(buffalo_clean)
tz(buffalo_clean$time)

buffalo_ids <- unique(buffalo_clean$id)

```

# Create random steps for fitting step selection models

## Setup trajectory

Use the `amt` package to create a trajectory object from the cleaned data. 

```{r}
#| label: make_track
#| code-summary: "Create a trajectory object"

buffalo_all <- buffalo_clean %>% mk_track(id = id,
                                           lon,
                                           lat, 
                                           time, 
                                           all_cols = T,
                                           crs = 4326) %>% 
  transform_coords(crs_to = 3112, crs_from = 4326) # Transformation to GDA94 / 
# Geoscience Australia Lambert (https://epsg.io/3112)

# plot the data spatially coloured by time
buffalo_all %>%
  ggplot(aes(x = x_, y = y_, colour = t_)) +
  geom_point(alpha = 0.25, size = 1) + # colour = "black",
  coord_fixed() +
  scale_colour_viridis_c() +
  theme_classic()

```

## Prepare environmental covariates

To determine required extent of raster to crop environmental variables to (with a buffer to accommodate the random steps)

```{r}
#| label: template_raster
#| code-summary: "Create a template raster for cropping environmental covariates"

min(buffalo_all$x_) 
max(buffalo_all$x_)
min(buffalo_all$y_)
max(buffalo_all$y_)

template_raster_cropped <- terra::rast(xmin = 0, 
                               xmax = 60000, 
                               ymin = -1463000, 
                               ymax = -1406000, 
                               resolution = 25, 
                               crs = "epsg:3112")

template_wgs84 <- project(template_raster_cropped, "epsg:4326")
terra::ext(template_wgs84)

```

### Reading in the environmental covariates

```{r}
#| label: import_env_covs
#| code-summary: "Import environmental covariates"

ndvi_projected <- rast("mapping/cropped rasters/ndvi_GEE_projected_watermask20230207.tif")
time(ndvi_projected)

slope <- rast("mapping/cropped rasters/slope_raster.tif")
veg_herby <- rast("mapping/cropped rasters/veg_herby.tif")
canopy_cover <- rast("mapping/cropped rasters/canopy_cover.tif")

# change the names (these will become the column names when extracting 
# covariate values at the used and random steps)
names(ndvi_projected) <- rep("ndvi", terra::nlyr(ndvi_projected))
names(slope) <- "slope"
names(veg_herby) <- "veg_herby"
names(canopy_cover) <- "canopy_cover"

# plot the rasters
plot(ndvi_projected)
plot(slope)
plot(veg_herby)
plot(canopy_cover)

```

## Creating a step object (by bursts)

```{r}
#| label: steps_by_burst
#| code-summary: "Create steps by burst"

# nest the data by individual
buffalo_all_nested <- buffalo_all %>% arrange(id) %>% nest(data = -"id")

buffalo_all_nested_steps_by_burst <- buffalo_all_nested %>%
  mutate(steps = map(data, function(x)
    x %>% track_resample(rate = hours(1), tolerance = minutes(10)) %>%
      # to filter out bursts with less than 3 locations
      # amt::filter_min_n_burst(min_n = 3) %>% 
      steps_by_burst()))

# unnest the data after creating 'steps' objects
buffalo_all_steps_by_burst <- buffalo_all_nested_steps_by_burst %>% 
  amt::select(id, steps) %>% 
  amt::unnest(cols = steps)

buffalo_all_steps_by_burst <- buffalo_all_steps_by_burst %>% 
  mutate(t2_rounded = round_date(t2_, "hour"), # round the time to the nearest hour
         hour_t2 = ifelse(hour(t2_rounded) == 0, 24, hour(t2_rounded))) # change the 0 hour to 24

head(buffalo_all_steps_by_burst, 10)

```

## Fitting step length and turning angle distributions

Fitting exponential and von Mises distributions to the steps of ALL individuals (only one Gamma and one von Mises distribution for the whole population). This should be done when fitting a hierarchical model to update the 'population' parameters, but also makes it straightforward to update after model fitting to each individual separately.

```{r}
#| label: fit_distributions
#| code-summary: "Fit step length and turning angle distributions"

# fitting step length and turning angle distributions to all locations
gamma_dist <- fit_distr(buffalo_all_steps_by_burst$sl_, "gamma")
vonmises_dist <- fit_distr(buffalo_all_steps_by_burst$ta_, "vonmises")

# checking parameters - which can then be saved to update movement parameters 
# after fitting the step selection model
gamma_dist$params$shape
gamma_dist$params$scale
vonmises_dist$params$kappa
vonmises_dist$params$mu

```

For some reason, the `random_steps` function does not work when using the `bursted_steps_xyt` class. I'm not sure why (the error is `Error in bursts[[i]] : subscript out of bounds`), but it works when that class label is removed, and appears to sample random steps correctly. For taxa that have irregular fixes and many bursts, this may be worth exploring in more detail.

```{r}
#| label: random_steps
#| code-summary: "Create random steps"

class(buffalo_all_steps_by_burst)
class(buffalo_all_steps_by_burst) <- class(buffalo_all_steps_by_burst)[-1]
class(buffalo_all_steps_by_burst)

tic()
buffalo_parametric_popn_GvM <- buffalo_all_steps_by_burst %>% 
  random_steps(n_control = 10,
               sl_distr = gamma_dist,
               ta_distr = vonmises_dist) %>% 
  mutate(y = as.numeric(case_))
toc()

```

## Plotting the random step distributions

Spatially plot the used and random steps, with the used steps in red.

```{r}
#| label: plot_steps
#| code-summary: "Plot the random steps"

buffalo_parametric_popn_GvM %>% ggplot() +
  geom_point(data = . %>% filter(y == 0), aes(x = x2_, y = y2_), 
             colour = "black", size = 0.1, alpha = 0.1) +
  geom_point(data = . %>% filter(y == 1), aes(x = x2_, y = y2_), 
             colour = "red", size = 0.1, alpha = 0.1) +
  coord_equal() +
  theme_bw()

```

Gamma distribution of the step lengths

```{r}
#| label: plot_step_length_density
#| code-summary: "Plot the step length density"

buffalo_parametric_popn_GvM %>% ggplot() +
  geom_density(data = . %>% filter(y == 1), aes(x = sl_, fill = factor(id)), 
               alpha = 0.25) +
  geom_density(data = . %>% filter(y == 0), aes(x = sl_), colour = "red") +
  scale_y_continuous("Density") +
  scale_x_continuous("Step length (m)") +
  scale_fill_viridis_d(direction = -1) +
  theme_classic() +
  theme(legend.position = "none")

# ggsave(paste0("outputs/plots/manuscript_figs/step_length_density_", Sys.Date(), ".png"), 
#        width=150, height=90, units="mm", dpi = 600)

```

When log-transforming the step length, it is clear that there is a bimodal pattern to the step lengths, which is not captured by the Gamma distribution of the random steps.

```{r}
#| label: plot_step_length_density_log
#| code-summary: "Plot the log of the step length density"

buffalo_parametric_popn_GvM %>% ggplot() +
  geom_density(data = . %>% filter(y == 1), aes(x = log(sl_), fill = factor(id)), 
               alpha = 0.25) +
  geom_density(data = . %>% filter(y == 0), aes(x = log(sl_)), colour = "red") +
  scale_y_continuous("Density") +
  scale_x_continuous("Log of step length (m)") +
  scale_fill_viridis_d(direction = -1) +
  theme_classic() +
  theme(legend.position = "none") 

# ggsave(paste0("outputs/plots/manuscript_figs/step_length_density_log_", Sys.Date(), ".png"), 
#        width=150, height=90, units="mm", dpi = 600)

```

Plot the von Mises distribution of the turning angles

```{r}
#| label: plot_turning_angle_density
#| code-summary: "Plot the turning angle density"

buffalo_parametric_popn_GvM %>% ggplot() +
  geom_density(data = . %>% filter(y == 1), aes(x = ta_, fill = factor(id)), 
               alpha = 0.25) +
  geom_density(data = . %>% filter(y == 0), aes(x = ta_), colour = "red", 
               alpha = 0.01) +
  scale_y_continuous("Density") +
  scale_x_continuous("Turning angle (rad)") +
  scale_fill_viridis_d(direction = -1) +
  theme_classic() +
  theme(legend.position = "none") 

# ggsave(paste0("outputs/plots/manuscript_figs/turning angle_density_", Sys.Date(), ".png"), 
#        width=150, height=90, units="mm", dpi = 600)

```

## Sample values of the environmental covariates at the end of the steps.

```{r}
#| label: extract_covariates
#| code-summary: "Extract covariates at the end of the steps"

buffalo_parametric_popn_covs <- buffalo_parametric_popn_GvM %>% 
  
  extract_covariates_var_time(ndvi_projected,
                              where = "end",
                              when = "any",
                              max_time = days(15),
                              name_covar = "ndvi_temporal") %>% 
  extract_covariates(veg_herby,
                     where = "end") %>%
  extract_covariates(canopy_cover,
                     where = "end") %>%
  extract_covariates(slope,
                     where = "end") %>% 
  
  mutate(y = as.numeric(case_),
         cos_ta_ = cos(ta_),
         log_sl_ = log(sl_))

# write the output to a csv file - GvM indicates Gamma - von Mises for step length and turning angle distributions
write_csv(buffalo_parametric_popn_covs, 
          paste0("outputs/buffalo_parametric_popn_covs_GvM_10rs_", Sys.Date(), ".csv"))

beep(sound = 2)

```