% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bootstrap_cube.R
\name{bootstrap_cube}
\alias{bootstrap_cube}
\title{Perform bootstrapping over a data cube for a calculated statistic}
\usage{
bootstrap_cube(
  data_cube,
  fun,
  ...,
  grouping_var,
  samples = 1000,
  ref_group = NA,
  seed = NA,
  progress = FALSE
)
}
\arguments{
\item{data_cube}{A data cube object (class 'processed_cube' or 'sim_cube',
see \code{b3gbi::process_cube()}) or a dataframe (from \verb{$data} slot of
'processed_cube' or 'sim_cube'). To limit runtime, we recommend using a
dataframe with a custom function as \code{fun}.}

\item{fun}{A function which, when applied to \code{data_cube}, returns the
statistic(s) of interest. This function must return a dataframe with a column
\code{diversity_val} containing the statistic of interest.}

\item{...}{Additional arguments passed on to \code{fun}.}

\item{grouping_var}{A character vector specifying the grouping variable(s)
for the bootstrap analysis. The function \code{fun(data_cube, ...)} should return
a row per group. The specified variables must not be redundant, meaning they
should not contain the same information (e.g., \code{"time_point"} (1, 2, 3) and
\code{"year"} (2000, 2001, 2002) should not be used together if \code{"time_point"} is
just an alternative encoding of \code{"year"}).}

\item{samples}{The number of bootstrap replicates. A single positive integer.
Default is 1000.}

\item{ref_group}{A string indicating the reference group to compare the
statistic with. Default is \code{NA}, meaning no reference group is used.}

\item{seed}{A positive numeric value setting the seed for random number
generation to ensure reproducibility. If \code{NA} (default), then \code{set.seed()}
is not called at all. If not \code{NA}, then the random number generator state is
reset (to the state before calling this function) upon exiting this function.}

\item{progress}{Logical. Whether to show a progress bar. Set to \code{TRUE} to
display a progress bar, \code{FALSE} (default) to suppress it.}
}
\value{
A dataframe containing the bootstrap results with the following
columns:
\itemize{
\item \code{sample}: Sample ID of the bootstrap replicate
\item \code{est_original}: The statistic based on the full dataset per group
\item \code{rep_boot}: The statistic based on a bootstrapped dataset (bootstrap
replicate)
\item \code{est_boot}: The bootstrap estimate (mean of bootstrap replicates per
group)
\item \code{se_boot}: The standard error of the bootstrap estimate (standard
deviation of the bootstrap replicates per group)
\item \code{bias_boot}: The bias of the bootstrap estimate per group
}
}
\description{
This function generates \code{samples} bootstrap replicates of a statistic applied
to a data cube. It resamples the data cube and computes a statistic \code{fun} for
each bootstrap replicate, optionally comparing the results to a reference
group (\code{ref_group}).
}
\details{
Bootstrapping is a statistical technique used to estimate the distribution of
a statistic by resampling with replacement from the original data
(Davison & Hinkley, 1997; Efron & Tibshirani, 1994).
In the case of data cubes, each row is sampled with replacement.
Below are the common notations used in bootstrapping:
\enumerate{
\item \strong{Original Sample Data}: \eqn{\mathbf{X} = \{X_1, X_2, \ldots, X_n\}}
\itemize{
\item The initial set of data points. Here, \eqn{n} is the sample
size. This corresponds to the number of cells in a data cube or the number
of rows in tabular format.
}
\item \strong{Statistic of Interest}: \eqn{\theta}
\itemize{
\item The parameter or statistic being estimated, such as the mean
\eqn{\bar{X}}, variance \eqn{\sigma^2}, or a biodiversity indicator. Let
\eqn{\hat{\theta}} denote the estimated value of \eqn{\theta} calculated
from the complete dataset \eqn{\mathbf{X}}.
}
\item \strong{Bootstrap Sample}: \eqn{\mathbf{X}^* = \{X_1^*, X_2^*, \ldots, X_n^*\}}
\itemize{
\item A sample of size \eqn{n} drawn with replacement from the original sample
\eqn{\mathbf{X}}. Each \eqn{X_i^*} is drawn independently from
\eqn{\mathbf{X}}.
\item A total of \eqn{B} bootstrap samples are drawn from the original data.
Common choices for \eqn{B} are 1000 or 10,000 to ensure a good
approximation of the distribution of the bootstrap replications (see
below).
}
\item \strong{Bootstrap Replication}: \eqn{\hat{\theta}^*_b}
\itemize{
\item The value of the statistic of interest calculated from the \eqn{b}-th
bootstrap sample \eqn{\mathbf{X}^*_b}. For example, if \eqn{\theta} is
the sample mean, \eqn{\hat{\theta}^*_b = \bar{X}^*_b}.
}
\item \strong{Bootstrap Statistics}:
}
\itemize{
\item \strong{Bootstrap Estimate of the Statistic}: \eqn{\hat{\theta}_{\text{boot}}}
\itemize{
\item The average of the bootstrap replications:
}
}

\deqn{\hat{\theta}_{\text{boot}} = \frac{1}{B} \sum_{b=1}^B \hat{\theta}^*_b}
\itemize{
\item \strong{Bootstrap Bias}: \eqn{\text{Bias}_{\text{boot}}}
\itemize{
\item This bias indicates how much the bootstrap estimate deviates from the
original sample estimate. It is calculated as the difference between the
average bootstrap estimate and the original estimate:
}
}

\deqn{\text{Bias}_{\text{boot}} = \frac{1}{B} \sum_{b=1}^B (\hat{\theta}^*_b - \hat{\theta}) = \hat{\theta}_{\text{boot}} - \hat{\theta}}
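\itemize{
\item \strong{Bootstrap Standard Error}: \eqn{\text{SE}_{\text{boot}}}
\itemize{
\item The standard deviation of the bootstrap replications, which estimates
the variability of the statistic:
}
}

\deqn{\text{SE}_{\text{boot}} = \sqrt{\frac{1}{B-1} \sum_{b=1}^B (\hat{\theta}^*_b - \hat{\theta}_{\text{boot}})^2}}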
}
\examples{
\dontrun{
# After processing a data cube with b3gbi::process_cube()

# Function to calculate statistic of interest
# Mean observations per year
mean_obs <- function(data) {
  out_df <- aggregate(obs ~ year, data, mean) # Calculate mean obs per year
  names(out_df) <- c("year", "diversity_val") # Rename columns
  return(out_df)
}
mean_obs(processed_cube$data)

# Perform bootstrapping
bootstrap_mean_obs <- bootstrap_cube(
  data_cube = processed_cube$data,
  fun = mean_obs,
  grouping_var = "year",
  samples = 1000,
  seed = 123,
  progress = FALSE
)
head(bootstrap_mean_obs)
}
}
\references{
Davison, A. C., & Hinkley, D. V. (1997). Bootstrap Methods and their
Application (1st ed.). Cambridge University Press.
\doi{10.1017/CBO9780511802843}

Efron, B., & Tibshirani, R. J. (1994). An Introduction to the Bootstrap
(1st ed.). Chapman and Hall/CRC. \doi{10.1201/9780429246593}
}
\seealso{
Other indicator_uncertainty: 
\code{\link{add_effect_classification}()},
\code{\link{calculate_acceleration}()},
\code{\link{calculate_bootstrap_ci}()}
}
\concept{indicator_uncertainty}
