% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/calculate_bootstrap_ci.R
\name{calculate_bootstrap_ci}
\alias{calculate_bootstrap_ci}
\title{Calculate confidence intervals for a dataframe with bootstrap replicates}
\usage{
calculate_bootstrap_ci(
  bootstrap_samples_df,
  grouping_var,
  type = c("perc", "bca", "norm", "basic"),
  conf = 0.95,
  h = function(t) t,
  hinv = function(t) t,
  no_bias = FALSE,
  aggregate = TRUE,
  data_cube = NA,
  fun = NA,
  ...,
  ref_group = NA,
  influence_method = ifelse(is.element("bca", type), "usual", NA),
  progress = FALSE
)
}
\arguments{
\item{bootstrap_samples_df}{A dataframe containing the bootstrap replicates,
where each row represents a bootstrap sample. As returned by
\code{bootstrap_cube()}. Apart from the \code{grouping_var} column, the following
columns should be present:
\itemize{
\item \code{est_original}: The statistic based on the full dataset per group
\item \code{rep_boot}: The statistic based on a bootstrapped dataset (bootstrap
replicate)
}}

\item{grouping_var}{A character vector specifying the grouping variable(s)
for the bootstrap analysis. The function \code{fun(data_cube, ...)} should return
a row per group. The specified variables must not be redundant, meaning they
should not contain the same information (e.g., \code{"time_point"} (1, 2, 3) and
\code{"year"} (2000, 2001, 2002) should not be used together if \code{"time_point"} is
just an alternative encoding of \code{"year"}).
This variable is used to split the dataset into groups for separate
confidence interval calculations.}

\item{type}{A character vector specifying the type(s) of confidence intervals
to compute. Options include:
\itemize{
\item \code{"perc"}: Percentile interval
\item \code{"bca"}: Bias-corrected and accelerated interval
\item \code{"norm"}: Normal interval
\item \code{"basic"}: Basic interval
\item \code{"all"}: Compute all available interval types (same set as the default \code{c("perc", "bca", "norm", "basic")})
}}

\item{conf}{A numeric value specifying the confidence level of the intervals.
Default is \code{0.95} (95 \% confidence level).}

\item{h}{A function defining a transformation. The intervals are calculated
on the scale of \code{h(t)} and the inverse function \code{hinv} applied to the
resulting intervals. It must be a function of one variable only. The default
is the identity function.}

\item{hinv}{A function, like \code{h}, which returns the inverse of \code{h}. It is
used to transform the intervals calculated on the scale of \code{h(t)} back to the
original scale. The default is the identity function. If \code{h} is supplied but
\code{hinv} is not, then the intervals returned will be on the transformed scale.}

\item{no_bias}{Logical. If \code{TRUE} intervals are centered around the original
estimates (bias is ignored). Default is \code{FALSE}.}

\item{aggregate}{Logical. If \code{TRUE} (default), the function returns distinct
confidence limits per group. If \code{FALSE}, the confidence limits are added to
the original bootstrap dataframe \code{bootstrap_samples_df}.}

\item{data_cube}{Only used when \code{type = "bca"}. A data cube object (class
'processed_cube' or 'sim_cube', see \code{b3gbi::process_cube()}) or a dataframe
(from \verb{$data} slot of 'processed_cube' or 'sim_cube'). As used by
\code{bootstrap_cube()}. To limit runtime, we recommend using a
dataframe with a custom function as \code{fun}.}

\item{fun}{Only used when \code{type = "bca"}. A function which, when applied to
\code{data_cube} returns the statistic(s) of interest. This function must return a
dataframe with a column \code{diversity_val} containing the statistic of interest.
As used by \code{bootstrap_cube()}.}

\item{...}{Additional arguments passed on to \code{fun}.}

\item{ref_group}{Only used when \code{type = "bca"}. A string indicating the
reference group to compare the statistic with. Default is \code{NA}, meaning no
reference group is used.
As used by \code{bootstrap_cube()}.}

\item{influence_method}{A string specifying the method used for calculating
the influence values.
\itemize{
\item \code{"usual"}: Negative jackknife (default if BCa is selected).
\item \code{"pos"}: Positive jackknife
}}

\item{progress}{Logical. Whether to show a progress bar for jackknifing. Set
to \code{TRUE} to display a progress bar, \code{FALSE} (default) to suppress it.}
}
\value{
A dataframe containing the bootstrap results with the following
columns:
\itemize{
\item \code{est_original}: The statistic based on the full dataset per group
\item \code{rep_boot}: The statistic based on a bootstrapped dataset (bootstrap replicate)
\item \code{est_boot}: The bootstrap estimate (mean of bootstrap replicates per
group)
\item \code{se_boot}: The standard error of the bootstrap estimate (standard
deviation of the bootstrap replicates per group)
\item \code{bias_boot}: The bias of the bootstrap estimate per group
\item \code{int_type}: The interval type
\item \code{ll}: The lower limit of the confidence interval
\item \code{ul}: The upper limit of the confidence interval
\item \code{conf}: The confidence level of the interval
}

When \code{aggregate = FALSE}, the dataframe contains the columns from
\code{bootstrap_samples_df} with one row per bootstrap replicate.
}
\description{
This function calculates confidence intervals for a dataframe containing
bootstrap replicates based on different methods, including percentile
(\code{perc}), bias-corrected and accelerated (\code{bca}), normal (\code{norm}), and basic
(\code{basic}).
}
\details{
We consider four different types of intervals (with confidence level
\eqn{1 - \alpha}). The choice for confidence interval types and their calculation
is in line with the \pkg{boot} package in R (Canty & Ripley, 1999) to ensure
ease of implementation. They are based on the definitions provided by
Davison & Hinkley (1997, Chapter 5)
(see also DiCiccio & Efron, 1996; Efron, 1987).
\enumerate{
\item \strong{Percentile}: Uses the percentiles of the bootstrap distribution.

\deqn{CI_{perc} = \left[ \hat{\theta}^*_{(\alpha/2)}, \hat{\theta}^*_{(1-\alpha/2)} \right]}

where \eqn{\hat{\theta}^*_{(\alpha/2)}} and
\eqn{\hat{\theta}^*_{(1-\alpha/2)}} are the \eqn{\alpha/2} and
\eqn{1-\alpha/2} percentiles of the bootstrap distribution, respectively.
\item \strong{Bias-Corrected and Accelerated (BCa)}: Adjusts for bias and
acceleration

Bias refers to the systematic difference between the observed statistic
from the original dataset and the center of the bootstrap distribution of
the statistic. The bias correction term is calculated as follows:

\deqn{\hat{z}_0 = \Phi^{-1}\left(\frac{\#(\hat{\theta}^*_b < \hat{\theta})}{B}\right)}

where \eqn{\#} is the counting operator and \eqn{\Phi^{-1}} the inverse
cumulative distribution function of the standard normal distribution.

Acceleration quantifies how sensitive the variability of the statistic is
to changes in the data.
See \code{calculate_acceleration()} on how this is calculated.
\itemize{
\item \eqn{a=0}: The statistic's variability does not depend on the data
(e.g., symmetric distribution)
\item \eqn{a>0}: Small changes in the data have a large effect on the
statistic's variability (e.g., positive skew)
\item \eqn{a<0}: Small changes in the data have a smaller effect on the
statistic's variability (e.g., negative skew).
}

The bias and acceleration estimates are then used to calculate adjusted
percentiles.

\eqn{\alpha_1 = \Phi\left( \hat{z}_0 + \frac{\hat{z}_0 + z_{\alpha/2}}{1 - \hat{a}(\hat{z}_0 + z_{\alpha/2})} \right)},
\eqn{\alpha_2 = \Phi\left( \hat{z}_0 + \frac{\hat{z}_0 + z_{1 - \alpha/2}}{1 - \hat{a}(\hat{z}_0 + z_{1 - \alpha/2})} \right)}

So, we get

\deqn{CI_{bca} = \left[ \hat{\theta}^*_{(\alpha_1)}, \hat{\theta}^*_{(\alpha_2)} \right]}
\item \strong{Normal}: Assumes the bootstrap distribution of the statistic is
approximately normal

\deqn{CI_{norm} = \left[\hat{\theta} - \text{Bias}_{\text{boot}} - \text{SE}_{\text{boot}} \times z_{1-\alpha/2},
   \hat{\theta} - \text{Bias}_{\text{boot}} + \text{SE}_{\text{boot}} \times z_{1-\alpha/2} \right]}

where \eqn{z_{1-\alpha/2}} is the \eqn{1-\alpha/2} quantile of the
standard normal distribution.
\item \strong{Basic}: Centers the interval using percentiles

\deqn{CI_{basic} = \left[ 2\hat{\theta} - \hat{\theta}^*_{(1-\alpha/2)},
   2\hat{\theta} - \hat{\theta}^*_{(\alpha/2)} \right]}

where \eqn{\hat{\theta}^*_{(\alpha/2)}} and
\eqn{\hat{\theta}^*_{(1-\alpha/2)}} are the \eqn{\alpha/2} and
\eqn{1-\alpha/2} percentiles of the bootstrap distribution, respectively.
}
}
\examples{
\dontrun{
# After processing a data cube with b3gbi::process_cube()

# Function to calculate statistic of interest
# Mean observations per year
mean_obs <- function(data) {
  out_df <- aggregate(obs ~ year, data, mean) # Calculate mean obs per year
  names(out_df) <- c("year", "diversity_val") # Rename columns
  return(out_df)
}
mean_obs(processed_cube$data)

# Perform bootstrapping
bootstrap_mean_obs <- bootstrap_cube(
  data_cube = processed_cube$data,
  fun = mean_obs,
  grouping_var = "year",
  samples = 1000,
  seed = 123,
  progress = FALSE
)
head(bootstrap_mean_obs)

# Calculate confidence limits
# Percentile interval
ci_mean_obs1 <- calculate_bootstrap_ci(
  bootstrap_samples_df = bootstrap_mean_obs,
  grouping_var = "year",
  type = "perc",
  conf = 0.95,
  aggregate = TRUE
)
ci_mean_obs1

# All intervals
ci_mean_obs2 <- calculate_bootstrap_ci(
  bootstrap_samples_df = bootstrap_mean_obs,
  grouping_var = "year",
  type = c("perc", "bca", "norm", "basic"),
  conf = 0.95,
  aggregate = TRUE,
  data_cube = processed_cube$data, # Required for BCa
  fun = mean_obs,                  # Required for BCa
  progress = FALSE
)
ci_mean_obs2
}
}
\references{
Canty, A., & Ripley, B. (1999). boot: Bootstrap Functions (Originally by
Angelo Canty for S) [Computer software].
\url{https://CRAN.R-project.org/package=boot}

Davison, A. C., & Hinkley, D. V. (1997). Bootstrap Methods and their
Application (1st ed.). Cambridge University Press.
\doi{10.1017/CBO9780511802843}

DiCiccio, T. J., & Efron, B. (1996). Bootstrap confidence intervals.
Statistical Science, 11(3). \doi{10.1214/ss/1032280214}

Efron, B. (1987). Better Bootstrap Confidence Intervals. Journal of the
American Statistical Association, 82(397), 171–185.
\doi{10.1080/01621459.1987.10478410}

Efron, B., & Tibshirani, R. J. (1994). An Introduction to the Bootstrap
(1st ed.). Chapman and Hall/CRC. \doi{10.1201/9780429246593}
}
\seealso{
Other indicator_uncertainty: 
\code{\link{add_effect_classification}()},
\code{\link{bootstrap_cube}()},
\code{\link{calculate_acceleration}()}
}
\concept{indicator_uncertainty}
