calibration_obs<- df_patient[1:calibration_index,]
residuals_temporal<- mean_estimator_ind_patient(df = calibration_obs,
df_normal = pws_patient,
variable_name = variable_name)
residuals<- rbind(residuals, residuals_temporal)
number_of_res<- nrow(residuals)
if (number_of_res < number_of_obs) {
calibration_obs<- df_patient[(number_of_res+1):number_of_obs,]
residuals_temporal<- mean_estimator_ind_patient(df = calibration_obs,
df_normal = pws_patient,
variable_name = variable_name)
residuals_temporal<- residuals_temporal[(1:(nrow(calibration_obs))),]
residuals<- rbind(residuals, residuals_temporal)
}
}
}
df_patient<- cbind(df_patient, residuals)
df_contain_resid_all_patient<-rbind(df_contain_resid_all_patient, df_patient)
}
return(df_contain_resid_all_patient)
}
#' Online estimate of a patient's mean signal and its calibrated residuals
#'
#' For each of the first `min(180, nrow(df))` rows, a mixed model (`lmer`) is
#' refitted on the reference data plus the patient's rows seen so far. The
#' patient-specific fitted mean and the residual (observed - fitted) are stored
#' for that row. Rows beyond the 180th reuse the last fitted mean.
#'
#' @param df data frame containing the data of one patient. Must contain `ID`,
#'   `norm_speed`, the column named by `variable_name`, and `HCT` when
#'   `variable_name` is "Flow".
#' @param df_normal data frame containing normal data; the first 28 rows of
#'   every `ID` are used as reference data. Must contain `Datetime`.
#' @param variable_name name of the variable for which to estimate the mean,
#'   either "Flow" or "Motor_power".
#'
#' @return data frame with one row per row of `df` and the columns
#'   `calibrated_resid`, `mean_estimation`, `sigma` (running standard deviation
#'   of the residuals, first value fixed at 2) and `updated` (1 for the first
#'   row, 0 otherwise).
#'
#' @examples
#' mean_estimator_ind_patient(df, df_normal, "Flow")
#'
mean_estimator_ind_patient <- function(df,
                                       df_normal,
                                       variable_name) {
  if (length(variable_name) != 1 ||
        !(variable_name %in% c("Flow", "Motor_power"))) {
    stop("`variable_name` must be \"Flow\" or \"Motor_power\".", call. = FALSE)
  }

  kl <- nrow(df)
  if (kl == 0) {
    # Nothing to calibrate: return an empty result with the usual columns.
    return(data.frame(
      calibrated_resid = numeric(0),
      mean_estimation = numeric(0),
      sigma = numeric(0),
      updated = numeric(0)
    ))
  }
  # The model is refitted for at most the first 180 observations.
  iter <- min(180, kl)

  # Reference data: first 28 rows of every ID in df_normal.
  bank_data <- df_normal %>%
    group_by(ID) %>%
    slice(1:28)
  bank_data$Datetime <- as.Date(bank_data$Datetime)
  bank_data$calibrated_resid <- 0
  bank_data$mean_estimation <- 0

  df$calibrated_resid <- 0
  df$mean_estimation <- 0
  patient_num <- df$ID[1]

  is_flow <- variable_name == "Flow"
  model_formula <- if (is_flow) {
    Flow ~ (1 + norm_speed | ID) + HCT
  } else {
    Motor_power ~ (1 + norm_speed | ID)
  }
  observed <- df[[variable_name]]
  online_mean <- numeric(iter)

  for (j in seq_len(iter)) {
    a <- df[seq_len(j), ]
    new_df <- rbind(bank_data, a)
    new_model <- lmer(
      model_formula,
      data = new_df, REML = FALSE,
      control = lmerControl(optimizer = "bobyqa", calc.derivs = FALSE)
    )
    id_coef <- coef(new_model)$ID
    patient_row <- which(row.names(id_coef) == patient_num)
    # Coefficients are addressed by position exactly as before; presumably
    # column 1 = norm_speed, 2 = intercept, 3 = HCT -- TODO confirm.
    # NOTE(review): the estimate is evaluated at the covariates of the FIRST
    # row of `a`. This reproduces the previous behaviour, where only the first
    # element of the estimate vector was kept. Row j may have been intended;
    # confirm before changing.
    online_mean[j] <- if (is_flow) {
      id_coef[patient_row, 3] * a$HCT[1] +
        a$norm_speed[1] * id_coef[patient_row, 1] +
        id_coef[patient_row, 2]
    } else {
      id_coef[patient_row, 2] +
        a$norm_speed[1] * id_coef[patient_row, 1]
    }
  }

  fitted_rows <- seq_len(iter)
  df$mean_estimation[fitted_rows] <- online_mean
  df$calibrated_resid[fitted_rows] <-
    as.numeric(observed[fitted_rows] - online_mean)

  df$sigma <- 0
  df$sigma[fitted_rows] <- vapply(
    fitted_rows,
    function(s) sqrt(var(df$calibrated_resid[seq_len(s)])),
    numeric(1)
  )
  # The variance of a single residual is undefined, so a fixed start value is
  # used for the first row.
  df$sigma[1] <- 2

  # Rows after the refitting window reuse the last estimate. The guard applies
  # to both variables: without it `(iter + 1):kl` counts down when kl <= iter
  # and an extra NA row is appended.
  if (kl > iter) {
    tail_rows <- (iter + 1):kl
    df$mean_estimation[tail_rows] <- online_mean[iter]
    df$calibrated_resid[tail_rows] <- observed[tail_rows] - online_mean[iter]
    df$sigma[tail_rows] <- df$sigma[iter]
  }

  df$updated <- 0
  df$updated[1] <- 1
  df[, c("calibrated_resid", "mean_estimation", "sigma", "updated")]
}
# Compute the patient-specific calibrated residuals of "Flow" for all patients.
# The call used to be issued twice with identical arguments; the second run
# only repeated the model fitting and overwrote `df` with the same result.
df <- patient_specific_residual(
  variable_name = "Flow",
  df_all_patient = df_all_patient
)
#' Online estimate of a patient's mean signal and its calibrated residuals
#'
#' For each of the first `min(180, nrow(df))` rows, a mixed model (`lmer`) is
#' refitted on the reference data plus the patient's rows seen so far. The
#' patient-specific fitted mean and the residual (observed - fitted) are stored
#' for that row. Rows beyond the 180th reuse the last fitted mean.
#'
#' @param df data frame containing patient data (one patient). Must contain
#'   `ID`, `norm_speed`, the column named by `variable_name`, and `HCT` when
#'   `variable_name` is "Flow".
#' @param df_normal data frame containing normal data; the first 28 rows of
#'   every `ID` are used as reference data. Must contain `Datetime`.
#' @param variable_name name of the variable for which to estimate the mean,
#'   either "Flow" or "Motor_power".
#'
#' @return data frame containing calibrated residuals, mean estimations, sigma,
#'   and updated columns, with one row per row of `df`. `sigma` is the running
#'   standard deviation of the residuals (first value fixed at 2); `updated`
#'   is 1 for the first row and 0 otherwise.
#'
#' @examples
#' mean_estimator_ind_patient(df, df_normal, "Flow")
#'
mean_estimator_ind_patient <- function(df,
                                       df_normal,
                                       variable_name) {
  if (length(variable_name) != 1 ||
        !(variable_name %in% c("Flow", "Motor_power"))) {
    stop("`variable_name` must be \"Flow\" or \"Motor_power\".", call. = FALSE)
  }

  kl <- nrow(df)
  if (kl == 0) {
    # Nothing to calibrate: return an empty result with the usual columns.
    return(data.frame(
      calibrated_resid = numeric(0),
      mean_estimation = numeric(0),
      sigma = numeric(0),
      updated = numeric(0)
    ))
  }
  # The model is refitted for at most the first 180 observations.
  iter <- min(180, kl)

  # Reference data: first 28 rows of every ID in df_normal.
  bank_data <- df_normal %>%
    group_by(ID) %>%
    slice(1:28)
  bank_data$Datetime <- as.Date(bank_data$Datetime)
  bank_data$calibrated_resid <- 0
  bank_data$mean_estimation <- 0

  df$calibrated_resid <- 0
  df$mean_estimation <- 0
  patient_num <- df$ID[1]

  is_flow <- variable_name == "Flow"
  model_formula <- if (is_flow) {
    Flow ~ (1 + norm_speed | ID) + HCT
  } else {
    Motor_power ~ (1 + norm_speed | ID)
  }
  observed <- df[[variable_name]]
  online_mean <- numeric(iter)

  for (j in seq_len(iter)) {
    a <- df[seq_len(j), ]
    new_df <- rbind(bank_data, a)
    new_model <- lmer(
      model_formula,
      data = new_df, REML = FALSE,
      control = lmerControl(optimizer = "bobyqa", calc.derivs = FALSE)
    )
    id_coef <- coef(new_model)$ID
    patient_row <- which(row.names(id_coef) == patient_num)
    # Coefficients are addressed by position exactly as before; presumably
    # column 1 = norm_speed, 2 = intercept, 3 = HCT -- TODO confirm.
    # NOTE(review): the estimate is evaluated at the covariates of the FIRST
    # row of `a`. This reproduces the previous behaviour, where only the first
    # element of the estimate vector was kept. Row j may have been intended;
    # confirm before changing.
    online_mean[j] <- if (is_flow) {
      id_coef[patient_row, 3] * a$HCT[1] +
        a$norm_speed[1] * id_coef[patient_row, 1] +
        id_coef[patient_row, 2]
    } else {
      id_coef[patient_row, 2] +
        a$norm_speed[1] * id_coef[patient_row, 1]
    }
  }

  fitted_rows <- seq_len(iter)
  df$mean_estimation[fitted_rows] <- online_mean
  df$calibrated_resid[fitted_rows] <-
    as.numeric(observed[fitted_rows] - online_mean)

  df$sigma <- 0
  df$sigma[fitted_rows] <- vapply(
    fitted_rows,
    function(s) sqrt(var(df$calibrated_resid[seq_len(s)])),
    numeric(1)
  )
  # The variance of a single residual is undefined, so a fixed start value is
  # used for the first row.
  df$sigma[1] <- 2

  # Rows after the refitting window reuse the last estimate. The guard applies
  # to both variables: without it `(iter + 1):kl` counts down when kl <= iter
  # and an extra NA row is appended.
  if (kl > iter) {
    tail_rows <- (iter + 1):kl
    df$mean_estimation[tail_rows] <- online_mean[iter]
    df$calibrated_resid[tail_rows] <- observed[tail_rows] - online_mean[iter]
    df$sigma[tail_rows] <- df$sigma[iter]
  }

  df$updated <- 0
  df$updated[1] <- 1
  df[, c("calibrated_resid", "mean_estimation", "sigma", "updated")]
}
# Calibrated "Flow" residuals for every patient in df_all_patient.
df <- patient_specific_residual(
  df_all_patient = df_all_patient,
  variable_name = "Flow"
)
#' Online estimate of a patient's mean signal and its calibrated residuals
#'
#' For each of the first `min(180, nrow(df))` rows, a mixed model (`lmer`) is
#' refitted on the reference data plus the patient's rows seen so far. The
#' patient-specific fitted mean and the residual (observed - fitted) are stored
#' for that row. Rows beyond the 180th reuse the last fitted mean.
#'
#' @param df data frame containing patient data (one patient). Must contain
#'   `ID`, `norm_speed`, the column named by `variable_name`, and `HCT` when
#'   `variable_name` is "Flow".
#' @param df_normal data frame containing normal data; the first 28 rows of
#'   every `ID` are used as reference data. Must contain `Datetime`.
#' @param variable_name name of the variable for which to estimate the mean,
#'   either "Flow" or "Motor_power".
#'
#' @return data frame containing calibrated residuals, mean estimations, sigma,
#'   and updated columns, with one row per row of `df`. `sigma` is the running
#'   standard deviation of the residuals (first value fixed at 2); `updated`
#'   is 1 for the first row and 0 otherwise.
#'
#' @examples
#' mean_estimator_ind_patient(df, df_normal, "Flow")
#'
mean_estimator_ind_patient <- function(df, df_normal, variable_name) {
  if (length(variable_name) != 1 ||
        !(variable_name %in% c("Flow", "Motor_power"))) {
    stop("`variable_name` must be \"Flow\" or \"Motor_power\".", call. = FALSE)
  }

  kl <- nrow(df)
  if (kl == 0) {
    # Nothing to calibrate: return an empty result with the usual columns.
    return(data.frame(
      calibrated_resid = numeric(0),
      mean_estimation = numeric(0),
      sigma = numeric(0),
      updated = numeric(0)
    ))
  }
  # The model is refitted for at most the first 180 observations.
  iter <- min(180, kl)

  # Reference data: first 28 rows of every ID in df_normal.
  bank_data <- df_normal %>%
    group_by(ID) %>%
    slice(1:28)
  bank_data$Datetime <- as.Date(bank_data$Datetime)
  bank_data$calibrated_resid <- 0
  bank_data$mean_estimation <- 0

  df$calibrated_resid <- 0
  df$mean_estimation <- 0
  patient_num <- df$ID[1]

  is_flow <- variable_name == "Flow"
  model_formula <- if (is_flow) {
    Flow ~ (1 + norm_speed | ID) + HCT
  } else {
    Motor_power ~ (1 + norm_speed | ID)
  }
  observed <- df[[variable_name]]
  online_mean <- numeric(iter)

  for (j in seq_len(iter)) {
    a <- df[seq_len(j), ]
    new_df <- rbind(bank_data, a)
    new_model <- lmer(
      model_formula,
      data = new_df, REML = FALSE,
      control = lmerControl(optimizer = "bobyqa", calc.derivs = FALSE)
    )
    id_coef <- coef(new_model)$ID
    patient_row <- which(row.names(id_coef) == patient_num)
    # Coefficients are addressed by position exactly as before; presumably
    # column 1 = norm_speed, 2 = intercept, 3 = HCT -- TODO confirm.
    # NOTE(review): the estimate is evaluated at the covariates of the FIRST
    # row of `a`. This reproduces the previous behaviour, where only the first
    # element of the estimate vector was kept. Row j may have been intended;
    # confirm before changing.
    online_mean[j] <- if (is_flow) {
      id_coef[patient_row, 3] * a$HCT[1] +
        a$norm_speed[1] * id_coef[patient_row, 1] +
        id_coef[patient_row, 2]
    } else {
      id_coef[patient_row, 2] +
        a$norm_speed[1] * id_coef[patient_row, 1]
    }
  }

  fitted_rows <- seq_len(iter)
  df$mean_estimation[fitted_rows] <- online_mean
  df$calibrated_resid[fitted_rows] <-
    as.numeric(observed[fitted_rows] - online_mean)

  df$sigma <- 0
  df$sigma[fitted_rows] <- vapply(
    fitted_rows,
    function(s) sqrt(var(df$calibrated_resid[seq_len(s)])),
    numeric(1)
  )
  # The variance of a single residual is undefined, so a fixed start value is
  # used for the first row.
  df$sigma[1] <- 2

  # Rows after the refitting window reuse the last estimate. The guard applies
  # to both variables: without it `(iter + 1):kl` counts down when kl <= iter
  # and an extra NA row is appended.
  if (kl > iter) {
    tail_rows <- (iter + 1):kl
    df$mean_estimation[tail_rows] <- online_mean[iter]
    df$calibrated_resid[tail_rows] <- observed[tail_rows] - online_mean[iter]
    df$sigma[tail_rows] <- df$sigma[iter]
  }

  df$updated <- 0
  df$updated[1] <- 1
  df[, c("calibrated_resid", "mean_estimation", "sigma", "updated")]
}
# Inspect the inputs of the per-patient residual computation (auto-printed
# when run interactively; also fails early if either object is missing).
variable_name
df_all_patient

# Residuals of one calibration segment, trimmed to the segment's row count so
# they always line up row-by-row with `segment_obs`.
estimate_segment_residuals <- function(segment_obs) {
  segment_resid <- mean_estimator_ind_patient(
    df = segment_obs,
    df_normal = pws_patient,
    variable_name = variable_name
  )
  segment_resid[seq_len(nrow(segment_obs)), ]
}

# Find all unique patient IDs in the data frame.
u <- sort(unique(df_all_patient$ID))
# Create an empty data frame to store residuals for all patients.
df_contain_resid_all_patient <- data.frame()

# Loop through all patients.
for (k in seq_along(u)) {
  # Data and admissions of the current patient.
  df_patient <- df_all_patient[df_all_patient$ID == u[k], ]
  patient_admission <- admission_file[admission_file$ID == u[k], ]

  # Dates on which the stored speed differs from the previous observation.
  # Fixed: this used to index `df` instead of the current patient's data.
  speed_changed <- which(
    df_patient$Stored_speed != dplyr::lag(df_patient$Stored_speed)
  )
  dates_speed_changed <- sort(df_patient$Datetime[speed_changed])

  # Events are admission dates plus speed-change dates, in ascending order.
  # Fixed: the second vector used to be passed to sort() as `decreasing`
  # instead of being combined with the first one.
  # NOTE(review): assumes admission_date and Datetime share one type so that
  # c() and the comparisons below are meaningful -- confirm.
  if (length(dates_speed_changed) != 0) {
    events <- sort(unique(c(
      patient_admission$admission_date,
      dates_speed_changed
    )))
  } else {
    # Sorted as well, because the segmentation below walks the events in order.
    events <- sort(unique(patient_admission$admission_date))
  }

  # Residuals of the current patient, built up segment by segment.
  residuals <- data.frame()
  number_of_obs <- nrow(df_patient)

  # If there are no events for the current patient, skip to the next patient.
  if (length(events) == 0) {
    next
  }

  # First segment: all observations strictly before the first event.
  calibration_index <- max(which(df_patient$Datetime < events[1]))
  calibration_obs <- df_patient[1:calibration_index, ]
  # Fixed: the multi-event branch used to trim `residuals_temporal` here,
  # before that variable had been assigned.
  residuals <- rbind(residuals, estimate_segment_residuals(calibration_obs))

  if (length(events) > 1) {
    # One segment per pair of consecutive events.
    # NOTE(review): the first segment uses `<` while these use `<=`, so an
    # observation stamped exactly at events[1] belongs to no segment --
    # confirm the intended boundary rule.
    for (t in 2:length(events)) {
      i <- max(which(df_patient$Datetime <= events[t]))
      j <- max(which(df_patient$Datetime <= events[t - 1]))
      calibration_obs <- df_patient[(j + 1):i, ]
      residuals <- rbind(
        residuals,
        estimate_segment_residuals(calibration_obs)
      )
    }
  }

  # Remaining observations after the last event form the final segment.
  # Fixed: the multi-event branch used to compute `number_of_resid` but then
  # read `number_of_res`.
  number_of_res <- nrow(residuals)
  if (number_of_res < number_of_obs) {
    calibration_obs <- df_patient[(number_of_res + 1):number_of_obs, ]
    residuals <- rbind(residuals, estimate_segment_residuals(calibration_obs))
  }

  df_patient <- cbind(df_patient, residuals)
  df_contain_resid_all_patient <- rbind(
    df_contain_resid_all_patient,
    df_patient
  )
}
# ---- Manual walk-through of the residual computation for one patient ----
k <- 1

# Data and admissions of the current patient.
df_patient <- df_all_patient[df_all_patient$ID == u[k], ]
admission_file
patient_admission <- admission_file[admission_file$ID == u[k], ]
patient_admission

# Dates on which the stored speed differs from the previous observation.
# Fixed: this used to index `df` instead of the current patient's data.
df_patient$Stored_speed
df_patient$Stored_speed != dplyr::lag(df_patient$Stored_speed)
speed_changed <- which(
  df_patient$Stored_speed != dplyr::lag(df_patient$Stored_speed)
)
dates_speed_changed <- sort(df_patient$Datetime[speed_changed])
dates_speed_changed

# Admission dates combined with speed-change dates, shown for inspection.
# Fixed: the second vector used to be passed to sort() as `decreasing`
# instead of being combined with the first one.
if (length(dates_speed_changed) != 0) {
  events <- sort(unique(c(
    patient_admission$admission_date,
    dates_speed_changed
  )))
} else {
  events <- unique(patient_admission$admission_date)
}
events

# The walk-through then continues with the admission dates only.
events <- unique(patient_admission$admission_date)

# Create an empty data frame to store residuals for the current patient.
residuals <- data.frame()
# Get the total number of observations for the current patient.
number_of_obs <- nrow(df_patient)
length(events) > 1

# Residuals of the observations strictly before the first event.
calibration_index <- max(which(df_patient$Datetime < events[1]))
calibration_obs <- df_patient[1:calibration_index, ]
residuals_temporal <- mean_estimator_ind_patient(
  df = calibration_obs,
  df_normal = pws_patient,
  variable_name = variable_name
)

unique(df_all_patient$Stored_speed)
# View() needs an interactive session.
if (interactive()) {
  View(df_all_patient)
}
# ---- Load and prepare the tutorial data sets ----
# Consolidated from a trial-and-error session: abandoned read.csv() attempts
# (wrong separators or paths) and the lines that overwrote `df_all_patient`
# with the bare dense_rank() vector have been removed. Only the statements
# that produce the final objects and files remain.
library(vctrs)

# Adds 100 to the Stored_speed of one randomly chosen row per ID.
# The chosen row is random; no seed is set here.
add_random_speed_step <- function(data) {
  data %>%
    group_by(ID) %>%
    mutate(
      Stored_speed = if_else(
        row_number() == sample(row_number(), size = 1),
        Stored_speed + 100,
        Stored_speed
      )
    ) %>%
    ungroup()
}

# stable_patients
# NOTE(review): the directory is spelled both "Tutorial" and "tutorial" in
# this script; on a case-sensitive file system only one can exist -- confirm.
pws_patient <- read.csv("Tutorial/data/example_stable_patient.csv")

# loading the datasets:
df_all_patient <- read.csv("tutorial/data/syn_data_cardiacarrythmia.csv")

# Scratch copy with the speed step applied to the original IDs.
my_df <- add_random_speed_step(df_all_patient)
unique(my_df$Stored_speed)

# Generate a new consecutive ID column. The result is assigned to the ID
# column, not to the whole data frame.
df_all_patient$ID <- dense_rank(df_all_patient$ID)
df_all_patient <- add_random_speed_step(df_all_patient)
# NOTE(review): write.csv() also writes the row names as an unnamed first
# column, and the file goes to the working directory rather than
# tutorial/data -- confirm both are intended.
write.csv(df_all_patient, "syn_data_cardiacarrythmia.csv")

#or:
df_all_patient <- read.csv("Tutorial/data/syn_data_majorbleeding.csv")
df_all_patient$ID <- dense_rank(df_all_patient$ID)
df_all_patient <- add_random_speed_step(df_all_patient)
write.csv(df_all_patient, "syn_data_majorbleeding.csv")
df_all_patient
