
# this is the code to calculate the maximum carbon capture capacity for all the sample sites 

# Can calculate for USGS river data, USGS groundwater data, and GLORICH river data (see options below)

# the output will be stored to the output_cluster folder

# This code was written by Shuang Zhang for publication in Zhang et al. 
# "Insights from river chemistry into carbon capture potential through surficial enhanced rock weathering"

# Created on: Jun, 26, 2021
# Created by: Shuang Zhang


# load required libraries =========================================================================
library(data.table)
library(seacarb)
library(tictoc)
library(rstudioapi)
library(rprojroot) # get the current file location (only works with Rscript or source)


# =================================================================================================

# set working directory ===========================================================================

# Change run_flag to "cluster" to run on the cluster (via Rscript or source()) instead of locally in RStudio
run_flag <- "local"

# Resolve the path of this script, then make its folder the working directory.
# "cluster" uses thisfile(); any other value uses the document that is
# currently active in RStudio.
current_path <- if (run_flag == "cluster") {
  thisfile()
} else {
  getActiveDocumentContext()$path
}

setwd(dirname(current_path))

# =================================================================================================

# clear the environment ===========================================================================
# Remove every object defined so far (this includes run_flag and current_path);
# the working directory that was already set is not affected.
rm(list = ls())

time_start <- Sys.time()

# =================================================================================================

# read in compiled output =========================================================================
ph_case <- "base" # change it to "base" for baseline pH, and to "stir" if you want to test the pH correction effect for the USGS river (note that stir will only work for USGS river as we performed the scenario test for USGS river)

region_name <- "us" # change it to "glorich" for GLORICH river, to "us" for USGS river and USGS groundwater

media_name <- "river_weight_annual" # change it to "river_weight_annual" for GLORICH river and USGS river, and to "well_annual" for USGS groundwater

annual_flag <- "four_seasons" # at least 1 measurement in each season

degass_case <- "close" # change it to "close" for GLORICH river, USGS river, and USGS groundwater for the first rock addition scenario, and to "degass" for GLORICH river and USGS river for the second rock addition scenario

# The input file name is assembled from the options above:
# ../input/<ph_case>_<region_name>_<media_name>_<annual_flag>.csv
dt <- fread(file.path("..", "input", paste0(ph_case, "_", region_name, "_", media_name, "_", annual_flag, ".csv")))

# Quick test on the first 20 samples; delete this line of code if you want to run for all samples.
# head() is used instead of dt[1:20, ] so that an input with fewer than 20 rows
# is not padded with all-NA rows.
dt <- head(dt, 20)

# =================================================================================================

# set some key parameters =========================================================================

# error tolerance for iteration
err_tol <- 0.1

# supersaturation threshold
omega_thresh <- 5


# select rock type --------------------------------------------------------

# rock_name     <- "calcite"
# rock_name     <- "dolomite"
# rock_name     <- "basalt_flood"

rock_name <- "calcite"

# Moles of each cation released per mole of rock added.
if (rock_name %in% c("calcite", "dolomite")) {

  # Carbonates are described by their dolomite fraction:
  # 0 = pure calcite, 1 = pure dolomite.
  dolomite_prop <- if (rock_name == "dolomite") 1 else 0

  ca_mol <- (1 - dolomite_prop) + dolomite_prop * 0.5
  mg_mol <- dolomite_prop * 0.5
  na_mol <- 0
  k_mol <- 0
  fe2_mol <- 0

} else if (rock_name == "basalt_flood") {

  ca_mol <- 0.22
  mg_mol <- 0.22
  na_mol <- 0.11
  k_mol <- 0.01
  fe2_mol <- 0.14

} else {

  # Without this branch an unsupported name would only surface below as
  # "object 'na_mol' not found".
  stop(
    "unsupported rock_name: \"", rock_name,
    "\" (expected \"calcite\", \"dolomite\" or \"basalt_flood\")",
    call. = FALSE
  )

}

# Charge equivalents released per mole of rock: monovalent cations count once,
# divalent cations twice. The outer parentheses print the value at top level.
(rock_alk_ratio <- na_mol + k_mol + (fe2_mol + mg_mol + ca_mol) * 2)


# select omega  ---------------------------------------------

# "ap" = apparent ksp; this label is also embedded in the output file name.
omega_case <- "ap"

# Initial saturation state used by the main loop: a copy of the
# omega_ap_calcite column, added to dt by reference.
dt[, `:=`(omega_calc = omega_ap_calcite)]


# =================================================================================================

# begin calculation ===============================================================================


num_total <- nrow(dt)

num_step <- 3 # divide the whole samples into several batches for parallel run in the cluster

# number of samples per batch (the last batch may be shorter)
batch_steps <- ceiling(num_total / num_step)

# index of the batch processed by this run (1-based)
num_batch <- 1

num_start <- (num_batch - 1) * batch_steps + 1

# Guard against a batch that lies outside the data: otherwise
# num_start:num_end would count downwards and pick wrong / all-NA rows.
if (num_batch < 1 || num_start > num_total) {
  stop(
    sprintf(
      "num_batch = %s selects no samples (%s samples split into %s batches of %s)",
      num_batch, num_total, num_step, batch_steps
    ),
    call. = FALSE
  )
}

# the last batch is truncated at the end of the data
num_end <- min(num_batch * batch_steps, num_total)

# get the specific data.table
dt <- dt[num_start:num_end, ]

num_sample <- nrow(dt)

# preallocate arrays ------------------------------------------------------------------------------
# v means vector
# m means matrix

# total steps and final added amounts
# this is big step
v_j_steps <- numeric(num_sample)
# this is small step
v_k_steps <- numeric(num_sample)
# amount of rock added
v_add <- numeric(num_sample)

# flag indicating if achieving omega (0 = no, 1 = yes)
v_omega_flag <- numeric(num_sample)

# flag indicating if successful within err (0 = no, 1 = yes)
v_success_flag <- numeric(num_sample)

# flag indicating if successful using seacarb (starts at 1, set to 0 on NA)
v_seacarb_flag <- numeric(num_sample) + 1

# numeric NA template shared by all "final state" vectors -------------------------

v_NA <- rep(NA_real_, num_sample) # placeholder

# final ionic strength, salinity and saturation state
v_IS_final <- v_NA
v_salinity_final <- v_NA
v_omega_final <- v_NA

# final Ca and carbonate-system quantities
v_ca_final <- v_NA
v_co3_final <- v_NA
v_alk_final <- v_NA
v_dic_final <- v_NA
v_hco3_final <- v_NA
v_ph_final <- v_NA

# add more ----------------------------------------------------------------
# final CO2, pCO2 and fCO2
v_co2_final <- v_NA
v_pco2_final <- v_NA
v_fco2_final <- v_NA


# main loop ---------------------------------------------------------------------------------------

# Two-level search grid for the amount of rock to add.
# Upper bound of the search and number of coarse steps up to it
# (presumably in the same concentration unit as the alk/dic/ca columns -- TODO confirm).
num_j_steps <- 1e4
rock_max <- 0.1

# each coarse step is refined into this many fine steps
num_k_steps <- 100

# increment of the outer (coarse) loop
add_single_j <- rock_max / num_j_steps

# increment of the inner (fine) loop
add_single_k <- add_single_j / num_k_steps


# Water chemistry of one sample after `rock_mol_add` of rock has been added.
#
# rock_mol_add: cumulative amount of rock added to the sample.
# site:         list with the sample's initial dic, alk, ca, temp, p_hydro,
#               salinity, IS and pco2.
#
# Reads the rock composition (ca_mol, mg_mol, fe2_mol, na_mol, k_mol,
# rock_alk_ratio) and degass_case from the enclosing environment.
#
# Returns a list: IS, salinity, omega, ca, co3, plus the full seacarb
# result (`carb`). Stops if degass_case is neither "degass" nor "close".
rock_addition_state <- function(rock_mol_add, site) {
  ca_add <- rock_mol_add * ca_mol
  mg_add <- rock_mol_add * mg_mol
  fe2_add <- rock_mol_add * fe2_mol
  na_add <- rock_mol_add * na_mol
  k_add <- rock_mol_add * k_mol

  # DIC and alkalinity are both raised by the charge equivalents released
  alk_add <- rock_mol_add * rock_alk_ratio

  ca_update <- site$ca + ca_add
  dic_update <- site$dic + alk_add
  alk_update <- site$alk + alk_add

  # weights 40/61/24/56/23/39 are presumably the molar masses of
  # Ca, HCO3, Mg, Fe, Na and K -- TODO confirm
  salinity_update <- site$salinity + ca_add * 40 + alk_add * 61 +
    mg_add * 24 + fe2_add * 56 + na_add * 23 + k_add * 39

  # half the sum of (amount * weight), weight 4 for Ca/Mg/Fe2 and 1 for the
  # alkalinity term, Na and K (e.g. ca + 2hco3 for calcite)
  IS_update <- site$IS + 1/2 * (ca_add * 4 + alk_add * 1 + mg_add * 4 +
    fe2_add * 4 + na_add * 1 + k_add * 1)

  # recalculate the carbonate system
  carb_out <- switch(
    degass_case,
    # seacarb flag 24: the initial pCO2 is held fixed, with the updated alkalinity
    degass = carb(24, site$pco2, alk_update, S = salinity_update, T = site$temp,
                  P = site$p_hydro, k1k2 = "m06", kf = "dg", ks = "d"),
    # seacarb flag 15: updated alkalinity and updated DIC
    close = carb(15, alk_update, dic_update, S = salinity_update, T = site$temp,
                 P = site$p_hydro, k1k2 = "m06", kf = "dg", ks = "d"),
    stop("unknown degass_case: \"", degass_case,
         "\" (expected \"degass\" or \"close\")", call. = FALSE)
  )

  co3_update <- as.numeric(carb_out$CO3)

  # update saturation state with the calcite ksp at the new salinity
  ksp_single <- Kspc(S = salinity_update, T = site$temp, P = site$p_hydro)

  list(
    IS = IS_update,
    salinity = salinity_update,
    omega = ca_update * co3_update / ksp_single,
    ca = ca_update,
    co3 = co3_update,
    carb = carb_out
  )
}


for (i in seq_len(num_sample)) {

  if (is.na(dt$omega_calc[i])) {
    # No usable initial saturation state: flag this sample and move on.
    # Only this sample is skipped; the remaining samples are still processed.
    v_seacarb_flag[i] <- 0

    cat("no good initial omega value...\n")

  } else if (dt$omega_calc[i] >= omega_thresh) {
    # Already at or above the threshold: no rock is added and the measured
    # state is taken as the final state.
    v_omega_flag[i] <- 1
    v_success_flag[i] <- 1
    v_IS_final[i] <- dt$IS_calc[i]
    v_salinity_final[i] <- dt$salinity_calc[i]
    v_omega_final[i] <- dt$omega_calc[i]
    v_ca_final[i] <- dt$ca[i]
    v_co3_final[i] <- dt$co3_seacarb[i]
    v_hco3_final[i] <- dt$hco3_seacarb[i]
    v_alk_final[i] <- dt$alk[i]
    v_dic_final[i] <- dt$dic_seacarb[i]
    v_ph_final[i] <- dt$ph_correct[i]
    v_co2_final[i] <- dt$co2_seacarb[i]
    v_pco2_final[i] <- dt$pco2_seacarb[i]
    v_fco2_final[i] <- dt$fco2_seacarb[i]

  } else {
    # initial state of this sample
    site <- list(
      dic = dt$dic_seacarb[i],
      alk = dt$alk[i],
      ca = dt$ca[i],
      temp = dt$temp[i],
      p_hydro = dt$p_hydro[i],
      salinity = dt$salinity_calc[i],
      IS = dt$IS_calc[i],
      pco2 = dt$pco2_seacarb[i]
    )

    # state at which the threshold was reached; stays NULL if it never was
    final_state <- NULL

    # coarse search: add rock in steps of add_single_j
    for (j in seq_len(num_j_steps)) {

      rock_mol_add_j <- add_single_j * j
      state <- rock_addition_state(rock_mol_add_j, site)

      if (is.na(state$omega)) {
        v_seacarb_flag[i] <- 0

        cat("seacarb produced NA values..., break the loop\n\n")
        break
      }

      if (state$omega < omega_thresh) {
        next # threshold not reached yet, take another coarse step
      }

      v_omega_flag[i] <- 1

      if (state$omega - omega_thresh < err_tol) {
        # the coarse step already lands within tolerance
        v_success_flag[i] <- 1
        v_j_steps[i] <- j
        v_add[i] <- rock_mol_add_j
        final_state <- state

      } else {
        # overshoot: go back one coarse step and add more gradually
        rock_mol_add_j_1 <- rock_mol_add_j - add_single_j

        for (k in seq_len(num_k_steps)) {

          rock_mol_add_j_k <- rock_mol_add_j_1 + add_single_k * k
          state <- rock_addition_state(rock_mol_add_j_k, site)

          if (is.na(state$omega)) {
            v_seacarb_flag[i] <- 0

            cat("seacarb produced NA values..., break the loop\n\n")
            break
          }

          # expected to be met at the latest on the last fine step, which
          # brings the addition back to the overshooting coarse step
          if (state$omega >= omega_thresh) {
            v_j_steps[i] <- j - 1
            v_k_steps[i] <- k
            v_add[i] <- rock_mol_add_j_k

            # success only if the fine step is within tolerance
            if (state$omega - omega_thresh < err_tol) {
              v_success_flag[i] <- 1
            }

            final_state <- state
            break # break k loop
          }
        } # end k
      }

      break # threshold handled, leave the j loop
    } # end j

    # record the final state, if the threshold was reached
    if (!is.null(final_state)) {
      carb_final <- final_state$carb

      v_IS_final[i] <- final_state$IS
      v_salinity_final[i] <- final_state$salinity
      v_omega_final[i] <- final_state$omega
      v_ca_final[i] <- final_state$ca
      v_co3_final[i] <- final_state$co3
      v_hco3_final[i] <- as.numeric(carb_final$HCO3)
      v_alk_final[i] <- as.numeric(carb_final$ALK)
      v_dic_final[i] <- as.numeric(carb_final$DIC)
      v_ph_final[i] <- as.numeric(carb_final$pH)
      v_co2_final[i] <- as.numeric(carb_final$CO2)
      v_pco2_final[i] <- as.numeric(carb_final$pCO2)
      v_fco2_final[i] <- as.numeric(carb_final$fCO2)
    }
  }

  # monitor station number ------------------------------------------------------------------------
  cat(i, "finished\n\n")

  cat(i + (num_batch - 1) * batch_steps, "finished for whole data\n\n")

} # end for loop of samples i


# add to data.table ---------------------------------------------------------------------------------------

# Collect the per-sample result vectors under their output column names and
# add them to dt by reference in a single assignment.
result_columns <- list(
  j_steps = v_j_steps,
  k_steps = v_k_steps,
  add = v_add,
  omega_flag = v_omega_flag,
  success_flag = v_success_flag,
  seacarb_flag = v_seacarb_flag,
  IS_final = v_IS_final,
  salinity_final = v_salinity_final,
  omega_final = v_omega_final,
  ca_final = v_ca_final,
  co3_final = v_co3_final,
  alk_final = v_alk_final,
  dic_final = v_dic_final,
  hco3_final = v_hco3_final,
  ph_final = v_ph_final,
  co2_final = v_co2_final,
  pco2_final = v_pco2_final,
  fco2_final = v_fco2_final
)

dt[, (names(result_columns)) := result_columns]




# save the newly compiled file --------------------------------------------

# Make sure the output folder exists so that the results of a long run are not
# lost to a missing directory; this is a no-op when it is already there.
output_dir <- "output_cluster"
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

# NOTE(review): the file name encodes region, rock, threshold, omega case and
# batch only. Runs that differ just in media_name, ph_case or degass_case
# write to the same path.
output_file <- paste0(region_name, "_", rock_name, "_thresh_", omega_thresh, "_omega_",  omega_case, "_batch_", num_batch, ".csv")

fwrite(dt, file.path(output_dir, output_file))

# end the program ---------------------------------------------------------

# total elapsed time since time_start
print(Sys.time() - time_start)


