---
title: "DSF"
output:
  pdf_document: default
  html_document: default
date: "2022-10-13"
---

# Spatially optimising market-based instruments on private lands for conservation
## Code to run all scenarios
### For queries please email brooke.williams@uq.edu.au or j.rhodes@uq.edu.au
## Set up
```{r "knitr config", cache = FALSE, include=FALSE}
# Anchor knitr's working directory at the RStudio project root so the relative
# paths used below ("./functions.R", "./preprocessing/...") resolve consistently.
require("knitr")
opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())
```
Load packages and set working directory
```{r setup}
# Attach all packages used by the scenarios below.
# NOTE(review): plyr is attached after dplyr, so plyr's summarise/mutate/rename
# mask dplyr's versions - confirm this ordering is intentional.
library(gurobi)
library(Matrix)
library(rgeos)
library(sp)
library(dplyr)
library(magrittr)
library(rgdal)
library(purrr)
library(plyr)
library(stringr)
library(parallel)
library(data.table)
library(doSNOW)
library(foreach)
library(doParallel)
library(ranger)
library(palmerpenguins)
library(tidyverse)
library(kableExtra)
library(stringi)
library(readr)
library(gtools)
library(devtools)
# NOTE(review): rm(list=ls()) wipes the global environment mid-knit; it is
# generally discouraged in scripts and is redundant in a fresh knit session.
rm(list=ls())
getwd()
```
Set wd, output directory, import functions and pre-processed data, define the number of simulations and the overall budget
```{r, include=FALSE}
#Load the required functions
source('./functions.R')
#Load the data
load("./preprocessing/pu.df_11.05.23.RData")
#Set number of simulations (based on sensitivity analysis - see section SM1.4 of manuscript)
sim <- 200
#Test overall budget level
b <- 20300000
b.vec <- c(1:10 * 20300000)
#Create output folders - you can name these whatever you like
outfolder <- "outputs"
outfoldermaps <- "outputs/maps"
outfoldervals <- "outputs/vals"
#Create each output folder if missing; recursive = TRUE also creates "outputs".
#(The two copy-pasted if/else branches are folded into one loop.)
for (folder in c(outfoldermaps, outfoldervals)) {
  if (!dir.exists(folder)) {
    dir.create(folder, recursive = TRUE)
  } else {
    print("Dir already exists!")
  }
}
print("Set up complete")
```
APPROACHES AND BUDGET SCENARIOS
## Approach 4 - Integrated approach (t7) single budget
Set up dataframe for appropriate scenario
```{r}
# Scenario label used in all output file names for this section
scen <- "approach4"
# Select the approach-4 (t7, area-weighted) columns from the master dataframe df.org
df <- df.org[c("LGA", "NewPropID", "npv.mean", "admin.mean", "rank.t7", "MeanAdopt", "MeanWTA.tot", "t7.w", "area.w")]
# Standardise the column names expected by properties.par() and the code below
colnames(df) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")
#For parallel processing
#Split larger dataframe into list of smaller dataframes (ie. one for each planning unit)
split.df <- split(df, df$puid)
#Need to specify path to parallel processed outputs
pt <- "./preprocessing/par/"
```
In this case we first want to use the minimum npv value (for LGAs), this sets up for the next bit of the code which is increments
```{r, include=FALSE}
#Administrative cost data of processing past tenders is not available here, but cost of tenders are available through the Biodiversity Conservation Trust (BCT) with an appropriate licensing agreement
cost <- read.csv("./raw_data/bct_cost_data_26_7_22.csv")
# Start every LGA at the lowest reported tender investment; the increment code
# below scales the NPV upwards from this floor.
df$npv <- min(cost$Approx_tot_investment)
```
Run the first set of simulations - this is for the first (minimum) npv increment
```{r, results = FALSE, include=FALSE}
#Clean out the parallel processing folder: remove every file from a previous run
files <- list.files("./preprocessing/par", full.names = TRUE)
unlink(files)   # unlink() is vectorised; no need for sapply()
#Run the simulations (properties.par also writes per-LGA files into pt as a side effect)
df_new0 <- properties.par(split.df, sim)
#Keep only the final combined file (df_final93.RData) and delete the intermediates.
#FIXES: pattern is a regex, so '.RData' is anchored/escaped to avoid accidental
#matches, and head(ll, -1) replaces the fragile 'll[1:length(ll)-1]', which
#parsed as (1:length(ll))-1 and only worked because a 0 index is ignored.
ll <- list.files(path=pt, pattern = '\\.RData$', full.names=TRUE)
ll <- head(ll, -1)
file.remove(ll)
load(paste0(pt, "df_final93.RData"))
df_new0 <- df_final
#Drop all-zero rows (LGAs with no successful simulated bids)
df_new0 <- df_new0[rowSums(df_new0[])>0,]

#Keep only the final SE summary and read it in
llse <- list.files(path=pt, pattern = 'se\\.csv$', full.names=TRUE)
llse <- head(llse, -1)
file.remove(llse)
se.df0 <- read.csv(paste0(pt, "SE_", scen, "_", b, "_93_se.csv"))

#Keep only the final SD summary and read it in
llsd <- list.files(path=pt, pattern = 'sd\\.csv$', full.names=TRUE)
llsd <- head(llsd, -1)
file.remove(llsd)
sd.df0 <- read.csv(paste0(pt, "SD_", scen, "_", b, "_93_sd.csv"))
```
Create NPV increments, with the min being the lowest NPV reported and the max being the highest.
y is the number of increments (set to 33 below); as a guide, 65 increments equates to increments of roughly $1,000,000.
```{r}
#Number of NPV increments spanning the min to max reported tender investment
y <- 33
min_npv <- min(cost$Approx_tot_investment)
max_npv <- max(cost$Approx_tot_investment)
#Size of one increment step
incre <- (max_npv-min_npv)/y
#Add new NPV columns to the dataframe: npv.min holds the floor, npv1..npvy the steps
df$npv.min <- min_npv
for (i in seq_len(y)) {
  #min_npv + incre*i is a scalar; R recycles it down the column, so the original
  #rep()/append/rename dance is unnecessary
  df[[paste0("npv", i)]] <- min_npv + incre * i
}
```
Now we need to run separate simulations for each LGA budget increment - this creates y new dataframes
```{r, results = FALSE, include=FALSE}
##Create a new dataframe which has all puid's (LGAs), so that new dataframes can be merged to this
#This ensures that even if an LGA has no successful bids (ie. the bidders are higher than the NPV), we capture that in the optimisation
#So we are merging everything to the complete set of LGA ids
df.to.merge <- data.frame(puid = unique(df$puid))
df_new0 <- merge(df.to.merge, df_new0, by = "puid", all = TRUE)

#Create a new dataframe for each increment (y above)
#Each pass re-runs the simulations with the i-th NPV column and stores results
#in generated globals df_new<i>, se.df<i>, sd.df<i> via assign()/get().
for (i in 1:y){
  df1 <- df[c("puid", "NewPropID", paste0("npv", i), "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")]
  colnames(df1) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")
  #check this
  split.df1 <- split(df1, df$puid)
  properties.par(split.df1, sim)
  #Keep only the final combined .RData and remove the intermediates.
  #NOTE(review): '1:length(ll)-1' parses as (1:length(ll))-1, i.e. 0:(n-1); it
  #drops the last element only because a 0 index is ignored - head(ll, -1) is safer.
  ll <- list.files(path=pt, pattern = '.RData', full.names=TRUE)
  ll <- ll[1:length(ll)-1]
  file.remove(ll)
  load(paste0(pt, "df_final93.RData"))
  
  llse <- list.files(path=pt, pattern = 'se.csv', full.names=TRUE)
  llse <- llse[1:length(llse)-1]
  file.remove(llse)
  se.df <- read.csv(paste0(pt, "SE_", scen, "_", b, "_93_se.csv"))
  assign(paste0("se.df", i), se.df)

  llsd <- list.files(path=pt, pattern = 'sd.csv', full.names=TRUE)
  llsd <- llsd[1:length(llsd)-1]
  file.remove(llsd)
  sd.df <- read.csv(paste0(pt, "SD_", scen, "_", b, "_93_sd.csv"))
  assign(paste0("sd.df", i), sd.df)
  
  #df_final is created in the global environment by load() above
  df_new <- df_final
  assign(paste0("df_new", i), df_new)
  #Merge to the full LGA id set, then drop all-zero rows
  merged <- merge(df.to.merge, get(paste0("df_new", i)), by = "puid", all = TRUE)
  assign(paste0("df_new", i), merged)
  assign(paste0("df_new", i), get(paste0("df_new", i))[rowSums(get(paste0("df_new", i))[])>0,])
}
```
### Set up structures for the optimisation
```{r}
#Create matrix for constraint that says only 1 budget can be allocated per LGA.
#One identity block per budget increment: column j of block k flags LGA j at
#increment k, so a row-sum cap of 1 (added in the model) allows one pick per LGA.
n_pu <- length(unique(pu.df$LGA))
matrix_data <- diag(1, n_pu, n_pu)
#One identity block per cost increment (y increments plus the original)
pu_matrix <- do.call(cbind, rep(list(matrix_data), y + 1))

#Helper: concatenate one column across df_new0, df_new1, ..., df_new<y>, then
#replace NAs with na_val (costs get a huge sentinel so NA rows are never selected)
collect_incre <- function(col, na_val) {
  vals <- df_new0[[col]]
  for (k in seq_len(y)) {
    vals <- c(vals, get(paste0("df_new", k))[[col]])
  }
  vals[is.na(vals)] <- na_val
  vals
}

#Long vector of all costs across increments
npv.all.incre <- collect_incre("npv", 1000000000000000)
#Append the cost row to the planning unit matrix and make it sparse
m <- rbind(pu_matrix, npv.all.incre)
constr_matrix <- drop0(m, tol = 0)

#Long vector of all conservation benefit across increments
cons.all.incre <- collect_incre("cons.benefit", 0)
#Koala area (in this current formulation this is just property area)
karea.all.incre <- collect_incre("area", 0)
#Area benefit (identical to koala area above, kept for downstream naming)
area.all.incre <- collect_incre("area", 0)

#Helper: concatenate column 2 of the per-increment SD/SE summary dataframes
collect_stat <- function(prefix) {
  vals <- get(paste0(prefix, "0"))[, 2]
  for (k in seq_len(y)) {
    vals <- c(vals, get(paste0(prefix, k))[, 2])
  }
  vals[is.na(vals)] <- 0
  vals
}
#Long vectors of all SD and SE values
sd.all.incre <- collect_stat("sd.df")
se.all.incre <- collect_stat("se.df")

```
Set up and run the optimisation (to select priority planning units (LGA's))
```{r, results = FALSE}
#Build the Gurobi MIP: maximise total conservation benefit subject to
# (i) at most one budget increment selected per LGA (identity-block rows), and
# (ii) total selected NPV <= overall budget b (the final npv row of constr_matrix).
model <- list()
model$A <- constr_matrix
model$obj        <- cons.all.incre
model$modelsense <- 'max'
model$rhs        <- c(rep(1, nrow(constr_matrix)-1), b)
model$sense      <- c(rep('<=',nrow(constr_matrix)-1),'<')   # gurobi treats '<' as '<='
model$vtype      <- c('B')   # binary decision variables (recycled to all columns)

params <- list(OutputFlag=0)   # silence the solver log

#Run
result <- gurobi(model, params)

print('Solution:')
print(result$objval)
print(result$x)
#BUG FIX: paste0(outfolder, "./", ...) built "outputs./<file>", i.e. a
#non-existent "outputs." directory on Linux; file.path() writes into outfolder.
save(result, file=file.path(outfolder, paste0(scen, "_", b, ".RData")))

```
### Export the results
```{r, results = FALSE}
#Load in base shp file for the planning units
#NOTE(review): rgdal/readOGR is retired; sf::st_read is the modern replacement.
shp.pu <- readOGR("./preprocessing/LGA_study_region_clip_project.shp")

###Need to be super careful about indexing here
#First create a solutions matrix to show which LGA was selected with which budget increment
#result$x is the flat binary solution; reshaping by LGA count gives rows = LGAs,
#columns = budget increments (relies on the construction order being preserved).
solutions <- matrix(result$x, nrow=length(unique(pu.df$LGA)))
binary.sol <- rowSums(solutions)
#FOR HEAT MAPS
#Then put values to this solutions matrix so we can visualise the selected budget amounts
budget.matrix <- matrix(npv.all.incre, nrow=length(unique(pu.df$LGA)))
solutions.budget.matrix <- solutions*budget.matrix
budget.sol <- rowSums(solutions.budget.matrix)
#Change budget.sol to proportions instead of exact values
budget.sol.prop <- budget.sol/b*100

#Do the same for conservation benefit (scaled to millions)
consben.matrix <- matrix(cons.all.incre/1000000, nrow=length(unique(pu.df$LGA)))
solutions.consben.matrix <- solutions*consben.matrix
consben.sol <- rowSums(solutions.consben.matrix)

#And make one from consben/area (the two 1e6 scalings cancel; kept for clarity)
area.matrix <- matrix(((cons.all.incre/1000000)/(area.all.incre/1000000)), nrow=length(unique(pu.df$LGA)))
solutions.consben.area.matrix <- solutions*area.matrix
#0/0 divisions produce NaN; zero them so rowSums works
solutions.consben.area.matrix[is.nan(solutions.consben.area.matrix)] <- 0
consben.area.sol <- rowSums(solutions.consben.area.matrix)

#Generate the results (functions from functions.R; they read the globals above)
make.maps(outfoldermaps, scen)
exp.vals(outfoldervals, scen)

# Save the objects to RData files for later comparison across approaches
save(budget.matrix, file = "./preprocessing/budget.matrix_approach4.RData")
save(consben.matrix, file = "./preprocessing/consben.matrix_approach4.RData")
save(cons.all.incre, file = "./preprocessing/cons.all.incre_approach4.RData")
save(npv.all.incre, file = "./preprocessing/npv.all.incre_approach4.RData")
save(area.matrix, file = "./preprocessing/area.matrix_approach4.RData")
```

## Approach 4 - Integrated approach (t7) cycling through the budgets
```{r, include=FALSE}
#If not already loaded, need this for the make maps function
shp.pu <- readOGR("./preprocessing/LGA_study_region_clip_project.shp")

scen <- "approach4"
#Approach-4 (t7, area-weighted) columns, renamed to the standard names used below
df <- df.org[c("LGA", "NewPropID", "npv.mean", "admin.mean", "rank.t7", "MeanAdopt", "MeanWTA.tot", "t7.w", "area.w")]
colnames(df) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")

#In this case we first want to use the minimum npv value (for LGAs), this sets up for the next bit of the code which is increments
#Administrative cost data of processing past tenders is not available here, but cost of tenders are available through the Biodiversity Conservation Trust (BCT) with an appropriate licensing agreement
cost <- read.csv("./raw_data/bct_cost_data_26_7_22.csv")
df$npv <- min(cost$Approx_tot_investment)

#For parallel processing
#Split larger dataframe into list of smaller dataframes (ie. one for each planning unit)
split.df <- split(df, df$puid)
#Need to specify path to parallel processed outputs
pt <- "./preprocessing/par/"
# List all files in the folder
files <- list.files("./preprocessing/par", full.names = TRUE)
# Delete all files (clean slate for this scenario's parallel outputs)
sapply(files, unlink)
#Run  the simulations
df_new0 <- properties.par(split.df, sim)
#Keep only the final combined .RData (df_final93) and delete the intermediates.
#NOTE(review): '1:length(ll)-1' is (1:length(ll))-1; it only works because a 0
#index is ignored - head(ll, -1) would be the robust form.
ll <- list.files(path=pt, pattern = '.RData', full.names=TRUE)
ll <- ll[1:length(ll)-1]
file.remove(ll)
load(paste0(pt, "df_final93.RData"))
df_new0 <- df_final
#Drop all-zero rows (LGAs with no successful simulated bids)
df_new0 <- df_new0[rowSums(df_new0[])>0,]

#Add se and sd values
llse <- list.files(path=pt, pattern = 'se.csv', full.names=TRUE)
llse <- llse[1:length(llse)-1]
file.remove(llse)
se.df0 <- read.csv(paste0(pt, "SE_", scen, "_", b, "_93_se.csv"))

llsd <- list.files(path=pt, pattern = 'sd.csv', full.names=TRUE)
llsd <- llsd[1:length(llsd)-1]
file.remove(llsd)
sd.df0 <- read.csv(paste0(pt, "SD_", scen, "_", b, "_93_sd.csv"))

#Create NPV increments, with the min being the lowest NPV reported and the max being the highest. 
#y is the number of increments
y <- 33
min_npv <- min(cost$Approx_tot_investment)
#BUG FIX: the column name was misspelled ("Approx_tot_investmen"); it only
#worked through $'s silent partial matching on data frames, which breaks if a
#second column sharing the prefix is ever added.
max_npv <- max(cost$Approx_tot_investment)
#Size of one increment step
incre <- (max_npv-min_npv)/y

#Add new NPV values to the dataframe: npv.min holds the floor, npv1..npvy the steps
df$npv.min <- min_npv
for(i in seq_len(y)) {
  #Scalar min_npv + incre*i is recycled down the column, so no rep() is needed
  df[[paste0("npv", i)]] <- min_npv + incre*i
}

#Now we need to run separate simulations for each LGA budget increment - this creates y new dataframes
##Create a new dataframe which has all puid's, so that new dataframes can be merged to this
#This ensures that even if an LGA has no successful bids (ie. the bidders are higher than the NPV), we capture that in the optimisation
#So we are merging everything to the complete set of LGA ids
df.to.merge <- data.frame(puid = unique(df$puid))
df_new0 <- merge(df.to.merge, df_new0, by = "puid", all = TRUE)

#Create a new dataframe for each increment (y above)
#Each pass stores its outputs in generated globals df_new<i>, se.df<i>, sd.df<i>
for (i in 1:y){
  df1 <- df[c("puid", "NewPropID", paste0("npv", i), "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")]
  
  colnames(df1) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")
  
  split.df1 <- split(df1, df$puid)
  properties.par(split.df1, sim)
  #Keep only the final combined .RData and remove the intermediates.
  #NOTE(review): '1:length(ll)-1' relies on the 0 index being ignored; head(ll, -1) is safer.
  ll <- list.files(path=pt, pattern = '.RData', full.names=TRUE)
  ll <- ll[1:length(ll)-1]
  file.remove(ll)
  load(paste0(pt, "df_final93.RData"))
  
  llse <- list.files(path=pt, pattern = 'se.csv', full.names=TRUE)
  llse <- llse[1:length(llse)-1]
  file.remove(llse)
  se.df <- read.csv(paste0(pt, "SE_", scen, "_", b, "_93_se.csv"))
  assign(paste0("se.df", i), se.df)

  llsd <- list.files(path=pt, pattern = 'sd.csv', full.names=TRUE)
  llsd <- llsd[1:length(llsd)-1]
  file.remove(llsd)
  sd.df <- read.csv(paste0(pt, "SD_", scen, "_", b, "_93_sd.csv"))
  assign(paste0("sd.df", i), sd.df)
  
  #df_final was created in the global environment by load() above
  df_new <- df_final
  assign(paste0("df_new", i), df_new)
  #Merge to the complete LGA id set, then drop all-zero rows
  merged <- merge(df.to.merge, get(paste0("df_new", i)), by = "puid", all = TRUE)
  assign(paste0("df_new", i), merged)
  assign(paste0("df_new", i), get(paste0("df_new", i))[rowSums(get(paste0("df_new", i))[])>0,])
  
}
###Set up structures for the optimisation
#Create matrix for constraint that says only 1 budget can be allocated per LGA.
#NOTE(review): this uses nrow(df_new) (the last increment's dataframe) while the
#solution reshape below uses length(unique(pu.df$LGA)) - confirm these agree.
df_new <- df_new[rowSums(df_new[])>0,]
n_units <- nrow(df_new)
matrix_data <- diag(1, n_units, n_units)
#One identity block per cost increment (y increments plus the original)
pu_matrix <- do.call(cbind, rep(list(matrix_data), y + 1))

#Helper: concatenate one column across df_new0..df_new<y>, replacing NA with
#na_val (costs get a huge sentinel so NA rows can never be selected)
collect_incre <- function(col, na_val) {
  vals <- df_new0[[col]]
  for (k in seq_len(y)) {
    vals <- c(vals, get(paste0("df_new", k))[[col]])
  }
  vals[is.na(vals)] <- na_val
  vals
}

#Long vector of all costs, appended to the planning unit matrix, then sparsified
npv.all.incre <- collect_incre("npv", 1000000000000000)
m <- rbind(pu_matrix, npv.all.incre)
constr_matrix <- drop0(m, tol = 0)

#Koala area (currently just property area), conservation benefit and area benefit
karea.all.incre <- collect_incre("area", 0)
cons.all.incre <- collect_incre("cons.benefit", 0)
area.all.incre <- collect_incre("area", 0)

#Helper: concatenate column 2 of the per-increment SD/SE summary dataframes
collect_stat <- function(prefix) {
  vals <- get(paste0(prefix, "0"))[, 2]
  for (k in seq_len(y)) {
    vals <- c(vals, get(paste0(prefix, k))[, 2])
  }
  vals[is.na(vals)] <- 0
  vals
}
sd.all.incre <- collect_stat("sd.df")
se.all.incre <- collect_stat("se.df")


for (b in b.vec[1:10]){
#Set up and run the optimisation (to select priority planning units (LGA's))
#Maximise conservation benefit s.t. at most one increment per LGA and total NPV <= b
model <- list()
model$A <- constr_matrix
model$obj        <- cons.all.incre
model$modelsense <- 'max'
model$rhs        <- c(rep(1, nrow(constr_matrix)-1), b)
model$sense      <- c(rep('<=',nrow(constr_matrix)-1),'<')   # gurobi reads '<' as '<='
model$vtype      <- c('B')   # binary decision variables (recycled)

params <- list(OutputFlag=0)   # silence the solver log

#Run
result <- gurobi(model, params)

print('Solution:')
print(result$objval)
print(result$x)
#BUG FIX: paste0(outfolder, "./", ...) built "outputs./<file>", a non-existent
#"outputs." directory on Linux; file.path() writes into outfolder as intended.
save(result, file = file.path(outfolder, paste0(scen, "_", b, ".RData")))

###Export the results
###Need to be super careful about indexing here
#First create a solutions matrix to show which LGA was selected with which budget increment
solutions <- matrix(result$x, nrow=length(unique(pu.df$LGA)))
binary.sol <- rowSums(solutions)
#FOR HEAT MAPS
#Then put values to this solutions matrix so we can visualise the selected budget amounts
budget.matrix <- matrix(npv.all.incre, nrow=length(unique(pu.df$LGA)))
solutions.budget.matrix <- solutions*budget.matrix
budget.sol <- rowSums(solutions.budget.matrix)
#Change budget.sol to proportions instead of exact values
budget.sol.prop <- budget.sol/b*100

#Do the same for conservation benefit
consben.matrix <- matrix(cons.all.incre, nrow=length(unique(pu.df$LGA)))
solutions.consben.matrix <- solutions*consben.matrix
consben.sol <- rowSums(solutions.consben.matrix)

#And make one from consben/area (the 1e6 scalings cancel; NaNs from 0/0 are zeroed)
area.matrix <- matrix(((cons.all.incre/1000000)/(area.all.incre/1000000)), nrow=length(unique(pu.df$LGA)))
solutions.consben.area.matrix <- solutions*area.matrix
solutions.consben.area.matrix[is.nan(solutions.consben.area.matrix)] <- 0
consben.area.sol <- rowSums(solutions.consben.area.matrix)

#Generate the results
make.maps(outfoldermaps, scen)
exp.vals(outfoldervals, scen)

}
```

## Approach 1 (new) - Integrated approach (t0) cycling through the budgets
```{r, include=FALSE}
#If not already loaded, need this for the make maps function
shp.pu <- readOGR("./preprocessing/LGA_study_region_clip_project.shp")

scen <- "approach1"
#Approach-1 (t0, unweighted) columns, renamed to the standard names used below
df <- df.org[c("LGA", "NewPropID", "npv.mean", "admin.mean", "rank.t0", "MeanAdopt", "LValHa", "t0", "area")]
colnames(df) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")
#Approach 1 assumes every property participates (adoption probability fixed at 1)
df$prob.property <- 1

#In this case we first want to use the minimum npv value (for LGAs), this sets up for the next bit of the code which is increments
#Administrative cost data of processing past tenders is not available here, but cost of tenders are available through the Biodiversity Conservation Trust (BCT) with an appropriate licensing agreement
cost <- read.csv("./raw_data/bct_cost_data_26_7_22.csv")
df$npv <- min(cost$Approx_tot_investment)

#For parallel processing
#Split larger dataframe into list of smaller dataframes (ie. one for each planning unit)
split.df <- split(df, df$puid)
#Need to specify path to parallel processed outputs
pt <- "./preprocessing/par/"
# List all files in the folder
files <- list.files("./preprocessing/par", full.names = TRUE)
# Delete all files (clean slate for this scenario's parallel outputs)
sapply(files, unlink)
#Run  the simulations
df_new0 <- properties.par(split.df, sim)
#Keep only the final combined .RData (df_final93) and delete the intermediates.
#NOTE(review): '1:length(ll)-1' only works because a 0 index is ignored.
ll <- list.files(path=pt, pattern = '.RData', full.names=TRUE)
ll <- ll[1:length(ll)-1]
file.remove(ll)
load(paste0(pt, "df_final93.RData"))
df_new0 <- df_final
#Drop all-zero rows (LGAs with no successful simulated bids)
df_new0 <- df_new0[rowSums(df_new0[])>0,]

#Add se and sd values
llse <- list.files(path=pt, pattern = 'se.csv', full.names=TRUE)
llse <- llse[1:length(llse)-1]
file.remove(llse)
se.df0 <- read.csv(paste0(pt, "SE_", scen, "_", b, "_93_se.csv"))

llsd <- list.files(path=pt, pattern = 'sd.csv', full.names=TRUE)
llsd <- llsd[1:length(llsd)-1]
file.remove(llsd)
sd.df0 <- read.csv(paste0(pt, "SD_", scen, "_", b, "_93_sd.csv"))

#Create NPV increments, with the min being the lowest NPV reported and the max being the highest. 
#y is the number of increments
y <- 33
min_npv <- min(cost$Approx_tot_investment)
#BUG FIX: the column name was misspelled ("Approx_tot_investmen"); it only
#worked through $'s silent partial matching on data frames.
max_npv <- max(cost$Approx_tot_investment)
#Size of one increment step
incre <- (max_npv-min_npv)/y

#Add new NPV values to the dataframe: npv.min holds the floor, npv1..npvy the steps
df$npv.min <- min_npv
for(i in seq_len(y)) {
  #Scalar min_npv + incre*i is recycled down the column, so no rep() is needed
  df[[paste0("npv", i)]] <- min_npv + incre*i
}

#Now we need to run separate simulations for each LGA budget increment - this creates y new dataframes
##Create a new dataframe which has all puid's, so that new dataframes can be merged to this
#This ensures that even if an LGA has no successful bids (ie. the bidders are higher than the NPV), we capture that in the optimisation
#So we are merging everything to the complete set of LGA ids
df.to.merge <- data.frame(puid = unique(df$puid))
df_new0 <- merge(df.to.merge, df_new0, by = "puid", all = TRUE)

#Create a new dataframe for each increment (y above)
#Each pass stores its outputs in generated globals df_new<i>, se.df<i>, sd.df<i>
for (i in 1:y){
  df1 <- df[c("puid", "NewPropID", paste0("npv", i), "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")]
  
  colnames(df1) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")
  
  split.df1 <- split(df1, df$puid)
  properties.par(split.df1, sim)
  #Keep only the final combined .RData and remove the intermediates.
  #NOTE(review): '1:length(ll)-1' relies on the 0 index being ignored; head(ll, -1) is safer.
  ll <- list.files(path=pt, pattern = '.RData', full.names=TRUE)
  ll <- ll[1:length(ll)-1]
  file.remove(ll)
  load(paste0(pt, "df_final93.RData"))
  
  llse <- list.files(path=pt, pattern = 'se.csv', full.names=TRUE)
  llse <- llse[1:length(llse)-1]
  file.remove(llse)
  se.df <- read.csv(paste0(pt, "SE_", scen, "_", b, "_93_se.csv"))
  assign(paste0("se.df", i), se.df)

  llsd <- list.files(path=pt, pattern = 'sd.csv', full.names=TRUE)
  llsd <- llsd[1:length(llsd)-1]
  file.remove(llsd)
  sd.df <- read.csv(paste0(pt, "SD_", scen, "_", b, "_93_sd.csv"))
  assign(paste0("sd.df", i), sd.df)
  
  #df_final was created in the global environment by load() above
  df_new <- df_final
  assign(paste0("df_new", i), df_new)
  #Merge to the complete LGA id set, then drop all-zero rows
  merged <- merge(df.to.merge, get(paste0("df_new", i)), by = "puid", all = TRUE)
  assign(paste0("df_new", i), merged)
  assign(paste0("df_new", i), get(paste0("df_new", i))[rowSums(get(paste0("df_new", i))[])>0,])
  
}
###Set up structures for the optimisation
#Create matrix for constraint that says only 1 budget can be allocated per LGA.
#NOTE(review): this uses nrow(df_new) (the last increment's dataframe) while the
#solution reshape below uses length(unique(pu.df$LGA)) - confirm these agree.
df_new <- df_new[rowSums(df_new[])>0,]
n_units <- nrow(df_new)
matrix_data <- diag(1, n_units, n_units)
#One identity block per cost increment (y increments plus the original)
pu_matrix <- do.call(cbind, rep(list(matrix_data), y + 1))

#Helper: concatenate one column across df_new0..df_new<y>, replacing NA with
#na_val (costs get a huge sentinel so NA rows can never be selected)
collect_incre <- function(col, na_val) {
  vals <- df_new0[[col]]
  for (k in seq_len(y)) {
    vals <- c(vals, get(paste0("df_new", k))[[col]])
  }
  vals[is.na(vals)] <- na_val
  vals
}

#Long vector of all costs, appended to the planning unit matrix, then sparsified
npv.all.incre <- collect_incre("npv", 1000000000000000)
m <- rbind(pu_matrix, npv.all.incre)
constr_matrix <- drop0(m, tol = 0)

#Koala area (currently just property area), conservation benefit and area benefit
karea.all.incre <- collect_incre("area", 0)
cons.all.incre <- collect_incre("cons.benefit", 0)
area.all.incre <- collect_incre("area", 0)

#Helper: concatenate column 2 of the per-increment SD/SE summary dataframes
collect_stat <- function(prefix) {
  vals <- get(paste0(prefix, "0"))[, 2]
  for (k in seq_len(y)) {
    vals <- c(vals, get(paste0(prefix, k))[, 2])
  }
  vals[is.na(vals)] <- 0
  vals
}
sd.all.incre <- collect_stat("sd.df")
se.all.incre <- collect_stat("se.df")


for (b in b.vec[1:10]){

#Set up and run the optimisation (to select priority planning units (LGA's))
#Maximise conservation benefit s.t. at most one increment per LGA and total NPV <= b
model <- list()
model$A <- constr_matrix
model$obj        <- cons.all.incre
model$modelsense <- 'max'
model$rhs        <- c(rep(1, nrow(constr_matrix)-1), b)
model$sense      <- c(rep('<=',nrow(constr_matrix)-1),'<')   # gurobi reads '<' as '<='
model$vtype      <- c('B')   # binary decision variables (recycled)

params <- list(OutputFlag=0)   # silence the solver log

#Run
result <- gurobi(model, params)

print('Solution:')
print(result$objval)
print(result$x)
#BUG FIX: paste0(outfolder, "./", ...) built "outputs./<file>", a non-existent
#"outputs." directory on Linux; file.path() writes into outfolder as intended.
save(result, file = file.path(outfolder, paste0(scen, "_", b, ".RData")))

###Export the results
###Need to be super careful about indexing here
#First create a solutions matrix to show which LGA was selected with which budget increment
solutions <- matrix(result$x, nrow=length(unique(pu.df$LGA)))
binary.sol <- rowSums(solutions)
#FOR HEAT MAPS
#Then put values to this solutions matrix so we can visualise the selected budget amounts
budget.matrix <- matrix(npv.all.incre, nrow=length(unique(pu.df$LGA)))
solutions.budget.matrix <- solutions*budget.matrix
budget.sol <- rowSums(solutions.budget.matrix)
#Change budget.sol to proportions instead of exact values
budget.sol.prop <- budget.sol/b*100

#Do the same for conservation benefit
consben.matrix <- matrix(cons.all.incre, nrow=length(unique(pu.df$LGA)))
solutions.consben.matrix <- solutions*consben.matrix
consben.sol <- rowSums(solutions.consben.matrix)

#And make one from consben/area (the 1e6 scalings cancel; NaNs from 0/0 are zeroed)
area.matrix <- matrix(((cons.all.incre/1000000)/(area.all.incre/1000000)), nrow=length(unique(pu.df$LGA)))
solutions.consben.area.matrix <- solutions*area.matrix
solutions.consben.area.matrix[is.nan(solutions.consben.area.matrix)] <- 0
consben.area.sol <- rowSums(solutions.consben.area.matrix)

#Generate the results
make.maps(outfoldermaps, scen)
exp.vals(outfoldervals, scen)
}

```

## Approach 2 (new) - Integrated approach (t0) cycling through the budgets
```{r, include=FALSE}
#If not already loaded, need this for the make maps function
shp.pu <- readOGR("./preprocessing/LGA_study_region_clip_project.shp")

scen <- "approach2"
#Approach-2 (t0, area-weighted) columns, renamed to the standard names used below
df <- df.org[c("LGA", "NewPropID", "npv.mean", "admin.mean", "rank.t0", "MeanAdopt", "MeanWTA.tot", "t0.w", "area.w")]
colnames(df) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")

#In this case we first want to use the minimum npv value (for LGAs), this sets up for the next bit of the code which is increments
#Administrative cost data of processing past tenders is not available here, but cost of tenders are available through the Biodiversity Conservation Trust (BCT) with an appropriate licensing agreement
cost <- read.csv("./raw_data/bct_cost_data_26_7_22.csv")
df$npv <- min(cost$Approx_tot_investment)

#For parallel processing
#Split larger dataframe into list of smaller dataframes (ie. one for each planning unit)
split.df <- split(df, df$puid)
#Need to specify path to parallel processed outputs
pt <- "./preprocessing/par/"
# List all files in the folder
files <- list.files("./preprocessing/par", full.names = TRUE)
# Delete all files (clean slate for this scenario's parallel outputs)
sapply(files, unlink)
#Run  the simulations
df_new0 <- properties.par(split.df, sim)
#Keep only the final combined .RData (df_final93) and delete the intermediates.
#NOTE(review): '1:length(ll)-1' only works because a 0 index is ignored.
ll <- list.files(path=pt, pattern = '.RData', full.names=TRUE)
ll <- ll[1:length(ll)-1]
file.remove(ll)
load(paste0(pt, "df_final93.RData"))
df_new0 <- df_final
#Drop all-zero rows (LGAs with no successful simulated bids)
df_new0 <- df_new0[rowSums(df_new0[])>0,]

#Add se and sd values
llse <- list.files(path=pt, pattern = 'se.csv', full.names=TRUE)
llse <- llse[1:length(llse)-1]
file.remove(llse)
se.df0 <- read.csv(paste0(pt, "SE_", scen, "_", b, "_93_se.csv"))

llsd <- list.files(path=pt, pattern = 'sd.csv', full.names=TRUE)
llsd <- llsd[1:length(llsd)-1]
file.remove(llsd)
sd.df0 <- read.csv(paste0(pt, "SD_", scen, "_", b, "_93_sd.csv"))

#Create NPV increments, with the min being the lowest NPV reported and the max being the highest. 
#y is the number of increments
y <- 33
min_npv <- min(cost$Approx_tot_investment)
#BUG FIX: the column name was misspelled ("Approx_tot_investmen"); it only
#worked through $'s silent partial matching on data frames.
max_npv <- max(cost$Approx_tot_investment)
#Size of one increment step
incre <- (max_npv-min_npv)/y

#Add new NPV values to the dataframe: npv.min holds the floor, npv1..npvy the steps
df$npv.min <- min_npv
for(i in seq_len(y)) {
  #Scalar min_npv + incre*i is recycled down the column, so no rep() is needed
  df[[paste0("npv", i)]] <- min_npv + incre*i
}

#Now we need to run separate simulations for each LGA budget increment - this creates y new dataframes
##Create a new dataframe which has all puid's, so that new dataframes can be merged to this
#This ensures that even if an LGA has no successful bids (ie. the bidders are higher than the NPV), we capture that in the optimisation
#So we are merging everything to the complete set of LGA ids
df.to.merge <- data.frame(puid = unique(df$puid))
df_new0 <- merge(df.to.merge, df_new0, by = "puid", all = TRUE)

#Create a new dataframe for each increment (y above)
#Each pass stores its outputs in generated globals df_new<i>, se.df<i>, sd.df<i>
for (i in 1:y){
  df1 <- df[c("puid", "NewPropID", paste0("npv", i), "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")]
  
  colnames(df1) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")
  
  split.df1 <- split(df1, df$puid)
  properties.par(split.df1, sim)
  #Keep only the final combined .RData and remove the intermediates.
  #NOTE(review): '1:length(ll)-1' relies on the 0 index being ignored; head(ll, -1) is safer.
  ll <- list.files(path=pt, pattern = '.RData', full.names=TRUE)
  ll <- ll[1:length(ll)-1]
  file.remove(ll)
  load(paste0(pt, "df_final93.RData"))
  
  llse <- list.files(path=pt, pattern = 'se.csv', full.names=TRUE)
  llse <- llse[1:length(llse)-1]
  file.remove(llse)
  se.df <- read.csv(paste0(pt, "SE_", scen, "_", b, "_93_se.csv"))
  assign(paste0("se.df", i), se.df)

  llsd <- list.files(path=pt, pattern = 'sd.csv', full.names=TRUE)
  llsd <- llsd[1:length(llsd)-1]
  file.remove(llsd)
  sd.df <- read.csv(paste0(pt, "SD_", scen, "_", b, "_93_sd.csv"))
  assign(paste0("sd.df", i), sd.df)
  
  #df_final was created in the global environment by load() above
  df_new <- df_final
  assign(paste0("df_new", i), df_new)
  #Merge to the complete LGA id set, then drop all-zero rows
  merged <- merge(df.to.merge, get(paste0("df_new", i)), by = "puid", all = TRUE)
  assign(paste0("df_new", i), merged)
  assign(paste0("df_new", i), get(paste0("df_new", i))[rowSums(get(paste0("df_new", i))[])>0,])
  
}
###Set up structures for the optimisation
#Create matrix for constraint that says only 1 budget can be allocated per LGA
#Create matrix where nrow and ncol = no. planning units
#(df_new at this point is the last increment's dataframe from the loop above)
df_new <- df_new[rowSums(df_new[])>0,]
#Identity matrix: one constraint row per planning unit
#(note: uses `=` assignment; the rest of the file uses `<-`)
matrix_data=matrix(0,nrow=nrow(df_new),ncol=nrow(df_new))
# assign value to 1
diag(matrix_data)=1
#We need as many replicates as there are cost increments: decision variables are
#(planning unit x increment) pairs; each row enforces "pick at most one budget
#increment per unit" once combined with the <= 1 RHS below
pu_matrix <- do.call(cbind, replicate(y+1,matrix_data, simplify=FALSE))
#Join the NPV data to this matrix, for each cost increment
#First make a long string of all costs
#Start with the original (increment 0), then append increments 1..y
npv.all.incre <- df_new0$npv
for (i in 1:y){
  df.name <- paste0("df_new", i)
  npv.all.incre <- c(npv.all.incre, get(df.name)$npv)
  #Hack - make the cost of the NA values so high that they would never be selected
  npv.all.incre[is.na(npv.all.incre)] = 1000000000000000
}

#Join it to the planning unit matrix: the final row is the budget constraint
m <- rbind(pu_matrix, npv.all.incre)
#Make sparse (Matrix::drop0) for the Gurobi solver
constr_matrix <- drop0(m, tol = 0)

#Make a long string of koala area (NAs -> 0 so they contribute nothing)
karea.all.incre <- df_new0$area
for (i in 1:y){
  df.name <- paste0("df_new", i)
  karea.all.incre <- c(karea.all.incre, get(df.name)$area)
  karea.all.incre[is.na(karea.all.incre)] = 0
}

#Make a long string of all conservation benefit (the optimisation objective)
cons.all.incre <- df_new0$cons.benefit
for (i in 1:y){
  df.name <- paste0("df_new", i)
  cons.all.incre <- c(cons.all.incre, get(df.name)$cons.benefit)
  cons.all.incre[is.na(cons.all.incre)] = 0
}

#Make a long string of all area benefit
#NOTE(review): identical to karea.all.incre above (both read $area) - confirm
#whether one of these was meant to use a different column
area.all.incre <- df_new0$area
for (i in 1:y){
  df.name <- paste0("df_new", i)
  area.all.incre <- c(area.all.incre, get(df.name)$area)
  area.all.incre[is.na(area.all.incre)] = 0
}

#Make a long string of all SD (column 2 of each per-increment csv)
sd.all.incre <- sd.df0[1:nrow(sd.df0),2]
for (i in 1:y){
  df.name <- paste0("sd.df", i)
  sd.all.incre <- c(sd.all.incre, get(df.name)[1:nrow(get(df.name)),2])
  sd.all.incre[is.na(sd.all.incre)] = 0
}

#Make a long string of all SE
se.all.incre <- se.df0[1:nrow(se.df0),2]
for (i in 1:y){
  df.name <- paste0("se.df", i)
  se.all.incre <- c(se.all.incre, get(df.name)[1:nrow(get(df.name)),2])
  se.all.incre[is.na(se.all.incre)] = 0
}


for (b in b.vec[1:10]){

#Set up and run the optimisation (to select priority planning units (LGA's))
model <- list()
#model$A <- matrix(c(data_frame_test$npv), nrow=1)
#nrow needs to be the number of planning units/LGA's + 1 (the 1 is the data vector)
model$A <- constr_matrix
model$obj        <- cons.all.incre
model$modelsense <- 'max'
model$rhs        <- c(rep(1, nrow(constr_matrix)-1), b)
model$sense      <- c(rep('<=',nrow(constr_matrix)-1),'<')
model$vtype      <- c('B')

params <- list(OutputFlag=0)

#Run
result <- gurobi(model, params)

print('Solution:')
print(result$objval)
print(result$x)
#save(result, file=paste0(outfolder, "./", scen, "_", b, ".RData"))
save(result, file = paste0(outfolder, "./", scen, "_", b, ".RData"))

#load("E:/Linkage/DSF code/private_land_conservation_DSF/outputs_v12/scenario2_101500000.RData")
###Export the results
###Need to be super careful about indexing here
#First create a solutions matrix to show which LGA was selected with which budget increment
solutions <- matrix(result$x, nrow=length(unique(pu.df$LGA)))
binary.sol <- rowSums(solutions)
#FOR HEAT MAPS
#Then put values to this solutions matrix so we can visualise the selected budget amounts
budget.matrix <- matrix(npv.all.incre, nrow=length(unique(pu.df$LGA)))
solutions.budget.matrix <- solutions*budget.matrix
budget.sol <- rowSums(solutions.budget.matrix)
#Change budget.sol to proportions instead of exact values
budget.sol.prop <- budget.sol/b*100

#Do the same for conservation benefit
consben.matrix <- matrix(cons.all.incre, nrow=length(unique(pu.df$LGA)))
solutions.consben.matrix <- solutions*consben.matrix
consben.sol <- rowSums(solutions.consben.matrix)

#And make one from consben/area
area.matrix <- matrix(((cons.all.incre/1000000)/(area.all.incre/1000000)), nrow=length(unique(pu.df$LGA)))
solutions.consben.area.matrix <- solutions*area.matrix
solutions.consben.area.matrix[is.nan(solutions.consben.area.matrix)] <- 0
consben.area.sol <- rowSums(solutions.consben.area.matrix)

#Generate the results

make.maps(outfoldermaps, scen)
exp.vals(outfoldervals, scen)

}

```


##Approach 3 (new) - Integrated approach (t7) cycling through the budgets
```{r, include=FALSE}
#If not already loaded, need this for the make maps function
shp.pu <- readOGR("./preprocessing/LGA_study_region_clip_project.shp")

#Approach 3: same integrated (t7) formulation, but adoption probability is
#forced to 1 for every property (see prob.property below)
scen <- "approach3"
df <- df.org[c("LGA", "NewPropID", "npv.mean", "admin.mean", "rank.t7", "MeanAdopt", "LValHa", "t7", "area")]
colnames(df) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")
#Assume every property participates (overrides MeanAdopt)
df$prob.property <- 1

#In this case we first want to use the minimum npv value (for LGAs), this sets up for the next bit of the code which is increments
#Administrative cost data of processing past tenders is not available here, but cost of tenders are available through the Biodiversity Conservation Trust (BCT) with an appropriate licensing agreement
cost <- read.csv("./raw_data/bct_cost_data_26_7_22.csv")
df$npv <- min(cost$Approx_tot_investment)

#For parallel processing
#Split larger dataframe into list of smaller dataframes (ie. one for each planning unit)
split.df <- split(df, df$puid)
#Need to specify path to parallel processed outputs
pt <- "./preprocessing/par/"
# List all files in the folder and clear out any previous run's outputs
files <- list.files("./preprocessing/par", full.names = TRUE)
# Delete all files
sapply(files, unlink)
#Run the simulations (properties.par writes per-unit files into pt)
df_new0 <- properties.par(split.df, sim)
ll <- list.files(path=pt, pattern = '.RData', full.names=TRUE)
#Keep all but the last file (1:length(ll)-1 == 0:(length(ll)-1); 0 is dropped)
ll <- ll[1:length(ll)-1]
file.remove(ll)
#Defines df_final
load(paste0(pt, "df_final93.RData"))
df_new0 <- df_final
#Drop all-zero rows (units with no successful bids)
df_new0 <- df_new0[rowSums(df_new0[])>0,]

#Add se and sd values
llse <- list.files(path=pt, pattern = 'se.csv', full.names=TRUE)
llse <- llse[1:length(llse)-1]
file.remove(llse)
se.df0 <- read.csv(paste0(pt, "SE_", scen, "_", b, "_93_se.csv"))

llsd <- list.files(path=pt, pattern = 'sd.csv', full.names=TRUE)
llsd <- llsd[1:length(llsd)-1]
file.remove(llsd)
sd.df0 <- read.csv(paste0(pt, "SD_", scen, "_", b, "_93_sd.csv"))

#Create NPV increments, with the min being the lowest NPV reported and the max
#being the highest. y is the number of increments (as a guide, 65 increments
#equate to roughly $1,000,000 each; y = 33 gives coarser steps).
y <- 33
min_npv <- min(cost$Approx_tot_investment)
#Bug fix: "Approx_tot_investmen" (missing trailing "t") only worked through
#data.frame `$` partial matching and silently returns NULL on tibbles;
#spell the column name out in full.
max_npv <- max(cost$Approx_tot_investment)
incre <- (max_npv - min_npv) / y

#Append one NPV column per increment (npv1 ... npv<y>), where column npv_i
#holds the constant value min_npv + i * incre for every property.
df$npv.min <- min_npv
for (i in seq_len(y)) {
  df[[paste0("npv", i)]] <- min_npv + incre * i
}

#Now we need to run separate simulations for each LGA budget increment - this creates y new dataframes
##Create a new dataframe which has all puid's, so that new dataframes can be merged to this
#This ensures that even if an LGA has no successful bids (ie. the bidders are higher than the NPV), we capture that in the optimisation
#So we are merging everything to the complete set of LGA ids
df.to.merge <- data.frame(puid = unique(df$puid))
df_new0 <- merge(df.to.merge, df_new0, by = "puid", all = TRUE)

#Create a new dataframe for each increment (y above). Each iteration re-runs the
#bid simulations with that increment's NPV column and stores the results in
#dynamically named globals (df_new<i>, se.df<i>, sd.df<i>) via assign()/get().
for (i in 1:y){
  #Select this increment's npv column and rename it to the generic "npv"
  df1 <- df[c("puid", "NewPropID", paste0("npv", i), "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")]
  
  colnames(df1) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")
  
  split.df1 <- split(df1, df$puid)
  properties.par(split.df1, sim)
  #Keep only the last .RData written (1:length(ll)-1 == 0:(length(ll)-1);
  #the 0 index is dropped, so this selects all but the last file)
  ll <- list.files(path=pt, pattern = '.RData', full.names=TRUE)
  ll <- ll[1:length(ll)-1]
  file.remove(ll)
  #Defines df_final for this increment
  load(paste0(pt, "df_final93.RData"))
  
  llse <- list.files(path=pt, pattern = 'se.csv', full.names=TRUE)
  llse <- llse[1:length(llse)-1]
  file.remove(llse)
  se.df <- read.csv(paste0(pt, "SE_", scen, "_", b, "_93_se.csv"))
  assign(paste0("se.df", i), se.df)

  llsd <- list.files(path=pt, pattern = 'sd.csv', full.names=TRUE)
  llsd <- llsd[1:length(llsd)-1]
  file.remove(llsd)
  sd.df <- read.csv(paste0(pt, "SD_", scen, "_", b, "_93_sd.csv"))
  assign(paste0("sd.df", i), sd.df)
  
  #Merge to the full puid set so LGAs with no successful bids are kept,
  #then drop all-zero rows (same filtering as df_new0 above)
  df_new <- df_final
  assign(paste0("df_new", i), df_new)
  merged <- merge(df.to.merge, get(paste0("df_new", i)), by = "puid", all = TRUE)
  assign(paste0("df_new", i), merged)
  assign(paste0("df_new", i), get(paste0("df_new", i))[rowSums(get(paste0("df_new", i))[])>0,])
  
}
###Set up structures for the optimisation
#Create matrix for constraint that says only 1 budget can be allocated per LGA
#Create matrix where nrow and ncol = no. planning units
#(df_new at this point is the last increment's dataframe from the loop above)
df_new <- df_new[rowSums(df_new[])>0,]
#Identity matrix: one constraint row per planning unit
#(note: uses `=` assignment; the rest of the file uses `<-`)
matrix_data=matrix(0,nrow=nrow(df_new),ncol=nrow(df_new))
# assign value to 1
diag(matrix_data)=1
#We need as many replicates as there are cost increments: decision variables are
#(planning unit x increment) pairs; each row enforces "pick at most one budget
#increment per unit" once combined with the <= 1 RHS below
pu_matrix <- do.call(cbind, replicate(y+1,matrix_data, simplify=FALSE))
#Join the NPV data to this matrix, for each cost increment
#First make a long string of all costs
#Start with the original (increment 0), then append increments 1..y
npv.all.incre <- df_new0$npv
for (i in 1:y){
  df.name <- paste0("df_new", i)
  npv.all.incre <- c(npv.all.incre, get(df.name)$npv)
  #Hack - make the cost of the NA values so high that they would never be selected
  npv.all.incre[is.na(npv.all.incre)] = 1000000000000000
}

#Join it to the planning unit matrix: the final row is the budget constraint
m <- rbind(pu_matrix, npv.all.incre)
#Make sparse (Matrix::drop0) for the Gurobi solver
constr_matrix <- drop0(m, tol = 0)

#Make a long string of koala area (NAs -> 0 so they contribute nothing)
karea.all.incre <- df_new0$area
for (i in 1:y){
  df.name <- paste0("df_new", i)
  karea.all.incre <- c(karea.all.incre, get(df.name)$area)
  karea.all.incre[is.na(karea.all.incre)] = 0
}

#Make a long string of all conservation benefit (the optimisation objective)
cons.all.incre <- df_new0$cons.benefit
for (i in 1:y){
  df.name <- paste0("df_new", i)
  cons.all.incre <- c(cons.all.incre, get(df.name)$cons.benefit)
  cons.all.incre[is.na(cons.all.incre)] = 0
}

#Make a long string of all area benefit
#NOTE(review): identical to karea.all.incre above (both read $area) - confirm
#whether one of these was meant to use a different column
area.all.incre <- df_new0$area
for (i in 1:y){
  df.name <- paste0("df_new", i)
  area.all.incre <- c(area.all.incre, get(df.name)$area)
  area.all.incre[is.na(area.all.incre)] = 0
}

#Make a long string of all SD (column 2 of each per-increment csv)
sd.all.incre <- sd.df0[1:nrow(sd.df0),2]
for (i in 1:y){
  df.name <- paste0("sd.df", i)
  sd.all.incre <- c(sd.all.incre, get(df.name)[1:nrow(get(df.name)),2])
  sd.all.incre[is.na(sd.all.incre)] = 0
}

#Make a long string of all SE
se.all.incre <- se.df0[1:nrow(se.df0),2]
for (i in 1:y){
  df.name <- paste0("se.df", i)
  se.all.incre <- c(se.all.incre, get(df.name)[1:nrow(get(df.name)),2])
  se.all.incre[is.na(se.all.incre)] = 0
}


for (b in b.vec[1:10]){

#Set up and run the optimisation (to select priority planning units (LGA's))
model <- list()
#model$A <- matrix(c(data_frame_test$npv), nrow=1)
#nrow needs to be the number of planning units/LGA's + 1 (the 1 is the data vector)
model$A <- constr_matrix
model$obj        <- cons.all.incre
model$modelsense <- 'max'
model$rhs        <- c(rep(1, nrow(constr_matrix)-1), b)
model$sense      <- c(rep('<=',nrow(constr_matrix)-1),'<')
model$vtype      <- c('B')

params <- list(OutputFlag=0)

#Run
result <- gurobi(model, params)

print('Solution:')
print(result$objval)
print(result$x)
#save(result, file=paste0(outfolder, "./", scen, "_", b, ".RData"))
save(result, file = paste0(outfolder, "./", scen, "_", b, ".RData"))

#load("E:/Linkage/DSF code/private_land_conservation_DSF/outputs_v12/scenario2_101500000.RData")
###Export the results
###Need to be super careful about indexing here
#First create a solutions matrix to show which LGA was selected with which budget increment
solutions <- matrix(result$x, nrow=length(unique(pu.df$LGA)))
binary.sol <- rowSums(solutions)
#FOR HEAT MAPS
#Then put values to this solutions matrix so we can visualise the selected budget amounts
budget.matrix <- matrix(npv.all.incre, nrow=length(unique(pu.df$LGA)))
solutions.budget.matrix <- solutions*budget.matrix
budget.sol <- rowSums(solutions.budget.matrix)
#Change budget.sol to proportions instead of exact values
budget.sol.prop <- budget.sol/b*100

#Do the same for conservation benefit
consben.matrix <- matrix(cons.all.incre, nrow=length(unique(pu.df$LGA)))
solutions.consben.matrix <- solutions*consben.matrix
consben.sol <- rowSums(solutions.consben.matrix)

#And make one from consben/area
area.matrix <- matrix(((cons.all.incre/1000000)/(area.all.incre/1000000)), nrow=length(unique(pu.df$LGA)))
solutions.consben.area.matrix <- solutions*area.matrix
solutions.consben.area.matrix[is.nan(solutions.consben.area.matrix)] <- 0
consben.area.sol <- rowSums(solutions.consben.area.matrix)

#Generate the results

make.maps(outfoldermaps, scen)
exp.vals(outfoldervals, scen)

}


```

##To calculate benefits against approach 4
```{r, include=FALSE}

outfolder <- "outputs"
#Choose whichever scenario that you want to calculate the benefits for
scen <- "approach1"
for (b in b.vec[1:10]){
load("./preprocessing/budget.matrix_approach4.RData")
load("./preprocessing/consben.matrix_approach4.RData")
load("./preprocessing/cons.all.incre_approach4.RData")
load("./preprocessing/npv.all.incre_approach4.RData")
load("./preprocessing/area.matrix_approach4.RData")

load(paste0(outfolder, "./", scen, "_", b, ".RData"))
solutions <- matrix(result$x, nrow=length(unique(pu.df$LGA)))

exp.vals(outfoldervals, scen)
}


#Code to calculate other benefits to create Table S9.1 
outfolder <- "outputs"
#Load approach 4 variables
load("./preprocessing/budget.matrix_approach4.RData")
load("./preprocessing/consben.matrix_approach4.RData")
load("./preprocessing/cons.all.incre_approach4.RData")
load("./preprocessing/npv.all.incre_approach4.RData")
load("./preprocessing/area.matrix_approach4.RData")

scen <- "approach4_siteassess40_20300000"
load("./outputsapproach4_siteassess40_20300000.RData")
solutions <- matrix(result$x, nrow=length(unique(pu.df$LGA)))
exp.vals(outfoldervals, scen)

scen <- "approach4_siteassess50"
load("./outputs/approach4_siteassess50_20300000.RData")
solutions <- matrix(result$x, nrow=length(unique(pu.df$LGA)))
exp.vals(outfoldervals, scen)

scen <- "approach4_siteassess60"
load("./outputs/approach4_siteassess60_20300000.RData")
solutions <- matrix(result$x, nrow=length(unique(pu.df$LGA)))
exp.vals(outfoldervals, scen)

scen <- "approach4_rankbids25"
load("./outputs/approach4_rankbids25_20300000.RData")
solutions <- matrix(result$x, nrow=length(unique(pu.df$LGA)))
exp.vals(outfoldervals, scen)

scen <- "approach4_rankbids35"
load("./outputs/approach4_rankbids35_20300000.RData")
solutions <- matrix(result$x, nrow=length(unique(pu.df$LGA)))
exp.vals(outfoldervals, scen)

scen <- "approach4_rankbids45"
load("./outputs/approach4_rankbids45_20300000.RData")
solutions <- matrix(result$x, nrow=length(unique(pu.df$LGA)))
exp.vals(outfoldervals, scen)

scen <- "approach4_bctmetric1"
load("./outputs/approach4_bctmetric1_20300000.RData")
solutions <- matrix(result$x, nrow=length(unique(pu.df$LGA)))
exp.vals(outfoldervals, scen)

```







##19 priority areas only, using Approach 4
```{r}
#19 priority areas scenario: planning units are the priority investment areas
#(Invest_PPA) rather than LGAs, and the weighted benefit/area columns
#(t7.w, area.w) are used.
scen <- "19areas"
df <- df.org[c("Invest_PPA", "NewPropID", "npv.mean", "admin.mean", "rank.t7", "MeanAdopt", "MeanWTA.tot", "t7.w", "area.w")]
colnames(df) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")

#For parallel processing
#Split larger dataframe into list of smaller dataframes (ie. one for each planning unit)
split.df <- split(df, df$puid)
#Path to parallel processed outputs.
#Bug fix: this was a machine-specific absolute path ("E:/Linkage/..."), while
#the cleanup code below lists "./preprocessing/par" and every other section
#uses the relative project path - use the portable relative path here too.
pt <- "./preprocessing/par/"
```
In this case we first want to use the minimum npv value (for LGAs), this sets up for the next bit of the code which is increments
```{r, include=FALSE}
#Administrative cost data of processing past tenders is not available here, but cost of tenders are available through the Biodiversity Conservation Trust (BCT) with an appropriate licensing agreement
cost <- read.csv("./raw_data/bct_cost_data_26_7_22.csv")
#Start from the lowest NPV reported in the tender data, as in the other
#approaches. (Previously hard-coded as 1126260 - the min of the 26/7/22 data,
#which the line above computed and discarded; computing it keeps this value
#in sync with the CSV.)
df$npv <- min(cost$Approx_tot_investment)
```
Run the first set of simulations - this is for the first NPV increment
```{r, results = FALSE, include=FALSE}
#Clean out the parallel processing folder
# List all files in the folder
files <- list.files("./preprocessing/par", full.names = TRUE)
# Delete all files (note: unlink(files) is vectorised; sapply is not needed)
sapply(files, unlink)

#Run the simulations; properties.par writes per-unit files into pt
df_new0 <- properties.par(split.df, sim)
ll <- list.files(path=pt, pattern = '.RData', full.names=TRUE)
#gtools::mixedsort orders names with embedded numbers naturally
#(df_final2 before df_final10), so the combined file sorts last
ll <- mixedsort(ll)
#Keep all but the last file (1:length(ll)-1 == 0:(length(ll)-1); 0 is dropped)
ll <- ll[1:length(ll)-1]
file.remove(ll)
#Defines df_final ("19" = the 19 priority areas)
load(paste0(pt, "df_final19.RData"))
df_new0 <- df_final

#NOTE(review): `b` here carries whatever value the previous section's budget
#loop left behind; the filename is consistent with what properties.par wrote,
#but confirm this is intentional
llse <- list.files(path=pt, pattern = 'se.csv', full.names=TRUE)
llse <- mixedsort(llse)
llse <- llse[1:length(llse)-1]
file.remove(llse)
se.df0 <- read.csv(paste0(pt, "SE_", scen, "_", b, "_19_se.csv"))

llsd <- list.files(path=pt, pattern = 'sd.csv', full.names=TRUE)
llsd <- mixedsort(llsd)
llsd <- llsd[1:length(llsd)-1]
file.remove(llsd)
sd.df0 <- read.csv(paste0(pt, "SD_", scen, "_", b, "_19_sd.csv"))
```
Create NPV increments, with the min being the lowest NPV reported and the max being the highest.
y is the number of increments; as a guide, 65 increments equate to steps of roughly $1,000,000 each (the code below uses y = 33, i.e. coarser steps).
```{r}
y <- 33   # number of NPV increments (the prose above mentions 65 as a guide)
min_npv <- min(cost$Approx_tot_investment)
#Bug fix: "Approx_tot_investmen" (missing trailing "t") only worked through
#data.frame `$` partial matching and silently returns NULL on tibbles;
#spell the column name out in full.
max_npv <- max(cost$Approx_tot_investment)
incre <- (max_npv - min_npv) / y
#Append one NPV column per increment (npv1 ... npv<y>), where column npv_i
#holds the constant value min_npv + i * incre for every property.
df$npv.min <- min_npv
for (i in seq_len(y)) {
  df[[paste0("npv", i)]] <- min_npv + incre * i
}
```
Now we need to run separate simulations for each LGA budget increment - this creates y new dataframes
```{r, results = FALSE, include=FALSE}
##Create a new dataframe which has all puid's, so that new dataframes can be merged to this
#This ensures that even if an LGA has no successful bids (ie. the bidders are higher than the NPV), we capture that in the optimisation
#So we are merging everything to the complete set of LGA ids
df.to.merge <- data.frame(puid = unique(df$puid))
df_new0 <- merge(df.to.merge, df_new0, by = "puid", all = TRUE)

#Create a new dataframe for each increment (y above). Each iteration re-runs the
#bid simulations with that increment's NPV column and stores results in
#dynamically named globals (df_new<i>, se.df<i>, sd.df<i>) via assign()/get().
#NOTE(review): unlike the LGA sections, this loop does not drop all-zero rows
#after merging - confirm that is intentional for the 19-areas variant.
for (i in 1:y){
  #Select this increment's npv column and rename it to the generic "npv"
  df1 <- df[c("puid", "NewPropID", paste0("npv", i), "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")]
  colnames(df1) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")
  #check this
  split.df1 <- split(df1, df$puid)
  properties.par(split.df1, sim)
  
  #Keep only the last .RData written (mixedsort gives natural numeric order;
  #1:length(ll)-1 == 0:(length(ll)-1), so all but the last file is selected)
  ll <- list.files(path=pt, pattern = '.RData', full.names=TRUE)
  ll <- mixedsort(ll)
  ll <- ll[1:length(ll)-1]
  file.remove(ll)
  #Defines df_final for this increment
  load(paste0(pt, "df_final19.RData"))
  
  llse <- list.files(path=pt, pattern = 'se.csv', full.names=TRUE)
  llse <- mixedsort(llse)
  llse <- llse[1:length(llse)-1]
  file.remove(llse)
  se.df <- read.csv(paste0(pt, "SE_", scen, "_", b, "_19_se.csv"))
  assign(paste0("se.df", i), se.df)

  llsd <- list.files(path=pt, pattern = 'sd.csv', full.names=TRUE)
  llsd <- mixedsort(llsd)
  llsd <- llsd[1:length(llsd)-1]
  file.remove(llsd)
  sd.df <- read.csv(paste0(pt, "SD_", scen, "_", b, "_19_sd.csv"))
  assign(paste0("sd.df", i), sd.df)
  
  #Merge to the full puid set so areas with no successful bids are kept
  df_new <- df_final
  assign(paste0("df_new", i), df_new)
  merged <- merge(df.to.merge, get(paste0("df_new", i)), by = "puid", all = TRUE)
  assign(paste0("df_new", i), merged)
}
```
###Set up structures for the optimisation
Create matrix for constraint that says only 1 budget can be allocated per LGA
```{r}
#Create matrix where nrow and ncol = no. planning units (here: the 19 areas)
#(note: uses `=` assignment; the rest of the file uses `<-`)
matrix_data=matrix(0,nrow=length(unique(df$puid)),ncol=length(unique(df$puid)))
# assign value to 1
diag(matrix_data)=1
#We need as many replicates as there are cost increments: decision variables are
#(planning unit x increment) pairs; each row enforces "pick at most one budget
#increment per unit" once combined with the <= 1 RHS in the model below
pu_matrix <- do.call(cbind, replicate(y+1,matrix_data, simplify=FALSE))
#Join the NPV data to this matrix, for each cost increment
#First make a long string of all costs
#Start with the original (increment 0), then append increments 1..y
npv.all.incre <- df_new0$npv
for (i in 1:y){
  df.name <- paste0("df_new", i)
  npv.all.incre <- c(npv.all.incre, get(df.name)$npv)
  #Make the cost of the NA values so high that they would never be selected
  npv.all.incre[is.na(npv.all.incre)] = 1000000000000000
}

#Join it to the planning unit matrix: the final row is the budget constraint
m <- rbind(pu_matrix, npv.all.incre)
#Make sparse (Matrix::drop0) for the Gurobi solver
constr_matrix <- drop0(m, tol = 0)

#Make a long string of all conservation benefit (the optimisation objective;
#NAs -> 0 so they contribute nothing)
cons.all.incre <- df_new0$cons.benefit
for (i in 1:y){
  df.name <- paste0("df_new", i)
  cons.all.incre <- c(cons.all.incre, get(df.name)$cons.benefit)
  cons.all.incre[is.na(cons.all.incre)] = 0
}

#Make a long string of koala area
karea.all.incre <- df_new0$area
for (i in 1:y){
  df.name <- paste0("df_new", i)
  karea.all.incre <- c(karea.all.incre, get(df.name)$area)
  karea.all.incre[is.na(karea.all.incre)] = 0
}

#Make a long string of all area benefit
#NOTE(review): identical to karea.all.incre above (both read $area) - confirm
#whether one of these was meant to use a different column
area.all.incre <- df_new0$area
for (i in 1:y){
  df.name <- paste0("df_new", i)
  area.all.incre <- c(area.all.incre, get(df.name)$area)
  area.all.incre[is.na(area.all.incre)] = 0
}

#Make a long string of all SD (column 2 of each per-increment csv)
sd.all.incre <- sd.df0[1:nrow(sd.df0),2]
for (i in 1:y){
  df.name <- paste0("sd.df", i)
  sd.all.incre <- c(sd.all.incre, get(df.name)[1:nrow(get(df.name)),2])
  sd.all.incre[is.na(sd.all.incre)] = 0
}

#Make a long string of all SE
se.all.incre <- se.df0[1:nrow(se.df0),2]
for (i in 1:y){
  df.name <- paste0("se.df", i)
  se.all.incre <- c(se.all.incre, get(df.name)[1:nrow(get(df.name)),2])
  se.all.incre[is.na(se.all.incre)] = 0
}
```
Set up and run the optimisation (to select priority planning units (LGA's))
```{r, results = FALSE}
#Maximise total conservation benefit subject to: at most one budget increment
#per planning unit (rows of 1s, RHS 1) and total NPV within budget b (last row)
model <- list()
model$A <- constr_matrix
model$obj        <- cons.all.incre
model$modelsense <- 'max'
model$rhs        <- c(rep(1, nrow(constr_matrix)-1), b)
#Gurobi treats '<' the same as '<='
model$sense      <- c(rep('<=',nrow(constr_matrix)-1),'<')
#'B' is recycled: every decision variable is binary
model$vtype      <- c('B')

params <- list(OutputFlag=0)

#Run
result <- gurobi(model, params)

print('Solution:')
print(result$objval)
print(result$x)
#NOTE(review): paste0(outfolder, "./") yields "outputs./<scen>_<b>.RData";
#Windows strips the trailing dot in the directory name, but this path will
#fail on POSIX systems - consider paste0(outfolder, "/", ...)
save(result, file=paste0(outfolder, "./", scen, "_", b, ".RData"))

#Export the results
#Load in base shp file for the planning units, then reproject the 19-areas
#layer into the study region's CRS so the maps align
study_region_outline <- readOGR("./preprocessing/study_region_outline.shp")
crs <- proj4string(study_region_outline)
shp.pu <- readOGR("./preprocessing/19_areas/19_only.shp")
shp.pu <- spTransform(shp.pu, crs)

###Need to be super careful about indexing here
#First create a solutions matrix to show which LGA was selected with which budget increment
#(rows = planning units, columns = budget increments; result$x is filled column-wise)
solutions <- matrix(result$x, nrow=length(unique(df$puid)))
binary.sol <- rowSums(solutions)
#FOR HEAT MAPS
#Then put values to this solutions matrix so we can visualise the selected budget amounts
budget.matrix <- matrix(npv.all.incre, nrow=length(unique(df$puid)))
solutions.budget.matrix <- solutions*budget.matrix
budget.sol <- rowSums(solutions.budget.matrix)
#Change budget.sol to proportions instead of exact values
budget.sol.prop <- budget.sol/b*100

#Do the same for conservation benefit (scaled to millions here, unlike the
#LGA sections which use the raw values)
consben.matrix <- matrix(cons.all.incre/1000000, nrow=length(unique(df$puid)))
solutions.consben.matrix <- solutions*consben.matrix
consben.sol <- rowSums(solutions.consben.matrix)

#And make one from consben/area (0/0 for unselected units gives NaN, zeroed below)
area.matrix <- matrix(((cons.all.incre/1000000)/(area.all.incre/1000000)), nrow=length(unique(df$puid)))
solutions.consben.area.matrix <- solutions*area.matrix
solutions.consben.area.matrix[is.nan(solutions.consben.area.matrix)] <- 0
consben.area.sol <- rowSums(solutions.consben.area.matrix)

#Generate the results (19-areas variants of the helpers from functions.R)
make.maps.19areas(outfoldermaps, scen)
exp.vals.19areas(outfoldervals, scen)
```








##Approach 4 - Integrated approach (t7) single budget - Supporting information 8 (sensitivity analysis of landholder preferences model in the decision support framework)
Set up dataframe for appropriate scenario
Here we start with scenario 2 (the scenario that includes both conservation objectives and landholder preferences, or the "landholder informed" approach). In this scenario we are also using admin.mean (which is the average admin cost across all previous tenders). We allocate the same amount for each tender. 
```{r}
#Sensitivity run: neutralise landholder preference inputs so only price,
#probability and area drive the solution comparison
scen <- "approach4_sens_price_prob_area"
df <- df.org[c("LGA", "NewPropID", "npv.mean", "admin.mean", "rank.t7", "MeanAdopt", "MeanWTA.tot", "t7.w", "area")]
#Use these to make values homogeneous to interrogate the influence the variables have on the solutions
df$MeanAdopt <- 1
df$MeanWTA.tot <- 1
#df$area.w <- 1
colnames(df) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")
#For parallel processing
#Split larger dataframe into list of smaller dataframes (ie. one for each planning unit)
split.df <- split(df, df$puid)
#Need to specify path to parallel processed outputs
pt <- "./preprocessing/par/"
```
In this case we first want to use the minimum npv value (for LGAs), this sets up for the next bit of the code which is increments
```{r, include=FALSE}
# Past-tender administrative cost data are not public; tender cost data are
# available from the Biodiversity Conservation Trust (BCT) under an
# appropriate licensing agreement.
cost <- read.csv(file.path(".", "raw_data", "bct_cost_data_26_7_22.csv"))
# Seed every property with the lowest reported total investment as its NPV
df[["npv"]] <- min(cost[["Approx_tot_investment"]])
```
Run the first set of simulations - this is for the first NPV increment
```{r, results = FALSE, include=FALSE}
#Clean out the parallel processing folder
# List all files in the folder
files <- list.files("./preprocessing/par", full.names = TRUE)
# Delete all files (note: unlink(files) is vectorised; sapply is not needed)
sapply(files, unlink)
#Run the simulations (properties.par writes per-unit files into pt)
df_new0 <- properties.par(split.df, sim)
ll <- list.files(path=pt, pattern = '.RData', full.names=TRUE)
#Keep all but the last file (1:length(ll)-1 == 0:(length(ll)-1); 0 is dropped)
ll <- ll[1:length(ll)-1]
file.remove(ll)
#Defines df_final
load(paste0(pt, "df_final93.RData"))
df_new0 <- df_final
#Drop all-zero rows (units with no successful bids)
df_new0 <- df_new0[rowSums(df_new0[])>0,]

llse <- list.files(path=pt, pattern = 'se.csv', full.names=TRUE)
llse <- llse[1:length(llse)-1]
file.remove(llse)
se.df0 <- read.csv(paste0(pt, "SE_", scen, "_", b, "_93_se.csv"))

llsd <- list.files(path=pt, pattern = 'sd.csv', full.names=TRUE)
llsd <- llsd[1:length(llsd)-1]
file.remove(llsd)
sd.df0 <- read.csv(paste0(pt, "SD_", scen, "_", b, "_93_sd.csv"))
```
Create NPV increments, with the min being the lowest NPV reported and the max being the highest.
y is the number of increments; as a guide, 65 increments equate to steps of roughly $1,000,000 each (the code below uses y = 33, i.e. coarser steps).
```{r}
y <- 33                                     # number of NPV increments
min_npv <- min(cost$Approx_tot_investment)  # lowest reported NPV
max_npv <- max(cost$Approx_tot_investment)  # highest reported NPV
incre <- (max_npv - min_npv) / y            # step size between increments
# Record the floor, then append one column per increment: npv1 ... npv<y>,
# where column npv_i holds the constant value min_npv + i * incre
df$npv.min <- min_npv
for (i in seq_len(y)) {
  df[[paste0("npv", i)]] <- min_npv + incre * i
}
```
Now we need to run separate simulations for each LGA budget increment - this creates y new dataframes
```{r, results = FALSE, include=FALSE}
##Create a new dataframe which has all puid's (LGAs), so that new dataframes can be merged to this
#This ensures that even if an LGA has no successful bids (ie. the bidders are higher than the NPV), we capture that in the optimisation
#So we are merging everything to the complete set of LGA ids
df.to.merge <- data.frame(puid = unique(df$puid))
df_new0 <- merge(df.to.merge, df_new0, by = "puid", all = TRUE)

#Create a new dataframe for each increment (y above). Each iteration re-runs the
#bid simulations with that increment's NPV column and stores results in
#dynamically named globals (df_new<i>, se.df<i>, sd.df<i>) via assign()/get().
for (i in 1:y){
  #Select this increment's npv column and rename it to the generic "npv"
  df1 <- df[c("puid", "NewPropID", paste0("npv", i), "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")]
  colnames(df1) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")
  #check this
  split.df1 <- split(df1, df$puid)
  properties.par(split.df1, sim)
  #Keep only the last .RData written (1:length(ll)-1 == 0:(length(ll)-1);
  #the 0 index is dropped, so this selects all but the last file)
  ll <- list.files(path=pt, pattern = '.RData', full.names=TRUE)
  ll <- ll[1:length(ll)-1]
  file.remove(ll)
  #Defines df_final for this increment
  load(paste0(pt, "df_final93.RData"))
  
  llse <- list.files(path=pt, pattern = 'se.csv', full.names=TRUE)
  llse <- llse[1:length(llse)-1]
  file.remove(llse)
  se.df <- read.csv(paste0(pt, "SE_", scen, "_", b, "_93_se.csv"))
  assign(paste0("se.df", i), se.df)

  llsd <- list.files(path=pt, pattern = 'sd.csv', full.names=TRUE)
  llsd <- llsd[1:length(llsd)-1]
  file.remove(llsd)
  sd.df <- read.csv(paste0(pt, "SD_", scen, "_", b, "_93_sd.csv"))
  assign(paste0("sd.df", i), sd.df)
  
  #Merge to the full puid set so LGAs with no successful bids are kept,
  #then drop all-zero rows (same filtering as df_new0 above)
  df_new <- df_final
  assign(paste0("df_new", i), df_new)
  merged <- merge(df.to.merge, get(paste0("df_new", i)), by = "puid", all = TRUE)
  assign(paste0("df_new", i), merged)
  assign(paste0("df_new", i), get(paste0("df_new", i))[rowSums(get(paste0("df_new", i))[])>0,])
}
```
###Set up structures for the optimisation
```{r}
#Create matrix for constraint that says only 1 budget can be allocated per LGA
#Create matrix where nrow and ncol = no. planning units
#(note: uses `=` assignment; the rest of the file uses `<-`)
matrix_data=matrix(0,nrow=length(unique(pu.df$LGA)),ncol=length(unique(pu.df$LGA)))
# assign value to 1
diag(matrix_data)=1
#We need as many replicates as there are cost increments: decision variables are
#(planning unit x increment) pairs; each row enforces "pick at most one budget
#increment per unit" once combined with the <= 1 RHS in the model below
pu_matrix <- do.call(cbind, replicate(y+1,matrix_data, simplify=FALSE))
#Join the NPV data to this matrix, for each cost increment
#First make a long string of all costs
#Start with the original (increment 0), then append increments 1..y
npv.all.incre <- df_new0$npv
for (i in 1:y){
  df.name <- paste0("df_new", i)
  npv.all.incre <- c(npv.all.incre, get(df.name)$npv)
  #Make the cost of the NA values so high that they would never be selected
  npv.all.incre[is.na(npv.all.incre)] = 1000000000000000
}

#Join it to the planning unit matrix: the final row is the budget constraint
m <- rbind(pu_matrix, npv.all.incre)
#Make sparse (Matrix::drop0) for the Gurobi solver
constr_matrix <- drop0(m, tol = 0)

#Make a long string of all conservation benefit (the optimisation objective;
#NAs -> 0 so they contribute nothing)
cons.all.incre <- df_new0$cons.benefit
for (i in 1:y){
  df.name <- paste0("df_new", i)
  cons.all.incre <- c(cons.all.incre, get(df.name)$cons.benefit)
  cons.all.incre[is.na(cons.all.incre)] = 0
}

#Make a long string of koala area (in this current formulation this is just property area)
karea.all.incre <- df_new0$area
for (i in 1:y){
  df.name <- paste0("df_new", i)
  karea.all.incre <- c(karea.all.incre, get(df.name)$area)
  karea.all.incre[is.na(karea.all.incre)] = 0
}

#Make a long string of all area benefit
#NOTE(review): identical to karea.all.incre above (both read $area) - confirm
#whether one of these was meant to use a different column
area.all.incre <- df_new0$area
for (i in 1:y){
  df.name <- paste0("df_new", i)
  area.all.incre <- c(area.all.incre, get(df.name)$area)
  area.all.incre[is.na(area.all.incre)] = 0
}

#Make a long string of all SD (column 2 of each per-increment csv)
sd.all.incre <- sd.df0[1:nrow(sd.df0),2]
for (i in 1:y){
  df.name <- paste0("sd.df", i)
  sd.all.incre <- c(sd.all.incre, get(df.name)[1:nrow(get(df.name)),2])
  sd.all.incre[is.na(sd.all.incre)] = 0
}

#Make a long string of all SE
se.all.incre <- se.df0[1:nrow(se.df0),2]
for (i in 1:y){
  df.name <- paste0("se.df", i)
  se.all.incre <- c(se.all.incre, get(df.name)[1:nrow(get(df.name)),2])
  se.all.incre[is.na(se.all.incre)] = 0
}
```
Set up and run the optimisation (to select priority planning units (LGA's))
```{r, results = FALSE}
## Build and solve the Gurobi MIP that selects, for each LGA, at most one
## budget increment, maximising total conservation benefit subject to the
## overall budget b.
model <- list()
model$A          <- constr_matrix   # one row per LGA + the cost row
model$obj        <- cons.all.incre  # maximise conservation benefit
model$modelsense <- 'max'
# RHS: each LGA row <= 1 (pick at most one increment); cost row bounded by b.
model$rhs        <- c(rep(1, nrow(constr_matrix)-1), b)
# Gurobi treats '<' and '<=' identically (all inequalities are non-strict).
model$sense      <- c(rep('<=',nrow(constr_matrix)-1),'<')
model$vtype      <- c('B')          # binary decision variables (recycled)

params <- list(OutputFlag=0)        # suppress the solver log

#Run
result <- gurobi(model, params)

print('Solution:')
print(result$objval)
print(result$x)
# Fix: the original path was paste0(outfolder, "./", ...), which produced
# "outputs./<file>" — a nonexistent "outputs." directory on POSIX systems
# (it only resolved on Windows, which strips trailing dots in components).
save(result, file = file.path(outfolder, paste0(scen, "_", b, ".RData")))

```
###Export the results
```{r, results = FALSE}
#Load in base shp file for the planning units
# NOTE(review): readOGR() comes from rgdal, which was retired in 2023;
# consider sf::st_read() when the toolchain is updated.
shp.pu <- readOGR("./preprocessing/LGA_study_region_clip_project.shp")

###Need to be super careful about indexing here
#First create a solutions matrix to show which LGA was selected with which budget increment
# Reshape the flat decision vector into one row per LGA and one column per
# budget increment (assumes length(result$x) == n.LGAs * (y + 1)).
solutions <- matrix(result$x, nrow=length(unique(pu.df$LGA)))
# 1 if the LGA was selected at any increment, 0 otherwise (each row has at
# most one 1 because of the per-LGA constraint).
binary.sol <- rowSums(solutions)
#FOR HEAT MAPS
#Then put values to this solutions matrix so we can visualise the selected budget amounts
budget.matrix <- matrix(npv.all.incre, nrow=length(unique(pu.df$LGA)))
solutions.budget.matrix <- solutions*budget.matrix
budget.sol <- rowSums(solutions.budget.matrix)
#Change budget.sol to proportions instead of exact values (% of total budget b)
budget.sol.prop <- budget.sol/b*100

#Do the same for conservation benefit (scaled to millions)
consben.matrix <- matrix(cons.all.incre/1000000, nrow=length(unique(pu.df$LGA)))
solutions.consben.matrix <- solutions*consben.matrix
consben.sol <- rowSums(solutions.consben.matrix)

#And make one from consben/area
# (the two 1e6 scale factors cancel algebraically; kept as written)
area.matrix <- matrix(((cons.all.incre/1000000)/(area.all.incre/1000000)), nrow=length(unique(pu.df$LGA)))
solutions.consben.area.matrix <- solutions*area.matrix
# 0/0 increments produce NaN; zero them so rowSums() stays finite.
solutions.consben.area.matrix[is.nan(solutions.consben.area.matrix)] <- 0
consben.area.sol <- rowSums(solutions.consben.area.matrix)

#Generate the results
# make.maps()/exp.vals() take only the output folder and scenario name, so
# they presumably read the *.sol objects above from the global environment —
# do not rename those variables. TODO confirm against functions.R.
make.maps(outfoldermaps, scen)
exp.vals(outfoldervals, scen)
```












##Figure 5 - Benefit over budget plot
```{r, include=FALSE}
# Figure 5: conservation benefit vs budget, one line per approach.
#
# Helper: read the ten per-budget "vals" csv files for one approach and
# return cons.ben as a single vector in increasing-budget order.  The
# alphabetical order returned by list.files() puts the smallest budget at
# index 7, hence the explicit re-indexing below.
#
# Fix: the original read each file into single-letter globals (a, b, c, ...),
# which CLOBBERED the global budget `b` (set in the setup chunk and used by
# later chunks to build file names such as "SE_<scen>_<b>_93_se.csv").
# Doing the reads inside a function keeps everything local.
read.approach.consben <- function(pattern) {
  fls <- list.files("./outputs_v18/vals_v18/", pattern = pattern, full.names = T, recursive = TRUE)
  budget.order <- c(7, 8, 9, 10, 1, 2, 3, 4, 5, 6)
  unlist(lapply(fls[budget.order], function(f) read.csv(f)$cons.ben))
}

all.consben.approach1 <- read.approach.consben("approach1_budget+")
all.consben.approach2 <- read.approach.consben("approach2_budget")
all.consben.approach3 <- read.approach.consben("approach3_budget")
all.consben.approach4 <- read.approach.consben("approach4_budget+")

# Long-format data frame: budget (x), summed benefit (y), approach (series).
df <- data.frame(
  Budget = rep(b.vec, 4),
  Consben = c(all.consben.approach1, all.consben.approach2, all.consben.approach3, all.consben.approach4),
  Scenario = c(rep("Approach1", 10), rep("Approach2", 10), rep("Approach3", 10), rep("Approach4", 10))
)

df$Scenario <- factor(df$Scenario, levels = c('Approach1', 'Approach2', 'Approach3', 'Approach4'))

# Budgets in millions of AUD for the axis label.
df$Budget <- df$Budget/1000000

library(ggplot2)

# All lines black; approaches are distinguished by line type only.
colors <- c("black", "black", "black", "black")
# Plot the data with custom line types and colors

s <- ggplot(df, aes(x = Budget, y = Consben, group = Scenario, linetype = Scenario, color = `Scenario`)) + 
  geom_line(size = 1) +
  scale_color_manual(values = colors) +
  scale_linetype_manual(values = c("solid", "dashed", "dotted", "twodash")) +  # Specify the line types here
  theme(panel.grid = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black"),
        legend.background = element_blank()) +
  xlab("Budget M $AUD") +
  ylab("Summed koala landscape capacity (future climate)")

ggsave("./outputs_v18/figures/df_lines_cons_incl_test.png", s, width = 10, height = 6)

```










##Simulation sensitivity analysis (approach4, b = 20300000)
```{r}
# Simulation counts to test: 0, 50, 100, ..., 1000.
sim.vec <- seq(from = 0, to = 1000, by = 50)
```

```{r}
scen <- "approach4"
# Select and rename the working columns from the master property table.
# NOTE(review): this uses rank.t0/t0.w although scen is "approach4"; the
# other approach-4 runs in this file use rank.t7/t7.w — confirm which is
# intended for the sensitivity analysis.
df <- df.org[c("LGA", "NewPropID", "npv.mean", "admin.mean", "rank.t0", "MeanAdopt", "MeanWTA.tot", "t0.w", "area.w")]
colnames(df) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")
#For parallel processing
#Split larger dataframe into list of smaller dataframes (ie. one for each planning unit)
split.df <- split(df, df$puid)
#Need to specify path to parallel processed outputs
# NOTE(review): machine-specific absolute E:/ path — other chunks use the
# relative "./preprocessing/par/"; confirm which location is correct.
pt <- "E:/Linkage/DSF code/private_land_conservation_DSF/preprocessing/par/"
```
In this case we first want to use the minimum npv value (for LGAs), this sets up for the next bit of the code which is increments
```{r, include=FALSE}
# Load the BCT cost data and seed every property's NPV with the minimum
# reported total investment (starting point for the increment loop below).
cost <- read.csv("./raw_data/bct_cost_data_26_7_22.csv")
# Fix: the minimum was previously hard-coded as 1126260; derive it from the
# data so a refreshed cost file cannot silently disagree (matches the
# equivalent setup later in this file).
df$npv <- min(cost$Approx_tot_investment)
```
Run through the first of simulations - this is for the first npv increment
```{r, results = FALSE, include=FALSE}
# Simulation sensitivity analysis: re-run the whole increment workflow for an
# increasing number of simulations (q) and record the mean standard error of
# the conservation benefit, to find where the SE stabilises.
df.mean.to.plot <- data.frame()
for (q in sim.vec){
  # NOTE(review): sim.vec starts at 0, so the first pass calls properties.par
  # with zero simulations — confirm this is intended.
  df_new0 <- properties.par(split.df, q)
  # Keep only the final parallel output and load it (defines df_final).
  # head(x, -1) replaces the original x[1:length(x)-1], which selected the
  # same "all but last" set only via the silently-dropped 0 index.
  ll <- list.files(path = pt, pattern = '.RData', full.names = TRUE)
  file.remove(head(ll, -1))
  load(paste0(pt, "df_final93.RData"))

  llse <- list.files(path = pt, pattern = '.csv', full.names = TRUE)
  file.remove(head(llse, -1))
  se.df0 <- read.csv(paste0(pt, "SE_scenario2_20300000_93.csv"))

  df_new0 <- df_final
  df_new0 <- df_new0[rowSums(df_new0[]) > 0, ]

  # NPV increments between the min and max reported investment.
  y <- 33
  min_npv <- min(cost$Approx_tot_investment)
  # Fix: was cost$Approx_tot_investmen (missing final 't'), which only worked
  # because data.frame `$` does partial name matching.
  max_npv <- max(cost$Approx_tot_investment)
  incre <- (max_npv - min_npv) / y
  # Add one npv column per increment: npv1 ... npv<y>.
  # NOTE(review): these columns are re-appended on every q iteration, so df
  # accumulates duplicate column names; name-based selection below always
  # picks the first copy, so results are unaffected, but df grows each pass.
  df$npv.min <- min_npv
  for (i in seq_len(y)) {
    new <- rep(min_npv, nrow(df))
    df[, ncol(df) + 1] <- new + incre * i
    colnames(df)[ncol(df)] <- paste0("npv", i)
  }

  # Merge everything to the complete set of LGA ids so LGAs with no
  # successful bids are still represented in the optimisation.
  df.to.merge <- data.frame(puid = unique(df$puid))
  df_new0 <- merge(df.to.merge, df_new0, by = "puid", all = TRUE)

  # Re-run the simulations once per increment, collecting SE summaries.
  for (i in seq_len(y)) {
    df1 <- df[c("puid", "NewPropID", paste0("npv", i), "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")]
    colnames(df1) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")
    split.df1 <- split(df1, df$puid)
    properties.par(split.df1, q)
    ll <- list.files(path = pt, pattern = '.RData', full.names = TRUE)
    file.remove(head(ll, -1))
    load(paste0(pt, "df_final93.RData"))

    llse <- list.files(path = pt, pattern = '.csv', full.names = TRUE)
    file.remove(head(llse, -1))
    se.df <- read.csv(paste0(pt, "SE_scenario2_20300000_93.csv"))
    assign(paste0("se.df", i), se.df)

    df_new <- df_final
    assign(paste0("df_new", i), df_new)
    merged <- merge(df.to.merge, get(paste0("df_new", i)), by = "puid", all = TRUE)
    assign(paste0("df_new", i), merged)
    assign(paste0("df_new", i), get(paste0("df_new", i))[rowSums(get(paste0("df_new", i))[]) > 0, ])
  }

  # Merge all per-increment SE tables by row id ("X") and average them.
  se.df.all <- se.df0
  for (i in seq_len(y)) {
    se.df.all <- merge(se.df.all, get(paste0("se.df", i)), by = "X")
  }

  # Fix: the original averaged over ALL columns, including the "X" row-id
  # column written by read.csv, which inflated the mean SE; drop it first.
  se.df.all$testMean <- rowMeans(se.df.all[, -1], na.rm = TRUE)
  mean.se <- mean(se.df.all$testMean)            # renamed: `mean` shadowed base::mean
  df.mean.to.plot <- rbind(df.mean.to.plot, data.frame(q = q, mean = mean.se))

}

# Fix: file.path() instead of paste0(dir, "./", ...), which produced a
# nonexistent "<dir>." directory component on POSIX systems.
write.csv(df.mean.to.plot,
          file = file.path(outfoldervals, paste0(scen, "_budget_", b, "_sensitivity_analysis.csv")),
          row.names = TRUE)
se.to.plot <- read_csv("outputs_v12/vals_v12/sensitivity_analysis/scenario2_budget_20300000_sensitivity_analysis_v1.csv")
# NOTE(review): machine-specific absolute D:/ path — inconsistent with the
# relative paths used elsewhere in this file; confirm.
jpeg(file="D:/Linkage/DSF code/private_land_conservation_DSF/outputs_v12/vals_v12/sensitivity_analysis/sensitivity_analysis_v2.jpeg")
# Fix: "beneft" -> "benefit" in the axis label.
plot(se.to.plot$q, se.to.plot$mean, xlab="No. of simulations", ylab="Mean standard error of conservation benefit", pch = 19)
dev.off()

# Timestamp marking the end of the run.
time <- Sys.time()
write.csv(time, file = ("./outputs/time_finished.csv"))

```



##Additional sensitivity analysis (Reviewer request)
The number of properties selected for site assessment (step 2 in Figure S1.3.1), the number of successful bids that are selected (step 5 in Figure S1.3.1), and the use of the Biodiversity Valuation score (step 4 in Figure S1.3.1).
```{r, include=FALSE}
#If not already loaded, need this for the make maps function
shp.pu <- readOGR("./preprocessing/LGA_study_region_clip_project.shp")

scen <- "approach4"
# Working columns for approach 4 (t7 benefit weights), renamed to the
# generic names used throughout the workflow.
df <- df.org[c("LGA", "NewPropID", "npv.mean", "admin.mean", "rank.t7", "MeanAdopt", "MeanWTA.tot", "t7.w", "area.w")]
colnames(df) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")

#In this case we first want to use the minimum npv value (for LGAs), this sets up for the next bit of the code which is increments
cost <- read.csv("./raw_data/bct_cost_data_26_7_22.csv")
df$npv <- min(cost$Approx_tot_investment)

#For parallel processing
#Split larger dataframe into list of smaller dataframes (ie. one for each planning unit)
split.df <- split(df, df$puid)
#Need to specify path to parallel processed outputs
pt <- "./preprocessing/par/"
# List all files in the folder
files <- list.files("./preprocessing/par", full.names = TRUE)
# Delete all files so the run starts from a clean output folder
sapply(files, unlink)
#Run  the simulations
df_new0 <- properties.par(split.df, sim)
# Keep only the final output file and load it (defines df_final).
# NOTE(review): `1:length(ll)-1` is (1:length(ll)) - 1, i.e. 0:(n-1); index 0
# is dropped, so this keeps all but the LAST file — same as head(ll, -1).
ll <- list.files(path=pt, pattern = '.RData', full.names=TRUE)
ll <- ll[1:length(ll)-1]
file.remove(ll)
load(paste0(pt, "df_final93.RData"))
df_new0 <- df_final
# Drop all-zero rows.
df_new0 <- df_new0[rowSums(df_new0[])>0,]

#Add se and sd values (standard error / standard deviation summaries)
llse <- list.files(path=pt, pattern = 'se.csv', full.names=TRUE)
llse <- llse[1:length(llse)-1]
file.remove(llse)
se.df0 <- read.csv(paste0(pt, "SE_", scen, "_", b, "_93_se.csv"))

llsd <- list.files(path=pt, pattern = 'sd.csv', full.names=TRUE)
llsd <- llsd[1:length(llsd)-1]
file.remove(llsd)
sd.df0 <- read.csv(paste0(pt, "SD_", scen, "_", b, "_93_sd.csv"))

#Create NPV increments, with the min being the lowest NPV reported and the
#max being the highest.  y is the number of increments (33 here; the original
#comment said 65, which no longer matched the code).
y <- 33
min_npv <- min(cost$Approx_tot_investment)
# Fix: was cost$Approx_tot_investmen (missing final 't'); it only worked
# because data.frame `$` does partial name matching.
max_npv <- max(cost$Approx_tot_investment)
incre <- (max_npv-min_npv)/y

#Add one new NPV column per increment: npv1 ... npv<y>
df$npv.min <- min_npv
for (i in seq_len(y)) {
  new <- rep(min_npv, nrow(df))                   # Create data for new column
  df[, ncol(df) + 1] <- new + incre * i           # Append new column
  colnames(df)[ncol(df)] <- paste0("npv", i)      # Rename column name
}

#Now we need to run separate simulations for each LGA budget increment - this creates y new dataframes
##Create a new dataframe which has all puid's, so that new dataframes can be merged to this
#This ensures that even if an LGA has no successful bids (ie. the bidders are higher than the NPV), we capture that in the optimisation
#So we are merging everything to the complete set of LGA ids
df.to.merge <- data.frame(puid = unique(df$puid))
df_new0 <- merge(df.to.merge, df_new0, by = "puid", all = TRUE)

#Create a new dataframe for each increment (y above): re-run the simulations
#with the increment's NPV and store the merged result as df_new<i>, with the
#SE/SD summaries as se.df<i>/sd.df<i>.
for (i in seq_len(y)) {
  df1 <- df[c("puid", "NewPropID", paste0("npv", i), "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")]
  colnames(df1) <- c("puid", "NewPropID", "npv", "admin.cost", "property", "prob.property", "bid.price", "cons.benefit", "area")

  split.df1 <- split(df1, df$puid)
  properties.par(split.df1, sim)
  # Keep only the final parallel output and load it (defines df_final).
  # head(x, -1) replaces the original x[1:length(x)-1], which selected the
  # same "all but last" set only via the silently-dropped 0 index.
  ll <- list.files(path = pt, pattern = '.RData', full.names = TRUE)
  file.remove(head(ll, -1))
  load(paste0(pt, "df_final93.RData"))

  # Standard errors for this increment.
  llse <- list.files(path = pt, pattern = 'se.csv', full.names = TRUE)
  file.remove(head(llse, -1))
  se.df <- read.csv(paste0(pt, "SE_", scen, "_", b, "_93_se.csv"))
  assign(paste0("se.df", i), se.df)

  # Standard deviations for this increment.
  llsd <- list.files(path = pt, pattern = 'sd.csv', full.names = TRUE)
  file.remove(head(llsd, -1))
  sd.df <- read.csv(paste0(pt, "SD_", scen, "_", b, "_93_sd.csv"))
  assign(paste0("sd.df", i), sd.df)

  # Merge onto the complete LGA id set so LGAs with no successful bids are
  # kept, then drop all-zero rows.
  df_new <- df_final
  assign(paste0("df_new", i), df_new)
  merged <- merge(df.to.merge, get(paste0("df_new", i)), by = "puid", all = TRUE)
  assign(paste0("df_new", i), merged)
  assign(paste0("df_new", i), get(paste0("df_new", i))[rowSums(get(paste0("df_new", i))[]) > 0, ])

}
###Set up structures for the optimisation
#Constraint: at most one budget increment may be selected per LGA.
#Identity matrix with one row/column per planning unit.
# NOTE(review): the export code below reshapes the solution with
# nrow = length(unique(pu.df$LGA)); confirm nrow(df_new) equals that count,
# since the constraint matrix here is sized from df_new.
df_new <- df_new[rowSums(df_new[]) > 0, ]   # already filtered above; harmless re-filter
matrix_data <- matrix(0, nrow = nrow(df_new), ncol = nrow(df_new))
diag(matrix_data) <- 1
#One block of columns per cost increment (y increments + the base run)
pu_matrix <- do.call(cbind, replicate(y + 1, matrix_data, simplify = FALSE))

# Helper: concatenate one extracted column across the base data frame
# ("<stem>0") and the y increment data frames ("<stem>1".."<stem>y"),
# replacing NAs with `na.fill`.  Replaces six copy-pasted loops.
collect.incre <- function(stem, extract, na.fill) {
  out <- extract(get(paste0(stem, 0)))
  for (i in seq_len(y)) {
    out <- c(out, extract(get(paste0(stem, i))))
  }
  out[is.na(out)] <- na.fill
  out
}

#NPV (cost) per LGA for every increment.  NA means the LGA had no successful
#bids at that price: give it a prohibitively large cost so it is never selected.
npv.all.incre <- collect.incre("df_new", function(d) d$npv, 1000000000000000)

#Join the cost row to the planning unit matrix and make it sparse
m <- rbind(pu_matrix, npv.all.incre)
constr_matrix <- drop0(m, tol = 0)

#Koala area (currently identical to property area in this formulation)
karea.all.incre <- collect.incre("df_new", function(d) d$area, 0)

#Conservation benefit (objective coefficients)
cons.all.incre <- collect.incre("df_new", function(d) d$cons.benefit, 0)

#Area benefit
area.all.incre <- collect.incre("df_new", function(d) d$area, 0)

#Standard deviation and standard error of the simulated benefit
#(column 2 of the per-increment summary files)
sd.all.incre <- collect.incre("sd.df", function(d) d[[2]], 0)
se.all.incre <- collect.incre("se.df", function(d) d[[2]], 0)


# Loop over all ten budget levels, re-solving the optimisation and exporting
# maps/values for each.
for (b in b.vec[1:10]){

  # Build the Gurobi MIP: pick at most one budget increment per LGA,
  # maximising total conservation benefit subject to the overall budget b.
  model <- list()
  model$A          <- constr_matrix   # one row per LGA + the cost row
  model$obj        <- cons.all.incre  # maximise conservation benefit
  model$modelsense <- 'max'
  model$rhs        <- c(rep(1, nrow(constr_matrix)-1), b)
  # Gurobi treats '<' and '<=' identically (all inequalities are non-strict).
  model$sense      <- c(rep('<=',nrow(constr_matrix)-1),'<')
  model$vtype      <- c('B')          # binary decision variables (recycled)

  params <- list(OutputFlag=0)        # suppress the solver log

  #Run
  result <- gurobi(model, params)

  print('Solution:')
  print(result$objval)
  print(result$x)
  # Fix: file.path() instead of paste0(outfolder, "./", ...), which produced
  # the nonexistent directory "outputs." on POSIX systems.  (A duplicate
  # commented-out save() line was removed.)
  save(result, file = file.path(outfolder, paste0(scen, "_", b, ".RData")))

  ###Export the results
  ###Need to be super careful about indexing here
  # Reshape the flat decision vector: one row per LGA, one column per
  # budget increment.
  # NOTE(review): constr_matrix above was built with nrow(df_new) rows;
  # confirm nrow(df_new) == length(unique(pu.df$LGA)) or this reshape is wrong.
  solutions <- matrix(result$x, nrow=length(unique(pu.df$LGA)))
  binary.sol <- rowSums(solutions)
  #FOR HEAT MAPS
  #Selected budget amount per LGA, and as a proportion of the total budget b
  budget.matrix <- matrix(npv.all.incre, nrow=length(unique(pu.df$LGA)))
  solutions.budget.matrix <- solutions*budget.matrix
  budget.sol <- rowSums(solutions.budget.matrix)
  budget.sol.prop <- budget.sol/b*100

  #Do the same for conservation benefit
  # NOTE(review): unlike the single-budget export earlier in this file, this
  # does NOT divide cons.all.incre by 1e6 — confirm the units are intended.
  consben.matrix <- matrix(cons.all.incre, nrow=length(unique(pu.df$LGA)))
  solutions.consben.matrix <- solutions*consben.matrix
  consben.sol <- rowSums(solutions.consben.matrix)

  #And make one from consben/area (the 1e6 factors cancel; kept as written)
  area.matrix <- matrix(((cons.all.incre/1000000)/(area.all.incre/1000000)), nrow=length(unique(pu.df$LGA)))
  solutions.consben.area.matrix <- solutions*area.matrix
  # 0/0 increments produce NaN; zero them so rowSums() stays finite.
  solutions.consben.area.matrix[is.nan(solutions.consben.area.matrix)] <- 0
  consben.area.sol <- rowSums(solutions.consben.area.matrix)

  #Generate the results (these functions presumably read the *.sol globals
  #built above — do not rename those variables)
  make.maps(outfoldermaps, scen)
  exp.vals(outfoldervals, scen)

}
```











