library(truncnorm)
library(plyr)
library(ggplot2)
library(scales)
library(reshape2)



## Full write up of the ABM will use the ODD template: https://www.comses.net/resources/standards/

## The model’s specific purpose is to predict the relative (per person and per unit of funding) effort required of applicants who prepare proposals
## in response to one- and two-stage funding processes and so to determine which, if either, of the two tends to require the least effort from applicants.

## The key features of the model that enable this are agents' (from here on applicants') tendency to discount the level of effort
## they will need to expend preparing a full proposal following a successful outline application. Without this, propensity to apply would not
## vary across call types.
## Applicants' desire to apply is driven by the need to acquire a pre-determined level of funding  and varies depending on
## their specific circumstances and characteristics.

## The model contains just two entities: applicants and a call whose characteristics are updated in a call update cycle. This is what marks the passage of time.
## Many of the user-defined variables are parameters of probability distributions (normal, truncated normal, Poisson, Beta) that are used to define values for applicants and call updates.


##################################################################
## place key user-defined variables here for ease of adjustment ##
##################################################################


n_applicants <- 1000                                 # number of applicants
effort_accrual_rate <- 1                             # rate at which applicants accrue the effort needed to submit an application
n_sims <- 49                                         # when carrying out multiple runs, how many to do. Square numbers allow a nicer presentation of grids of results.
n_call_updates <- 240                                # how many call_updates to run the simulation for
standard_grant_length <- n_call_updates              # standard duration of award, if made, in terms of number of call_updates. The equivalence of the two is needed in order to maintain the stability of the model
outline_perc <- 0.5                                  # fraction of calls that make use of an outline stage. There is no real reason not to make this 50:50.
full_proposal_ratio <- 1                             # ratio of the effort required to create an outline to that required to create a full proposal. Note that this is used as a basis for sampling and is not used as a fixed value. Its effect is explored later on by stsytematically varying it.
cv_outline_to_full <- 0.2                            # used to define how variable the values assigned with full_proposal_ratio are. Larger value = more variability. 'cv' as it's a coefficient of variation.
desired_full_success_rate <- 0.5                     # the % of full proposals to be funded following an outline process. In real life the number invited is often adjusted so that this is 50%.
time_to_end_moderator <- -0.1                        # a number that moderates how much the time to end of funding influences applicants' decisions about whether to apply. One of the less important variables TBH.
application_effort_multiplier <- 12                  # the number of times larger than the effort_accrual_rate that the effort required to prepare an application is. The bigger this number, the less chance there is of accruing the necessary effort. This number is important in making the simulations plausible and stable.
spend_moderator <- 4                                 # very important for balancing the tendency for probability of application to increase with the drag on it that arises from applicants retaining a memory of how much funding they have received. Does not reflect a real-world behaviour.
minimum_applicants <- 2								 # the minimum number of applicants that we allow to apply to a call. If the number is below this, an alternative process is followed.


#########################################################
## some less important, but still necessary, variables ##
#########################################################


fraction_of_total_per_year <- n_call_updates/standard_grant_length   	# derived value, be careful changing/defining manually. Used to work out what sum of funding to offer in calls
mean_cycle_target  <- 4                                      			# used to determine an initial value for each applicant's desire for funding
sd_cycle_target    <- 0.5                                    			# sd for that value, to be used to create a value sampled from a lognormal distribution
realism_beta_1 <- 5                                          			# used to produce a sampled value describing how realistic applicants are when it comes to assessing how much effort the full proposal stage of an outline process will require
realism_beta_2 <- 10                                          			# used to produce a sampled value describing how realistic applicants are when it comes to assessing how much effort the full proposal stage of an outline process will require
quality_min_beta_1 <- 2                                        			# one of four parameters used to define a beta distribution that determines applicants' inherent quality, which is then reflected in their probability of receiving funding when they prepare a proposal
quality_max_beta_1 <- 20                                       			# as above
quality_min_beta_2 <- 4                                        			# as above
quality_max_beta_2 <- 7                                        			# as above
target_fraction_beta_1 <- 5                                    			# to define a beta distribution that will determine what % of their desired amount applicants have at the start of the simulation
target_fraction_beta_2 <- 3                                    			# to define a beta distribution that will determine what % of their desired amount applicants have at the start of the simulation
burn_in_fraction <- 10                                          		# to determine what fraction of results to discard as the process stabilises, when reporting results(value used is 1/burn_in_fraction)
burn_out_fraction <- 10                                          		# to determine what fraction of results to discard before the process becomes homogenous, in terms of application probabilities, when reporting results(value used is 1/burn_out_fraction)
preceding_call_moderation_parameter <- 0.5                     			# to modify the probability of application based on the success rate of the preceding call. Larger number leads to stronger moderation. This is one of the less critical variables, but allows the introduction of plausible behaviour.
determined_applicants <- 50                                 			# a means of forcing application in cases where a call attracts no applicants. The fraction attracted in 'one in determined_applicants' and not this number itself (so that higher values lead to fewer applicants).
cycle_target_inflation <- 1                               				# increase in applicants' desire for funding, expressed as multiplier for each month - like compounding inflation. This may help sustain applications
minimum_p_of_application <- 0.01										# the lowest probability of application allowed for any applicant
systematic_variations <- 5											# for analysis which varies a parameter systematically, how many simulations to run (one each per variation). This is the bit that takes most time.
bootstrap_samples <- 1000												# for analysing the results using bootstrapping of the simulation outputs
plot_width <- 5
plot_height <- 5


##################################################################################################################################################################
## The next section describes functions that initialise applicants and call updates.
## These functions are called at the start of a simulation only
##
## First, the function to create_applicants() - that is, the agents in the model.
## This has no arguments as it is populated solely using the variables defined above
##
## Each applicant has:
##		an id (which can be used to monitor trajectories of individual applicants, if desired
##		a desired amount of funding, derived from a truncated lognormal distribution
## 		two parameters defining a Beta distribution that will be sampled from to determine the quality of a proposal submitted by the applicant when they respond to a call update
##		a rate of accrual of effort available to apply, sampled from a Poisson distribution. Some are keen applicants (high accrual rate), some are not inherently motivated to do so (low accrual rate)
## 		an initial shortfall percentage from the cycle_target for funding, sampled from a Beta distribution. A larger shortfall suggests the applicant is more likely to apply earlier on
##		a time until their current funding ends, determined on the basis of a uniform distribution
##		an initial accrued effort, which relies on both a uniform and a Poisson distribution. This is just the starting value. The accrued effort changes over time for each applicant.
##		an effort realism, drawn from a Beta distribution defined by the relevant environment variables. The higher the value, the less that an applicant ignores/discounts the effort required to prepare a full proposal after a successful outline (hence calling it realism)
##		an initial value of the amount of funding that an applicant has in a month that gets reset to 0 in every month that does not see an award made to this applicant
##		an initial probability of applying, derived from other initialised applicant variables


		create_applicants <- function() {

applicants <- data.frame(

                          applicant_id = 1:n_applicants,
                          cycle_target = round(10^(rtruncnorm(  n = n_applicants,
                                                                  a = 0,
                                                                  b = Inf,
                                                                  mean = mean_cycle_target,
                                                                  sd = sd_cycle_target)), digits = -2),
                          quality_beta_1_val = runif(n = n_applicants, min = quality_min_beta_1, max = quality_max_beta_1),
                          quality_beta_2_val = runif(n = n_applicants, min = quality_min_beta_2, max = quality_max_beta_2),
                          effort_rate = rpois(n_applicants, effort_accrual_rate),
                          initial_target_fraction = rbeta(n_applicants, target_fraction_beta_1, target_fraction_beta_2),
                          time_to_end = round(standard_grant_length - (standard_grant_length * runif(n_applicants)), digits = 0),
                          accrued_effort = (standard_grant_length - round(standard_grant_length - (standard_grant_length * runif(n_applicants)), digits = 0)) * rpois(n_applicants, effort_accrual_rate),
                          effort_realism = rbeta(n_applicants, realism_beta_1, realism_beta_2)
                        )

applicants$effort_rate <- ifelse(applicants$effort_rate == 0, 1, applicants$effort_rate)
applicants$cycle_funding <- applicants$cycle_target * applicants$initial_target_fraction      
applicants$time_to_end <- ifelse(applicants$time_to_end < 1, 1, applicants$time_to_end)
applicants$p_of_application <- (applicants$accrued_effort) / (applicants$initial_target_fraction * (applicants$time_to_end^time_to_end_moderator))
applicants$p_of_application <- applicants$p_of_application/max(applicants$p_of_application)
applicants$p_of_application[applicants$p_of_application < 0] <- 0

      return(applicants)

          }


##############################################################
## Second is the function to create_calls()
## The values that will be used to update the call to which applicants do or do not respond are created before the simulation runs
## and then indexed from a list when the call needs to be updated. it is useful to be able to see all the calls in one go.
## Calls can be either outline or straight_to_full.
## The process for determining the level of effort requireed to apply to a call ensures that the total effort required does not vary across call types.
## That is, that applicants submitting an outline and then a full proposal will on average have to expend the saem amount of effort as do
## applicants applying to straight-to-full calls. This is why the process may appear a bit complicated.
## Each call has:
##		an ID, which is also a way of keeping track of the call update as there is one call per update
##		an indicator saying whether it is an outline call (1) or a straight-to-full call (0)
##		a total value available to fund proposals submitted to the call. This is calculated in a way that should keep the amount of money in the system roughly constant
##		a level of effort associated with preparing an outline proposal. For straight-to-full calls this is set as 0. For outlines it is derived from a Poisson distribution
##		a level of effort associated with preparing a full proposal. For straight-to-full calls this is the total effort. For outline calls it is the total effort minus the effort expended to prepare an outline
##		a total effort, which is simply the sum of the outline and full effort
## effort is in arbitrary units
## create_calls() has one argument: the call_total, whcich is derived from the variables given above and reflects the funding in the system at initialisation


		create_calls <- function(call_total) {

calls <- data.frame(
                    call_id = 1:n_call_updates,
                    outline_ind = sample(x = 0:1, n_call_updates, replace = TRUE, prob = c(1-outline_perc, outline_perc)),
                    call_value = rnorm(n_call_updates, mean = call_total, sd = call_total/5)
                   )
calls$outline_effort <- rpois(n_call_updates, effort_accrual_rate * application_effort_multiplier)					# effort required to prepare an outline
calls$outline_effort <- ifelse(calls$outline_effort < 1, 1, calls$outline_effort)
calls$full_effort <- round(calls$outline_effort + (calls$outline_effort * rtruncnorm(	n = n_call_updates,			# outline effort plus additional effort required to write a full proposal after an outline
                                                                               			a = 0,
                                                                               			b = Inf,
                                                                               			mean = full_proposal_ratio,
                                                                               			sd = full_proposal_ratio*cv_outline_to_full) ), digits = 0)
calls$full_effort <- ifelse(calls$outline_ind == 1, calls$full_effort - calls$outline_effort, calls$full_effort)	# if outline, remove the outline effort from the full effort total and full_effort is now only the additional effort required
calls$outline_effort <- ifelse(calls$outline_ind == 0, 0, calls$outline_effort)										# if straight to full, outline effort is 0
calls$total_effort <- calls$outline_effort + calls$full_effort														# total effort required to  submit a full proposal, irrespective of call type
return(calls)

}




##############################################################################################################################
##############################################################################################################################
## The main function, used to run simulations
##
## The general idea is that
##
##		1) Each month the call updates
## 		2) Applicants decide whether to apply to it, based on their personal characteristics and those of the call.
## 		3) If they do not apply, their accrued_effort is increased by their effort_rate, and the time until they have 0% of their target funding decreases by one unit.
## 		4) If they apply their accrued_effort is reduced by the amount of effort it takes to apply
##		5) Their monthly funding and track record of funding are adjusted depending on what happened when they applied
##		6) Their level of accrued effort is adjusted to reflect the passage of time, and their probability of applying is recalculated
##		7) The call is updated with the next set of call characteristics
##
## Management of the simulation is done with a list of lists called 'sim' which contains five lists:
##
##		[[1]] is the applicants. The state of each applicant at the end of each call update is recorded in the relevant element of this list
##		[[2]] is the call update values. These are static, having been pre-calculated
##		[[3]] is a list to contain diagnostic outputs of the simulation. Each list element contains a range of measures/descriptors of what happened in the relevant call update.
##		[[4]] is a list of matrices that records the track record of funding for each applicant.
##		[[5]] is a list that is used solely to record the diversity of probabilities of application across applicants
##
## While this is called an ABM, deep down it's a dataframe of applicants (sim[[1]]) that gets updated in light of events.
##
## It is essential that the process is stable: that is, that it can run for a useful number of call updates without problems occurring.
## These problems will chiefly be: nobody applying, too many people applying, sucecss rates being implausible.
## It also needs to result in realistic 'pattern's (as defined in the ODD framework). There should be characteristics of the system that plausibly mimic,
## but not necessarily recreate perfectly, a real system.
## This makes the whole thing a delicate balancing act. Many of the features and parameters of the model exist solely to achieve this balance.
## Others exist ebcause they mimic likely behaviours or decision processes and so are needed to create a simulation that reflects reality
## The main function - run_simulations() - has no arguments


        run_simulations <- function() {

								
						#################################################################################################
						# First, initialise everything - create required lists, and set the initial state of applicants #
						#################################################################################################

applicants <- create_applicants()											# create the applicants
calls <- create_calls(call_total = sum(applicants$cycle_funding))			# create the calls

applicants_list <- vector(mode = "list", length = n_call_updates)           # indexed as [[1]] - this is the updatable df of applicants, recording their history and behaviour
call_list <- split(calls, calls$call_id)                             	 	# indexed as [[2]] - the values to be used in call updates
applicants_list[[1]] <- applicants                                    		# initiate the model run with the initial df of applicants
diagnostics <- vector(mode = "list", length = n_call_updates)               # indexed as [[3]] - for analysis of the results
funding_matrix <- vector(mode = "list", length = nrow(applicants))    		# indexed as [[4]] - to record funding history of each applicant
papps <- vector(mode = "list", length = n_call_updates)						# indexed as [[5]] - to record the variation in p_of_application across call updates

for (a in 1:length(funding_matrix)) {										# fill the matrix with zeroes
                                      funding_matrix[[a]] <- matrix(0, nrow = n_call_updates, ncol = n_call_updates)
                                    }

sim <- list(applicants_list, call_list, diagnostics, funding_matrix, papps)  		# create list of the five main lists.


			###########################################################################################################################
			# Then a big 'for...' loop that applies all the rules based on the initial state of applicants and the call update values #
			###########################################################################################################################


for(i in 1:n_call_updates) {

    	message(paste0("Month ", i, " of ", n_call_updates, " in simulation ", k, " of ", n_sims))		# while it can be useful to see the simulation progressing, this is not essential and dropping it saves a bit of time

  temp <- sim[[1]][[i]]														# the working df, in/to which things happen and which is used to describe the next update of applicants, will be called 'temp'
  temp$adjusted_effort <- temp$accrued_effort * temp$p_of_application		# determine how much effort applicants have available to them to prepare a proposal

  # determine relevant effort perceived by each applicant, which varies depending on call type

   if(sim[[2]][[i]]$outline_ind == 0) {

             perceived_effort <-  rep(sim[[2]][[i]]$full_effort, times = nrow(temp))

              } else {

             perceived_effort <- sim[[2]][[i]]$full_effort*sim[[1]][[i]]$effort_realism + sim[[2]][[i]]$outline_effort

              }

  temp$perceived_effort <- perceived_effort														# add the perceived_effort to temp

  temp$applies_to_this_call <- ifelse(temp$perceived_effort <= temp$adjusted_effort, 1, 0)		# the central decision - does the applicant prepare a proposal in response to this call update?
  temp$applies_to_this_call[is.na(temp$applies_to_this_call)] <- 0								# to catch errors if they occur and define those affected by them as non-applicants

  message(paste0(sum(temp$applies_to_this_call), " applicants to this call"))

  																								# sometimes no applicants apply, and we need to handle this
  																								# apply a rule that if < a defined number of all applicants applies, the most likely applicants to apply will apply anyway, up to a stochastically-defined % of the total
  																								# this may keep otherwise unviable processes running, so we need to indicate to what extent this is happening so that simulations which make excessive use of this feature can be discarded

  null_value_applicants <- as.integer(nrow(temp) / rpois(1, determined_applicants))
	
  if(null_value_applicants <= 2 )  { null_value_applicants <- determined_applicants }           # a final backstop in case rpois returns 0, but this is unlikely for useful values of determined_applicants

   if(length(temp$applies_to_this_call[temp$applies_to_this_call == 1]) < minimum_applicants) {

            keen_applicants <- temp[, c("applicant_id", "p_of_application")]						# identify a set of applicants who most want to apply (based on their p_of_application)
            keen_applicants <- keen_applicants[order(-keen_applicants$p_of_application), ]			# order by p_of_application (descending)
            keen_applicants <- keen_applicants[1:null_value_applicants, c("applicant_id")]			# get id's of the keen applicants
            temp$applies_to_this_call <- ifelse(temp$applicant_id %in% keen_applicants, 1, 0)		# make them apply
            temp$is_unpopular_call <- 1																# indicate that this process has been used for this call update
            temp$accrued_effort <- (standard_grant_length - round(standard_grant_length - (standard_grant_length * runif(n_applicants)), digits = 0)) * rpois(n_applicants, effort_accrual_rate)
            temp$p_of_application <- (temp$accrued_effort) / (temp$initial_target_fraction * (temp$time_to_end^time_to_end_moderator))
            temp$p_of_application <- temp$p_of_application/max(temp$p_of_application, na.rm = TRUE)

             } else {

            temp$is_unpopular_call <- 0																# if the call is not unpopular, indicate that

            }

  
					##########################################################################
					# Now that we know who is going to apply, implement the decision process #
					##########################################################################

		
    meeting <- temp[which(temp$applies_to_this_call == 1) , c("applicant_id", "cycle_target", "quality_beta_1_val", "quality_beta_2_val")]	# extract all those applying and name them the 'meeting'
    meeting$proposal_quality <- rbeta(nrow(meeting), meeting$quality_beta_1_val, meeting$quality_beta_2_val)									# determine the quality of the proposal which each applicant prepares
    meeting <- meeting[order(-meeting$proposal_quality), ]																						# order by proposal_quality

      # later on the cycle_target is multiplied by standard_grant_length to give the grant value, but standard_grant_length is inflated to moderate application behaviour
      # without adjustment this leads to low numbers funded, and low success rates so adjust the meeting's (NB not the applicant's) value of cycle_target to compensate for this
	  # this is where spend_moderator comes in, and why it is so powerful

    meeting$cycle_target <- meeting$cycle_target/spend_moderator

	  # here the process has to split into two forms - one for outline + full processes and one for straight-to-full processes
      # the key pieces of informtaion derived here are the effort expended in this meeting, whether applicants were funded, and if so how much they got
      # also determine meeting success rate - to be used to modify all applicants' propensity to apply in the next month. For this use only full proposal rate in the case of outline processes as only that data will typically be public

  				if(sim[[2]][[i]]$outline_ind == 0) {      												# for straight-to-full calls

        		message("Straight to full call")

	  # start with a process that identifies whether any applicants are going to request more than the amount of funding available in the call
	  # remove all full proposals which are larger than the budget

							if(nrow(meeting[which(meeting$cycle_target * standard_grant_length < sim[[2]][[i]]$call_value), ]) > 0) {

        					meeting <- meeting[which(meeting$cycle_target * standard_grant_length < sim[[2]][[i]]$call_value), ]  

            				}


	  # funded proposals are those for which the cumulative total requested in (ranked) proposals in the meeting does not exceed the amount of funding available in the call

	meeting$cumulative_requested <- cumsum(meeting$cycle_target * standard_grant_length)
    meeting$outcome <- ifelse(meeting$cumulative_requested <= sim[[2]][[i]]$call_value, 1, 0)
      
			if(sum(meeting$outcome == 0)) meeting$outcome[1] <- 1                   # must have at least one funded in each meeting
    
	meeting$invite <- 0																# although this is not an outline call, create this column to allow further processing later
    effort_expended <- sim[[2]][[i]]$full_effort * nrow(meeting)					# calculate the total effort expended in this meeting. Because it is straight-to-full it's just the full_effort multiplied by the number of applicants
    meeting_success_rate <- sum(meeting$outcome)/nrow(meeting)						# calculate the meeting success rate

        	message(paste0("Applicants applying = ", nrow(meeting)))
    
	meeting_nrow <- nrow(meeting)

          } else {                          # for outline calls. The process is conceptually similar except that in the first stage applicants are invited (or not) before funding decisions are made.

      
        		message("Outline call")

    meeting <- meeting[which(meeting$cycle_target * standard_grant_length < sim[[2]][[i]]$call_value), ]  
    meeting$cumulative_requested <- cumsum(meeting$cycle_target * standard_grant_length)
        
				message(paste0("Applied at outline = ", nrow(meeting)))

    meeting$invite <- ifelse(meeting$cumulative_requested <= (1/desired_full_success_rate)*sim[[2]][[i]]$call_value, 1, 0)		# applying the rule that the success rate at full proposal ought to be simialr to the desired_full_success_rate
    outline_effort_expended <- sim[[2]][[i]]$outline_effort * nrow(meeting)
    meeting <- meeting[which(meeting$invite == 1), ]								# retain only invited applicants

        		message(paste0("Invited to full = ", nrow(meeting)))

    meeting$proposal_quality <- rbeta(nrow(meeting), meeting$quality_beta_1_val, meeting$quality_beta_2_val)  				# recalculate quality for the full proposal - could retain original quality score but this is likely to be closer to the truth
    meeting <- meeting[order(-meeting$proposal_quality), ]
    meeting$cumulative_requested <- cumsum(meeting$cycle_target * standard_grant_length)
    full_effort_expended <- sim[[2]][[i]]$full_effort * nrow(meeting)
    meeting$outcome <- ifelse(meeting$cumulative_requested <= sim[[2]][[i]]$call_value, 1, 0)
      
			if(sum(meeting$outcome == 0)) meeting$outcome[1] <- 1                                        # must have at least one funded in each meeting
    
	effort_expended <- outline_effort_expended + full_effort_expended
    meeting_success_rate <- sum(meeting$outcome)/nrow(meeting)
    meeting_nrow <- nrow(meeting)

            }

										##########################
										# End of meeting process #
										##########################

		# next step is to add the outcome information into the applicants' records and update their states


    temp <- merge(temp, meeting[ , c("applicant_id", "outcome", "invite")], by.x = "applicant_id", by.y = "applicant_id", all.x = TRUE)
    temp$outcome <- ifelse(is.na(temp$outcome), 0, temp$outcome)										# applicants who did not apply will be NA after the merge, so correct this
    temp$award_size <- ifelse(temp$outcome == 1, temp$cycle_target*standard_grant_length, 0)			# Note that this is the applicant's true monthly target, not the one in the meeting 
    temp$invite[is.na(temp$invite)] <- 0            													# convert NA to 0 to indicate that there was no invitation to submit a full proposal, whether because they failed or did not even apply
      

      # to record the last time that an award was made to an applicant 

      if(i == 1) {

          temp$last_funded_in_round <- ifelse(temp$outcome == 1, i, NA)  

            } else {

          temp$last_funded_in_round <-  ifelse(!is.na(temp$last_funded_in_round) & temp$outcome != 1, temp$last_funded_in_round,
                                        ifelse(is.na(temp$last_funded_in_round) & temp$outcome != 1, NA, i))

            }


 	# modify time_to_end to reflect the outcome of the call update

      
        temp$time_to_end <- ifelse(temp$outcome == 1, standard_grant_length/spend_moderator, temp$time_to_end - 1)	# divide by spend_moderator to induce repeated application behaviour a bit more strongly in successful applicants


    # calculate the accrued total funding for each applicant. This balances out the tendency for p_application to inflate and so keeps the process stable
    # we achieve this by populating the rows of sim[[4]] i.e. the funding_matrix, month-by-month with the relevant values and summing the relevant column for each applicant


          for(w in 1:nrow(temp)) {

            time_to_end <- temp$time_to_end[w]

            if(time_to_end < 0|is.na(time_to_end)) {time_to_end <- 1}

            before <-   rep(0, times = (i - 1))                                        # populate with zero before the most recent award started
            now <-      rep(temp$cycle_funding[w], times = time_to_end)                # populate with relevant number of current (i.e. before the most recent award has started) cycle_funding values

            all_payments <- c(before, now)                                             # combine

            if(length(all_payments) < n_call_updates) {

                                                all_payments <- c(all_payments, rep(0, times = n_call_updates - length(all_payments)))

                                              } else {
                                                
                                                all_payments <- all_payments[1:n_call_updates]

                                              }

            sim[[4]][[w]][i, ] <- all_payments 
            
                                  }

    # determine how much each applicant's most recent accrued monthly funding is, and what fraction of the target this is

          temp$cycle_funding <- temp$award_size/standard_grant_length      		# reset to zero ready for the next month, in cases where the applicant did not receive new funding
          temp$accrual_of_cycle_funding <- ldply(lapply(sim[[4]], function(x) {

                                        total_funding <- sum(x[ ,i])

                                          }))$V1
                                    

          temp$target_fraction <- temp$accrual_of_cycle_funding / temp$cycle_target


    # add newly-accrued effort to each applicant and adjust this if they applied
    # if they are funded AND target_fraction is > 1 accrued_effort is set to their baseline level


        temp$accrued_effort <- ifelse(temp$applies_to_this_call == 1 & sim[[2]][[i]]$outline_ind == 1 & temp$outcome == 1 & temp$target_fraction >= 1,      	# applied to outline call, and the full proposal was funded
                                                          temp$effort_rate,                                                                                  	# and they are sated
                                ifelse(temp$applies_to_this_call == 1 & sim[[2]][[i]]$outline_ind == 1 & temp$outcome == 1 & temp$target_fraction < 1,      	# applied to outline call, and the full proposal was funded
                                                          temp$accrued_effort - sim[[2]][[i]]$total_effort + temp$effort_rate,                              	# but they are not yet sated
                                ifelse(temp$applies_to_this_call == 1 & sim[[2]][[i]]$outline_ind == 1 & temp$invite == 0,                        				# applied to outline call, but were not invited
                                                          temp$accrued_effort - sim[[2]][[i]]$outline_effort + temp$effort_rate,
                                ifelse(temp$applies_to_this_call == 1 & sim[[2]][[i]]$outline_ind == 1 & temp$outcome == 0 & temp$invite == 1,    				# applied to outline call, were invited but were not funded
                                                          temp$accrued_effort - sim[[2]][[i]]$total_effort + temp$effort_rate,
                                ifelse(temp$applies_to_this_call == 1 & sim[[2]][[i]]$outline_ind == 0 & temp$target_fraction >= 1,               				# applied to full call and sated
                                                          temp$effort_rate,
                                ifelse(temp$applies_to_this_call == 1 & sim[[2]][[i]]$outline_ind == 0 & temp$target_fraction < 1,               				# applied to full call but not sated
                                                          temp$accrued_effort - sim[[2]][[i]]$total_effort + temp$effort_rate,
                                                          temp$accrued_effort + temp$effort_rate                                                  				# did not apply at all
                                                  ))))))


    # determine updated probability of application


      	temp$time_to_end_adjusted <- temp$time_to_end - min(temp$time_to_end)


    # define time_to_end_adjusted so that after 12 call updates without funding the desire for funding does not constantly increase, to damp down excessive application behaviour


      	temp$time_to_end_adjusted <- ifelse(temp$time_to_end_adjusted < -12, -12, temp$time_to_end_adjusted)


	# the probability of application ought to increse with more accrued effort, decrease as the funding held increases relative to that desired and
	# decrease as the time to end of funding decreases.
	# Need also to deal with various eventualities relating to Inf etc before standardising the data to 1


      	temp$p_of_application <- (temp$accrued_effort) / (temp$target_fraction * (temp$time_to_end_adjusted^time_to_end_moderator))
      	temp$p_of_application[is.infinite(temp$p_of_application)] <- max(temp$p_of_application[!is.infinite(temp$p_of_application)])
      	suppressWarnings(temp$p_of_application[is.nan(temp$p_of_application)] <- max(temp$p_of_application[!is.nan(temp$p_of_application)], na.rm = TRUE))
      	temp$p_of_application <- temp$p_of_application/max(temp$p_of_application)


    # moderate p_of_application on the basis of the most recent meeting's success rate
    # the power to which meeting_success_rate is raised controls the strength of the effect it has


      temp$p_of_application <- temp$p_of_application  - (1 - meeting_success_rate^preceding_call_moderation_parameter)


    # the model can fail to run if the p_of_application drops to 0 for too many applicants
    # when this happens, nobody applies and the meeting composition step uses the forcing route
    # to moderate this, assume that all applicants have at least a small chance of applying in a given year


      temp$p_of_application[temp$p_of_application <= 0|is.na(temp$p_of_application)] <- minimum_p_of_application


	# to record the number of times an applicant has been successful 


      if(i == 1) {

          temp$total_successful_applications <- ifelse(temp$outcome == 1, 1, 0)  

            } else {

          temp$total_successful_applications <-  temp$total_successful_applications + temp$outcome

            }

		
	# to record the number of times an applicant has applied 


      if(i == 1) {

          temp$total_applications <- ifelse(temp$applies_to_this_call == 1, 1, 0)  

            } else {

          temp$total_applications <-  temp$total_applications + temp$applies_to_this_call

            }


    # create a dataframe of diagnostics based on this run and store it in the appropriate slot in a list


        sim[[3]][[i]] <- data.frame(

                                  mean_time_to_end = mean(temp$time_to_end),
                                  mean_accrued_effort = mean(temp$accrued_effort),
                                  mean_cycle_funding = mean(temp$cycle_funding),
                                  mean_p_of_application = round(mean(temp$p_of_application), digits = 2),
                                  number_applying_in_month = sum(temp$applies_to_this_call),
                                  mean_award_size = mean(temp$award_size[temp$outcome == 1])/10,  # makes the numbers look 'nicer' but has no effect
                                  total_awarded = sum(temp$award_size[temp$outcome == 1])/10,  # makes the numbers look 'nicer' but has no effect
                                  meeting_full_proposal_success_rate = round(sum(meeting$outcome)/length(meeting$outcome), digits = 2),
                                  meeting_overall_application_success_rate = round(sum(meeting$outcome)/sum(temp$applies_to_this_call), digits = 2),
                                  meeting_is_outline = sim[[2]][[i]]$outline_ind,
                                  call_is_unpopular = unique(temp$is_unpopular_call),
                                  total_applicant_effort_expended = effort_expended,
                                  applicants_funded = sum(temp$outcome),
                                  month = i

                                  )

    # remove columns that will interfere with the update process

      temp$applies_to_this_call <- NULL
      temp$invite <- NULL
      temp$award_size <- NULL
      temp$outcome <- NULL
      temp$time_to_end_adjusted <- NULL
      temp$accrual_of_cycle_funding <- NULL

    # adjust cycle_target upwards, akin to inflation - this may not be useful as a feature. If not keep the value of cycle_target_inflation set to 1

      temp$cycle_target <- temp$cycle_target * cycle_target_inflation

    # store results in the right place in the right list

      sim[[1]][[i + 1]] <- temp

		sim[[5]][[i]] <- temp$p_of_application

        }
      return(sim)

      }  # end of run_simulations function
    



##########################################################################################################################
## from here on, the process can be looped to generate a large number of repetitions, from which summary data can be
## reported. This is the point at which the simulations are actually run.


simulation_results <- vector(mode = "list", length = n_sims)
diagnostic_results <- vector(mode = "list", length = n_sims)
simulated_applicants <- vector(mode = "list", length = n_sims)
prob_of_application <- vector(mode = "list", length = n_sims)

for(k in 1:n_sims) {

sim <- run_simulations()

all_diagnostics <- ldply(sim[[3]])

## get rid of initial fraction of calls as a sort of burn-in process.

all_diagnostics <- all_diagnostics[round(nrow(all_diagnostics)/burn_in_fraction, digits = 0):(nrow(all_diagnostics) - round(nrow(all_diagnostics)/burn_out_fraction, digits = 0)), ]


effort_comparison <- ddply(all_diagnostics, c("meeting_is_outline"), summarise,
                                    mean_effort_expended = mean(total_applicant_effort_expended),
                                    sd_effort_expended = sd(total_applicant_effort_expended),
                                    mean_applicants = mean(number_applying_in_month),
                                    sd_applicants = sd(number_applying_in_month),
                                    mean_committed = mean(total_awarded),
                                    sd_committed = sd(total_awarded),
                                    perc_unpopular = sum(call_is_unpopular)/length(call_is_unpopular)
                                      )

effort_comparison$simulation_run <- k
all_diagnostics$simulation_run <- k

 	simulation_results[[k]] <- effort_comparison
  	diagnostic_results[[k]] <- all_diagnostics
	simulated_applicants[[k]] <- sim[[1]][[n_call_updates]] 	## take last iteration of the applicants table, for diagnostics of the applicants' behaviour
	prob_of_application[[k]] <- ldply(sim[[5]])

    } 



##################################################################################################################################################
##################################################################################################################################################
##################################################################################################################################################
##################################################################################################################################################
## Code to analyse results
## First get simulation outputs into the required shape


simulation_results <- ldply(simulation_results, .id = NULL)
diagnostic_results <- ldply(diagnostic_results, .id = NULL)
p_of_application_results <- vector(mode = "list", length = length(prob_of_application))

	for(i in 1:length(prob_of_application)) {

						x <- as.data.frame(t(prob_of_application[[i]]))

						res <- lapply(x, function(y) {

							temp <- length(unique(y))
							return(temp)

								})

						res <- ldply(res, .id = NULL)
						res$simulation_run <- i
						res$call_update <- 1:nrow(res)

						p_of_application_results[[i]] <- res

							}

p_of_application_results <- ldply(p_of_application_results)
names(p_of_application_results)[names(p_of_application_results) == "V1"] <- "unique_values"


## simulation_results is the summary of each individual simulation, diagnostic_results is the update by update summary of all simulations and is more detailed
## p_of_application_results is purely to hold the description of the diversity/homogeneity of applicants' p_of_application
## central question: does the average applicant effort expended per £ awarded vary depending on the nature of the call?


diagnostic_results$effort_per_M_cost <- diagnostic_results$total_applicant_effort_expended/(diagnostic_results$total_awarded/1e6)
diagnostic_results$effort_per_applicant <- diagnostic_results$total_applicant_effort_expended/diagnostic_results$number_applying_in_month	# this will always be lowest for outline processes, but it's useful to show this
diagnostic_results$effort_per_proposal_awarded <- diagnostic_results$total_applicant_effort_expended/diagnostic_results$applicants_funded


## arguably the most useful measure for the funder is the effort per unit cost awarded


effort_per_cost <- ddply(diagnostic_results, c("meeting_is_outline"), summarise,

                                    mean_ratio = mean(effort_per_M_cost, na.rm = TRUE), 
                                    median_ratio = median(effort_per_M_cost, na.rm = TRUE),
                                    sd_ratio = sd(effort_per_M_cost, na.rm = TRUE)
              )

effort_per_cost


## but from the point of view of the applicants the most useful measure is the effort per award made


effort_per_award <- ddply(diagnostic_results, c("meeting_is_outline"), summarise,

                                    mean_ratio = mean(effort_per_proposal_awarded, na.rm = TRUE), 
                                    median_ratio = median(effort_per_proposal_awarded, na.rm = TRUE),
                                    sd_ratio = sd(effort_per_proposal_awarded, na.rm = TRUE)
              )

effort_per_award


## prepare the data to show a violin plot of the effort per... across all simulations and call_updates
## record results as sd's of the data as the y axis scale has no meaning


effort_data <- diagnostic_results[ , c("meeting_is_outline", "effort_per_M_cost", "effort_per_applicant", "effort_per_proposal_awarded")]
effort_data <- melt(effort_data, id.vars = c("meeting_is_outline"))
effort_data <- ddply(effort_data, c("variable"), mutate, sd = sd(value))
effort_data$value <- effort_data$value / effort_data$sd

effort_data$meeting_is_outline <- ifelse(effort_data$meeting_is_outline  == 0, "Straight-to-full", "Outline + full")
effort_data$variable <- ifelse(effort_data$variable == "effort_per_M_cost", "Effort per value\nawarded", 
                           ifelse(effort_data$variable == "effort_per_applicant", "Effort per\napplicant applying", "Effort per\nproposal awarded"))


## calculate the % differences in effort across the meeting types
## The percentages calculated are the effort for the outline + full process relative to that of the straight to full process


effort_differences <- ddply(effort_data, c("variable", "meeting_is_outline"), summarise, mean = mean(value), median = median(value))
effort_differences <- ddply(effort_differences, c("variable"), summarise,
										mean_perc = mean[meeting_is_outline == "Outline + full"]/mean[meeting_is_outline == "Straight-to-full"],
										median_perc = median[meeting_is_outline == "Outline + full"]/median[meeting_is_outline == "Straight-to-full"]
								)
effort_differences


## bootstrap intervals for total effort per unit funding committed across all simulations


effort_per_value <- diagnostic_results[ , c("total_awarded", "meeting_is_outline", "total_applicant_effort_expended")]

effort_per_value_list <- vector(mode = "list", length = bootstrap_samples)

for(i in 1:bootstrap_samples) {

				temp <- effort_per_value[sample(1:nrow(effort_per_value), nrow(effort_per_value), replace = TRUE), ]
				outline_effort <- sum(temp$total_applicant_effort_expended[temp$meeting_is_outline == 1])
				stf_effort <- sum(temp$total_applicant_effort_expended[temp$meeting_is_outline == 0])
				outline_funding <- sum(temp$total_awarded[temp$meeting_is_outline == 1])
				stf_funding <- sum(temp$total_awarded[temp$meeting_is_outline == 0])
				outline_ratio <- outline_effort/outline_funding
				stf_ratio <- stf_effort/stf_funding
				ratio_ratio <- stf_ratio/outline_ratio

				effort_per_value_list[[i]] <- data.frame(outline_ratio = outline_ratio, stf_ratio = stf_ratio, ratio_ratio = ratio_ratio)

				}

effort_per_value <- ldply(effort_per_value_list)
effort_per_value_bootstrap <- data.frame(
										upper95 = quantile(effort_per_value$ratio_ratio, probs = c(0.975), na.rm = TRUE),
										lower95 = quantile(effort_per_value$ratio_ratio, probs = c(0.025), na.rm = TRUE),
										median = quantile(effort_per_value$ratio_ratio, probs = c(0.5), na.rm = TRUE)
										)
effort_per_value_bootstrap


## bootstrap intervals for total effort per award made
## this also is across all simulations


effort_per_award <- diagnostic_results[ , c("applicants_funded", "meeting_is_outline", "total_applicant_effort_expended")]
effort_per_award_list <- vector(mode = "list", length = bootstrap_samples)

for(i in 1:bootstrap_samples) {

				temp <- effort_per_award[sample(1:nrow(effort_per_award), nrow(effort_per_award), replace = TRUE), ]
				outline_effort <- sum(temp$total_applicant_effort_expended[temp$meeting_is_outline == 1])
				stf_effort <- sum(temp$total_applicant_effort_expended[temp$meeting_is_outline == 0])
				outline_awards <- sum(temp$applicants_funded[temp$meeting_is_outline == 1])
				stf_awards <- sum(temp$applicants_funded[temp$meeting_is_outline == 0])
				outline_ratio <- outline_effort/outline_awards
				stf_ratio <- stf_effort/stf_awards
				ratio_ratio <- stf_ratio/outline_ratio

				effort_per_award_list[[i]] <- data.frame(outline_ratio = outline_ratio, stf_ratio = stf_ratio, ratio_ratio = ratio_ratio)

				}

effort_per_award <- ldply(effort_per_award_list)
effort_per_award_bootstrap <- data.frame(
										upper95 = quantile(effort_per_award$ratio_ratio, probs = c(0.975), na.rm = TRUE),
										lower95 = quantile(effort_per_award$ratio_ratio, probs = c(0.025), na.rm = TRUE),
										median = quantile(effort_per_award$ratio_ratio, probs = c(0.5), na.rm = TRUE)
										)
effort_per_award_bootstrap


## alternatively, boostrap samples from the n_sims simulations to estimate the likely average across all simulations
## this is perhaps the nicest version of the reporting of the main result as it allows us to say that the effort required
## (per award or unit value) is XX% higher for straight to full than for outline +/- YY%, based on the simulation in the round (not any specific results)


cross_sim_bootstrap <- vector(mode = "list", length = bootstrap_samples)

for(i in 1:bootstrap_samples) {

		if(i %% 100 == 0) message(paste0("Sample ", i, " of ", bootstrap_samples))

		sims_to_use <- sample(1:n_sims, replace = TRUE)
		temp <- vector(mode = "list", length = length(sims_to_use))

			for(j in seq_along(sims_to_use)) {

				temp[[j]] <- diagnostic_results[which(diagnostic_results$simulation_run == sims_to_use[j]), ]

							}

		temp <- ldply(temp, .id = NULL)

		per_award_outline_ratio <- sum(temp$total_applicant_effort_expended[temp$meeting_is_outline == 1])/sum(temp$applicants_funded[temp$meeting_is_outline == 1])
		per_award_stf_ratio <- sum(temp$total_applicant_effort_expended[temp$meeting_is_outline == 0])/sum(temp$applicants_funded[temp$meeting_is_outline == 0])
		per_unit_cost_outline_ratio <- sum(temp$total_applicant_effort_expended[temp$meeting_is_outline == 1])/sum(temp$total_awarded[temp$meeting_is_outline == 1])
		per_unit_cost_stf_ratio <- sum(temp$total_applicant_effort_expended[temp$meeting_is_outline == 0])/sum(temp$total_awarded[temp$meeting_is_outline == 0])

		cross_sim_bootstrap[[i]] <- data.frame(

						per_award = per_award_stf_ratio/per_award_outline_ratio,
						per_unit_cost = per_unit_cost_stf_ratio/per_unit_cost_outline_ratio
				)

			}

cross_sim_bootstrap <- melt(ldply(cross_sim_bootstrap))
cross_sim_bootstrap <- ddply(cross_sim_bootstrap, c("variable"), summarise, 

						upper95 = quantile(value, probs = c(0.975)), lower95 = quantile(value, probs = c(0.025)), median = quantile(value, probs = c(0.5)))
		
cross_sim_bootstrap


## difference in mean and median number applying by call type


application_volumes <- ddply(diagnostic_results, c("meeting_is_outline"), summarise, 
								mean_number_applying = mean(number_applying_in_month),
								median_number_applying = median(number_applying_in_month),
								sd = sd(number_applying_in_month)
							)

application_volumes


## and the bootstrap intervals for this also


volume_bootstrap <- vector(mode = "list", length = bootstrap_samples)

for(i in 1:bootstrap_samples) {

		if(i %% 100 == 0) message(paste0("Sample ", i, " of ", bootstrap_samples))

		sims_to_use <- sample(1:n_sims, replace = TRUE)
		temp <- vector(mode = "list", length = length(sims_to_use))

			for(j in seq_along(sims_to_use)) {

				temp[[j]] <- diagnostic_results[which(diagnostic_results$simulation_run == sims_to_use[j]), ]

							}

		temp <- ldply(temp, .id = NULL)

		volume_bootstrap[[i]] <- ddply(temp, c("meeting_is_outline"), summarise, 
													mean_number_applying = mean(number_applying_in_month),
													median_number_applying = median(number_applying_in_month),
													sd = sd(number_applying_in_month)
							)


		
			}

volume_bootstrap <- melt(ldply(volume_bootstrap, .id = NULL), id.vars = c("meeting_is_outline"))

volume_bootstrap <- ddply(volume_bootstrap, c("variable", "meeting_is_outline"), summarise, 

						upper95 = quantile(value, probs = c(0.975)), lower95 = quantile(value, probs = c(0.025)), median = quantile(value, probs = c(0.5)))
		
volume_bootstrap



######################################################################################
## applicant diagnostics, to show distributions of frequency of application and award.
## These are probably the patterns of greatest importance in the model
## They use the final state of the applicants in each simulation (NB not the final state in the diagnostic_results, as this is truncated)
## First, indicate which simulation run it is


for(q in 1:length(simulated_applicants)) { simulated_applicants[[q]]$simulation_run <- q}


## counts of applicants making n applications


applicant_application <- lapply(simulated_applicants, function(x) {

						res <- as.data.frame(table(x$total_applications))
						res$simulation_run <- unique(x$simulation_run)
						return(res)

				})

applicant_application <- ldply(applicant_application)


## counts of applicants receiving n awards


applicant_success <- lapply(simulated_applicants, function(x) {

						res <- as.data.frame(table(x$total_successful_applications))
						res$simulation_run <- unique(x$simulation_run)
						return(res)

				})

applicant_success <- ldply(applicant_success)


## average number of applications and award per applicant across all simulation runs


av_apps_awards <- ldply(simulated_applicants)
av_apps_awards <- data.frame(
								mean_applications = mean(av_apps_awards$total_applications),
								median_applications = median(av_apps_awards$total_applications),
								sd_applications = sd(av_apps_awards$total_applications),
								max_applications = max(av_apps_awards$total_applications),
								min_applications = min(av_apps_awards$total_applications),
								mean_awards = mean(av_apps_awards$total_successful_applications),
								median_awards = median(av_apps_awards$total_successful_applications),
								sd_awards = sd(av_apps_awards$total_successful_applications),
								max_awards = max(av_apps_awards$total_successful_applications),
								min_awards = min(av_apps_awards$total_successful_applications)
								)

av_apps_awards


## average success rates for calls of each type


call_rate_averages <- ddply(diagnostic_results, c("meeting_is_outline"), summarise,
									mean_rate = mean(meeting_overall_application_success_rate),
									median_rate = median(meeting_overall_application_success_rate)
							)

call_rate_averages


## retain a copy of diagnostic_results from the main simulation, as this may be needed later


diagnostic_results_master <- diagnostic_results


#####################################################################################
## Analysis of applicant behaviour with a funnel plot
## this relies on a method described here: https://osf.io/pumt9_v1
## First get the results showing applicants' success rates across the 49 simulations


app_data <- ldply(simulated_applicants)

app_data <- app_data[ c("total_successful_applications", "total_applications", "simulation_run", "applicant_id")]
app_data$applicant_id <- paste0(app_data$applicant_id, "_", app_data$simulation_run)
app_data$simulation_run <- NULL
app_data$success_rate <- app_data$total_successful_applications / app_data$total_applications


## the regression that determines the relationship between count and rate

a <- .05									
b <- 0.1
fit <- nls( success_rate ~ 1-(1/(exp(a * total_applications ^ b))),
              weights = total_applications,
              start = list(a = a, b = b),
			  #algorithm = "port",
              data = app_data
              )	
model_summary <- summary(fit)
model_summary
model_confints <- confint(fit)
model_confints
coef <- as.vector(coef(fit))
a <- coef[[1]]
b <- coef[[2]]


## determine predicted values over a longer range than the actual data as this makes the plot look nicer

padding <- seq(1, ceiling(max(app_data$total_applications) * 1.05), 1)
padding <- unique(c(padding, app_data$total_applications))
predicted_rate <- 1-(1/(exp(a * padding ^ b)))

z_95 <- 2
z_99 <- 3

se <- sqrt((predicted_rate * (1-predicted_rate))/padding)
upper95_wald <- predicted_rate + z_95*se
lower95_wald <- predicted_rate - z_95*se
upper99_wald <- predicted_rate + z_99*se
lower99_wald <- predicted_rate - z_99*se

## add the ranges and merge with the org data

funnel_data <- data.frame(padding, predicted_rate, upper95_wald, lower95_wald, upper99_wald, lower99_wald)
funnel_data <- merge(funnel_data, app_data, by.x = "padding", by.y = "total_applications", all.x = TRUE)

## omit funnel limits that sit above 100% or below 0%

funnel_data[ , c("upper95_wald", "lower95_wald", "upper99_wald", "lower99_wald")][ funnel_data[ , c("upper95_wald", "lower95_wald", "upper99_wald", "lower99_wald")] > 1] <- NA
funnel_data[ , c("upper95_wald", "lower95_wald", "upper99_wald", "lower99_wald")][ funnel_data[ , c("upper95_wald", "lower95_wald", "upper99_wald", "lower99_wald")] < 0] <- NA

## there's a bit too much data so thin it out a bit for presentation

funnel_data_thin <- funnel_data[sample(1:nrow(funnel_data), nrow(funnel_data)/20), ]




#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################
######################################################          CHARTS          #################################################################
#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################
## where necessary some minor data manipulation is also done here, for specific charts
## otherwise it draws on objects created earlier
## set a standard theme, though this does vary a bit across plots


standard_theme <- theme(
				                  panel.grid.major.y = element_blank(), #element_line(linewidth = 0.2, colour = "black"),
				                  panel.grid.minor.y = element_blank(),
				                  panel.grid.major.x = element_blank(), #element_line(linewidth = 0.2, colour = "black"),
				                  panel.grid.minor.x = element_blank(),
				                  axis.title.x = element_text(colour = "black", size = 10),
				                  axis.ticks = element_blank(),
				                  axis.title.y = element_text(colour = "black", size = 10),
				                  axis.text.y = element_blank(),
				                  axis.text.x = element_text(colour = "black", size = 8),
				                  strip.text = element_text(colour = "white", face = "bold"),
				                  strip.background = element_rect(fill = "black", colour = "black"),
				                  plot.background = element_rect(fill = "white", colour = NA),
				                  legend.text = element_text(colour = "black"),
				                  legend.title = element_text(colour = "black", vjust = 0.5),
				                  panel.border = element_rect(color = "black", fill = NA, linewidth = 0.2),
				                  text = element_text(family = "serif"),
                          			legend.position = "none"
				                )



###############################################
## the funnel plot of applicants' success rates


funnel_plot <- ggplot(data = funnel_data_thin, aes(x = padding)) +

		geom_line(aes(y = upper99_wald), colour = "grey33", lty = 2, linewidth = 0.5) +
  		geom_line(aes(y = lower99_wald), colour = "grey33", lty = 2, linewidth = 0.5) +
 	 	geom_point(aes(x = padding, y = success_rate), alpha = 0.1, size = 1, shape = 21, fill = NA) +
		geom_line(aes(y = predicted_rate), colour = "red", linewidth = 1) +
		theme_minimal() +
  		scale_y_continuous(labels = percent_format(accuracy = 1), limits = c(0, 1)) +					
  		labs(	title = "",
        		subtitle = "",
        		x = "\nNumber of applications submitted by applicant",
        		y = "Success rate\n",
        		fill = "",
				colour = "",
			caption = ""
  		) +
  		standard_theme +
		theme(axis.text.y = element_text(size = 6))

funnel_plot

ggsave("00_funnel_plot_of_applicant_success_rates.pdf", width = plot_width, height = plot_height)
ggsave("00_funnel_plot_of_applicant_success_rates.svg", width = plot_width, height = plot_height)


############################################################################
## overall distributions of number of people applying to calls, by call type

call_data <- diagnostic_results[ , c("number_applying_in_month", "meeting_is_outline")]
call_data$meeting_is_outline <- ifelse(call_data$meeting_is_outline == 1, "Outline + full", "Straight-to-full")

application_instances_by_call <- ggplot(call_data, aes(x = number_applying_in_month)) +
                					geom_histogram() +
                					facet_wrap( ~ meeting_is_outline) +
									theme_minimal() +
                        			labs(	x = "\nNumber of applications",
                              				y = "") +
                        			standard_theme

application_instances_by_call

ggsave("01_number_of_applicants_applying_by_call_type.pdf", width = plot_width, height = plot_height)
ggsave("01_number_of_applicants_applying_by_call_type.svg", width = plot_width, height = plot_height)


###########################################################################################################
## violin plot of diagnostic_results showing distributions of the three 'per...' outcomes across call types


all_data_violin_plot <- ggplot(effort_data, aes(x = factor(meeting_is_outline), y = log(value))) +
                        geom_violin(fill = "grey66", colour = NA) +
                        facet_wrap( ~ variable) +
                        stat_summary(fun.y = median, geom = "point", size = 2, color = "black") +
                        theme_minimal() +
                        labs(x = "",
                              y = "Log of metric\n") +
                        standard_theme

all_data_violin_plot

ggsave("02_key_metric_comparison.pdf", width = plot_width, height = plot_height)
ggsave("02_key_metric_comparison.svg", width = plot_width, height = plot_height)


#################################
## stability of funding over time


inflation <- ggplot(diagnostic_results, aes(x = month, y = mean_cycle_funding)) +
                geom_smooth(colour = "black", linewidth = 0.5) +
                facet_wrap( ~ simulation_run) +
				theme_minimal() +
                        labs(x = "",
                              y = "Mean update funding\n") +
                        standard_theme +
						theme(	strip.text = element_blank(),
				                strip.background = element_blank(),
								axis.text.x = element_text(size = 4)
							)

inflation

ggsave("03_funding_inflation.pdf", width = plot_width, height = plot_height)
ggsave("03_funding_inflation.svg", width = plot_width, height = plot_height)


#######################################################
## stability of number of applicants applying over time


application_inflation <- ggplot(diagnostic_results, aes(x = month, y = number_applying_in_month/n_applicants)) +
                geom_smooth(colour = "black", linewidth = 0.5) +
                facet_wrap( ~ simulation_run) +
				scale_y_continuous(labels = percent_format(accuracy = 1)) +
				theme_minimal() +
                        labs(x = "",
                              y = "Potential applicants applying\n") +
                       standard_theme +
					   theme(	strip.text = element_blank(),
				                strip.background = element_blank(),
								axis.text.x = element_text(size = 4)
							)

application_inflation

ggsave("04_application_inflation.pdf", width = plot_width, height = plot_height)
ggsave("04_application_inflation.svg", width = plot_width, height = plot_height)


############################################
## homogeneity of probability of application


application_p_homogeneity <- ggplot(p_of_application_results, aes(x = call_update, y = unique_values)) +
				geom_line() +
                facet_wrap( ~ simulation_run) +
				scale_y_continuous(limits = c(0, NA)) +
				theme_minimal() +
                        labs(x = "",
                              y = "Number of unique values of p_of_application\n") +
                        standard_theme +
					   	theme(	strip.text = element_blank(),
				                strip.background = element_blank(),
								axis.text.x = element_text(size = 4)
							)

application_p_homogeneity

ggsave("05_application_probability_homogeneity.pdf", width = plot_width, height = plot_height)
ggsave("05_application_probability_homogeneity.svg", width = plot_width, height = plot_height)


###############################################################
## distribution of instances of application for each simulation


application_instances <- ggplot(applicant_application, aes(x = as.integer(Var1), y = Freq/n_applicants)) +
                geom_bar(stat = "identity") +
                facet_wrap( ~ simulation_run) +
				scale_y_continuous(labels = percent_format(accuracy = 1)) +
				theme_minimal() +
                        labs(x = "\nNumber of applications",
                              y = "Prevalence\n") +
                        standard_theme +
					   	theme(	strip.text = element_blank(),
				                strip.background = element_blank(),
								axis.text.x = element_text(size = 4)
							)

application_instances

ggsave("06_number_of_applicants_applying_n_times.pdf", width = plot_width, height = plot_height)
ggsave("06_number_of_applicants_applying_n_times.svg", width = plot_width, height = plot_height)


##########################################################################
## distribution of instances of successful application for each simulation


successful_application_instances <- ggplot(applicant_success, aes(x = as.integer(Var1), y = Freq/n_applicants)) +
                geom_bar(stat = "identity") +
                facet_wrap( ~ simulation_run) +
				scale_y_continuous(labels = percent_format(accuracy = 1)) +
				theme_minimal() +
                        labs(x = "\nNumber of successful applications",
                              y = "Prevalence\n") +
                         standard_theme +
					   	theme(	strip.text = element_blank(),
				                strip.background = element_blank(),
								axis.text.x = element_text(size = 4)
							)

successful_application_instances

ggsave("07_number_of_applicants_successful_n_times.pdf", width = plot_width, height = plot_height)
ggsave("07_number_of_applicants_successful_n_times.svg", width = plot_width, height = plot_height)


#########################################
## distribution of success rates of calls
## need a bit of data manipulation first


rate_data <- diagnostic_results[ , c("number_applying_in_month", "total_awarded", "meeting_full_proposal_success_rate",
										"meeting_overall_application_success_rate", "meeting_is_outline", "total_applicant_effort_expended", "applicants_funded")]
rate_data$meeting_is_outline <- ifelse(rate_data$meeting_is_outline == 1, "Outline + full", "Straight-to-full")
rate_comparison_data <- melt(rate_data[ , c("meeting_full_proposal_success_rate", "meeting_overall_application_success_rate", "meeting_is_outline")],
												id.vars = c("meeting_is_outline"))
rate_comparison_data$variable <- ifelse(rate_comparison_data$variable == "meeting_full_proposal_success_rate",
														"Full proposals only", "All instances of application")

call_rates <- ggplot(rate_comparison_data, aes(x = value)) +
                geom_histogram() +
				facet_grid(meeting_is_outline ~ variable) +
				scale_x_continuous(labels = percent_format(accuracy = 1)) +
				theme_minimal() +
                        labs(x = "\nCall success rate",
                              y = "") +
                        standard_theme

call_rates

ggsave("08_distribution_of_success_rates.pdf", width = plot_width, height = plot_height)
ggsave("08_distribution_of_success_rates.svg", width = plot_width, height = plot_height)


###############################################################################
## effort expended by applicants in calls by level of application, by call type


application_levels <- ggplot(rate_data,
						aes(x = number_applying_in_month/n_applicants, y = total_applicant_effort_expended, group = meeting_is_outline, colour = meeting_is_outline)) +
                geom_point(shape = 21, fill = NA, alpha = 0.2) +
				scale_x_continuous(labels = percent_format(accuracy = 1)) +
				scale_colour_manual(values = c("red", "blue")) +
				geom_smooth(se = FALSE) +
				theme_minimal() +
                        labs(x = "\nApplicants applying",
                              y = "Total effort expended\n",
								colour = "") +
                       standard_theme +
						theme(legend.position = "bottom")

application_levels

ggsave("09_effort_versus_percent_of_applicants_applying.pdf", width = plot_width, height = plot_height)
ggsave("09_effort_versus_percent_of_applicants_applying.svg", width = plot_width, height = plot_height)



######################################################################################################################################################
######################################################################################################################################################
## Two instances of varying the state variables systematically to understand their effects on the outcomes of interest
## we expect that we should have more applicants for outline calls than for fulls
## and we should expect that as realism changes, the difference in effort per unit value committed changes
## Remember that the total value of awards does not vary by call type, to this directly tells us about efficiency.


applicants_and_effort_by_call_type <- ddply(diagnostic_results, c("meeting_is_outline"), summarise,

                          								mean_applicants = mean(number_applying_in_month),
                          								median_applicants = median(number_applying_in_month),
														mean_effort_expended = mean(total_applicant_effort_expended),
														median_effort_expended = median(total_applicant_effort_expended)
                  )

applicants_and_effort_by_call_type


# and that is what we see
# adjusting realism_beta_1 and realism_beta_2 ought to change this
# check this assumption by sampling values of realism_beta_1 between 1 and 100, keeping realism_beta_2 unchanged
# this will give an effort_realism of between 9% and 91% (higher values indicate less discounting of future effort required)
# See how the mean and median differences in number of applicants to each of the two call types varies with realism_beta_1
# we can strip back some of the outputs to save time, in particular just run one simulation each time
# at this point diagnostic_results will get overwritten, which is why a copy of the main simulation was saved earlier


n_sims <- 1    														# don't forget that this has changed
bootstrap_samples <- 1000 											# this too

varying_realism_beta_1 <- 10^runif(systematic_variations, min = 0, max = 2)								# random uniform distribution of log values gives more focus on more realistic values
applicants_by_call_type_list <- vector(mode = "list", length = length(varying_realism_beta_1))			# two lists, to store results
effort_ratio_change_list <- vector(mode = "list", length = length(varying_realism_beta_1))

for(p in 1:length(varying_realism_beta_1)) {

    message(paste0("                                                              Instance ", p, " of ", length(varying_realism_beta_1)))
    realism_beta_1 <- varying_realism_beta_1[p]
    diagnostic_results <- vector(mode = "list", length = n_sims)

        for(k in 1:n_sims) {

            sim <- run_simulations()
            all_diagnostics <- ldply(sim[[3]])
            all_diagnostics <- all_diagnostics[round(nrow(all_diagnostics)/burn_in_fraction, digits = 0):(nrow(all_diagnostics) - round(nrow(all_diagnostics)/burn_out_fraction, digits = 0)), ]
            all_diagnostics$simulation_run <- k
            diagnostic_results[[k]] <- all_diagnostics

    			} 

diagnostic_results <- ldply(diagnostic_results, .id = NULL)
diagnostic_results_simplified <- ddply(diagnostic_results, c("meeting_is_outline"), summarise,
                          Mean = mean(number_applying_in_month),
                          Median = median(number_applying_in_month))
diagnostic_results_simplified$beta_1 <- varying_realism_beta_1[p]

# calculating the effort per unit cost measure

effort_per_unit_cost <- diagnostic_results[ , c("total_awarded", "meeting_is_outline", "total_applicant_effort_expended")]
effort_per_unit_cost_list <- vector(mode = "list", length = bootstrap_samples)

for(i in 1:bootstrap_samples) {

				if(i %% 100 == 0) message(paste0("Boostrapping instance ", i))
				temp <- effort_per_unit_cost[sample(1:nrow(effort_per_unit_cost), nrow(effort_per_unit_cost)/4, replace = TRUE), ]
				outline_effort <- sum(temp$total_applicant_effort_expended[temp$meeting_is_outline == 1])
				stf_effort <- sum(temp$total_applicant_effort_expended[temp$meeting_is_outline == 0])
				outline_funding <- sum(temp$total_awarded[temp$meeting_is_outline == 1])
				stf_funding <- sum(temp$total_awarded[temp$meeting_is_outline == 0])
				outline_ratio <- outline_effort/outline_funding
				stf_ratio <- stf_effort/stf_funding
				ratio_ratio <- stf_ratio/outline_ratio

				effort_per_unit_cost_list[[i]] <- data.frame(outline_ratio = outline_ratio, stf_ratio = stf_ratio,
														 ratio_ratio = ratio_ratio)

				}

effort_per_unit_cost <- ldply(effort_per_unit_cost_list)
effort_per_unit_cost_bootstrap <- data.frame(
										upper95 = quantile(effort_per_unit_cost$ratio_ratio, probs = c(0.975)),
										lower95 = quantile(effort_per_unit_cost$ratio_ratio, probs = c(0.025)),
										median = quantile(effort_per_unit_cost$ratio_ratio, probs = c(0.5))
										)
effort_per_unit_cost_bootstrap$beta_1 <- varying_realism_beta_1[p]


applicants_by_call_type_list[[p]] <- diagnostic_results_simplified
effort_ratio_change_list[[p]] <- effort_per_unit_cost_bootstrap        

	}				## end of this for loop


##########################################################################################
## we might also expect that changes in the relative effort used by outlines and fulls would have effects on the ratio of effort
## to show this we can vary full_proposal_ratio systematically
## the logic is the same as in the preceding code


varying_full_proposal_ratio <- 10^runif(systematic_variations, min = -1, max = 1)							# again, evenly distributed across log values, to focus on the most interesting areas
fpr_applicants_by_call_type_list <- vector(mode = "list", length = length(varying_full_proposal_ratio))
fpr_effort_ratio_change_list <- vector(mode = "list", length = length(varying_full_proposal_ratio))

for(p in 1:length(varying_full_proposal_ratio)) {

    message(paste0("                                                              Instance ", p, " of ", length(varying_full_proposal_ratio)))
    full_proposal_ratio <- varying_full_proposal_ratio[p]
    diagnostic_results <- vector(mode = "list", length = n_sims)

        for(k in 1:n_sims) {

            sim <- run_simulations()
            all_diagnostics <- ldply(sim[[3]])
            all_diagnostics <- all_diagnostics[round(nrow(all_diagnostics)/burn_in_fraction, digits = 0):(nrow(all_diagnostics) - round(nrow(all_diagnostics)/burn_out_fraction, digits = 0)), ]
            all_diagnostics$simulation_run <- k
            diagnostic_results[[k]] <- all_diagnostics

    } 

diagnostic_results <- ldply(diagnostic_results, .id = NULL)
diagnostic_results_simplified <- ddply(diagnostic_results, c("meeting_is_outline"), summarise,
                          Mean = mean(number_applying_in_month),
                          Median = median(number_applying_in_month))
diagnostic_results_simplified$temp_full_proposal_ratio <- varying_full_proposal_ratio[p]

effort_per_unit_cost <- diagnostic_results[ , c("total_awarded", "meeting_is_outline", "total_applicant_effort_expended")]
effort_per_unit_cost_list <- vector(mode = "list", length = bootstrap_samples)

for(i in 1:bootstrap_samples) {

				if(i %% 100 == 0) message(paste0("Boostrapping instance ", i))
				temp <- effort_per_unit_cost[sample(1:nrow(effort_per_unit_cost), nrow(effort_per_unit_cost)/4, replace = TRUE), ]
				outline_effort <- sum(temp$total_applicant_effort_expended[temp$meeting_is_outline == 1])
				stf_effort <- sum(temp$total_applicant_effort_expended[temp$meeting_is_outline == 0])
				outline_funding <- sum(temp$total_awarded[temp$meeting_is_outline == 1])
				stf_funding <- sum(temp$total_awarded[temp$meeting_is_outline == 0])
				outline_ratio <- outline_effort/outline_funding
				stf_ratio <- stf_effort/stf_funding
				ratio_ratio <- stf_ratio/outline_ratio

				effort_per_unit_cost_list[[i]] <- data.frame(outline_ratio = outline_ratio, stf_ratio = stf_ratio,
														 ratio_ratio = ratio_ratio)

				}

effort_per_unit_cost <- ldply(effort_per_unit_cost_list)
effort_per_unit_cost_bootstrap <- data.frame(
										upper95 = quantile(effort_per_unit_cost$ratio_ratio, probs = c(0.975)),
										lower95 = quantile(effort_per_unit_cost$ratio_ratio, probs = c(0.025)),
										median = quantile(effort_per_unit_cost$ratio_ratio, probs = c(0.5))
										)
effort_per_unit_cost_bootstrap$temp_full_proposal_ratio <- varying_full_proposal_ratio[p]


fpr_applicants_by_call_type_list[[p]] <- diagnostic_results_simplified
fpr_effort_ratio_change_list[[p]] <- effort_per_unit_cost_bootstrap        

}


## get the outputs into the right format for plotting


applicant_differences <- lapply(applicants_by_call_type_list, function(x) {

			if(nrow(x) == 0) return(NULL)

          return(data.frame(
            Mean = x$Mean[x$meeting_is_outline == 1] - x$Mean[x$meeting_is_outline == 0],
            Median = x$Median[x$meeting_is_outline == 1] - x$Median[x$meeting_is_outline == 0],
            beta_1 = unique(x$beta_1)
          ))

    })

applicant_differences <- ldply(applicant_differences)
applicant_differences_long <- melt(applicant_differences, id.vars = c("beta_1"))


fpr_applicant_differences <- lapply(fpr_applicants_by_call_type_list, function(x) {

			if(nrow(x) == 0) return(NULL)

          return(data.frame(
            Mean = x$Mean[x$meeting_is_outline == 1] - x$Mean[x$meeting_is_outline == 0],
            Median = x$Median[x$meeting_is_outline == 1] - x$Median[x$meeting_is_outline == 0],
            temp_full_proposal_ratio = unique(x$temp_full_proposal_ratio)
          ))

    })

fpr_applicant_differences <- ldply(fpr_applicant_differences)
fpr_applicant_differences_long <- melt(fpr_applicant_differences, id.vars = c("temp_full_proposal_ratio"))




##########################################################################################################################################
##########################################################################################################################################
## charts using the data based on systematically varying realism_beta_1 and full_proposal_ratio
## first, a chart showing difference in extent of application varying with change in realism

realism_beta_1 <- 5

all_applicant_data <- ldply(applicants_by_call_type_list)
all_applicant_data$meeting_is_outline <- ifelse(all_applicant_data$meeting_is_outline == 0, "Straight-to-full", "Outline + Full")

all_applicant_data_plot <- ggplot(all_applicant_data, aes(x = beta_1, y = Median, group = factor(meeting_is_outline), colour = factor(meeting_is_outline))) +
                geom_point(alpha = 0.2, shape = 21) +
                geom_smooth(se = FALSE) +
                geom_hline(yintercept = 0, colour = "grey44", linetype = "dashed") +
				geom_vline(xintercept = realism_beta_1, colour = "black", linetype = "dotted") +
                scale_colour_manual(values = c("red2", "blue")) +
                scale_x_log10() +
                #scale_y_continuous(labels = percent_format(accuracy = 1), limits = c(-0.1, 0.2)) +
                #facet_wrap(~ variable) +
                theme_minimal() +
                labs(
                      x = "\nrealism_beta_1",
                      y = "Median number applying\n",
                      colour = ""
                    ) +
                		standard_theme  +
						theme(legend.position = "bottom")

all_applicant_data_plot


ggsave("10_variation_of_application_volumes_with_realism.pdf", width = plot_width, height = plot_height)
ggsave("10_variation_of_application_volumes_with_realism.svg", width = plot_width, height = plot_height)


applicant_differences_plot <- ggplot(applicant_differences_long, aes(x = beta_1/(beta_1 + realism_beta_2), y = value/n_applicants, group = variable, colour = variable)) +
                geom_point(alpha = 0.2, shape = 21, colour = "black") +
                geom_smooth(se = FALSE) +
                geom_hline(yintercept = 0, colour = "grey44", linetype = "dashed") +
				geom_vline(xintercept = realism_beta_1/(realism_beta_1 + realism_beta_2), colour = "black", linetype = "dotted") +
                scale_colour_manual(values = c("red2", "blue")) +
                scale_x_continuous(labels = percent_format(accuracy = 1)) +
                scale_y_continuous(labels = percent_format(accuracy = 1), limits = c(-0.1, 0.2)) +
                facet_wrap(~ variable) +
                theme_minimal() +
                labs(
                      x = "\nApplicant 'effort realism'",
                      y = "Difference in % of applicants applying\n('Outline + full' - 'Straight-to-full')\n",
                      colour = ""
                    ) +
                		standard_theme +
				theme(axis.text.y = element_text(size = 6)
						)
          
applicant_differences_plot

ggsave("11_variation_of_differences_in_application_volumes_with_realism.pdf", width = plot_width, height = plot_height)
ggsave("11_variation_of_differences_in_application_volumes_with_realism.svg", width = plot_width, height = plot_height)


## chart showing difference in effort per unit cost varying with change in realism

effort_ratio_change <- ldply(effort_ratio_change_list)

effort_ratio_change_plot <- ggplot(effort_ratio_change, aes(x = beta_1, y = median)) +
                geom_point(alpha = 0.2, shape = 21, colour = "black") +
                geom_smooth(colour = "red2", se = FALSE) +
                geom_hline(yintercept = 1, colour = "grey44", linetype = "dashed") +
				geom_vline(xintercept = realism_beta_1, colour = "black", linetype = "dotted") +
                scale_x_log10() +
                theme_minimal() +
                labs(
                      x = "\nrealism_beta_1",
                      y = "Ratio of effort per unit value\n('Straight-to-full'/'Outline + full')\n",
                      colour = ""
                    ) +
                standard_theme
          
effort_ratio_change_plot

ggsave("12_variation_of_cost_ratios_with_realism.pdf", width = plot_width, height = plot_height)
ggsave("12_variation_of_cost_ratios_with_realism.svg", width = plot_width, height = plot_height)


## chart showing difference in extent of application varying with change in fpr

full_proposal_ratio <- 1

fpr_all_applicant_data <- ldply(fpr_applicants_by_call_type_list)
fpr_all_applicant_data$meeting_is_outline <- ifelse(fpr_all_applicant_data$meeting_is_outline == 1, "'Outline + full'", "Straight-to-full")

fpr_all_applicant_data_plot <- ggplot(fpr_all_applicant_data, aes(x = temp_full_proposal_ratio, y = Median, group = factor(meeting_is_outline), colour = factor(meeting_is_outline))) +
                geom_smooth(se = FALSE) +
                geom_hline(yintercept = 0, colour = "grey44", linetype = "dashed") +
                scale_colour_manual(values = c("red2", "blue")) +
				geom_vline(xintercept = full_proposal_ratio, colour = "black", linetype = "dotted") +
                theme_minimal() +
                labs(
                      x = "\nfull_proposal_ratio",
                      y = "Median number applying\n",
                      colour = ""
                    ) +
                	standard_theme +
				theme(legend.position = "bottom")
				
fpr_all_applicant_data_plot

ggsave("13_variation_of_application_volumes_with_full_proposal_ratio.pdf", width = plot_width, height = plot_height)
ggsave("13_variation_of_application_volumes_with_full_proposal_ratio.svg", width = plot_width, height = plot_height)



fpr_applicant_differences_plot <- ggplot(fpr_applicant_differences_long, aes(x = temp_full_proposal_ratio, y = value/n_applicants, group = variable, colour = variable)) +
                geom_point(alpha = 0.2, shape = 21, colour = "black") +
                geom_smooth() +
                geom_hline(yintercept = 0, colour = "grey44", linetype = "dashed") +
				geom_vline(xintercept = full_proposal_ratio, colour = "black", linetype = "dotted") +
                scale_colour_manual(values = c("red2", "blue")) +
                scale_y_continuous(labels = percent_format(accuracy = 1), limits = c(-0.1, 0.1)) +
				geom_vline(xintercept = full_proposal_ratio, colour = "black", linetype = "dotted") +
                facet_wrap(~ variable) +
				scale_x_log10() +
                theme_minimal() +
                labs(
                      x = "\nfull_proposal_ratio",
                      y = "Difference in % of applicants applying\n('Outline + full' - 'Straight-to-full')\n",
                      colour = ""
                    ) +
                		standard_theme +
				theme(legend.position = "bottom")
          
fpr_applicant_differences_plot

ggsave("14_variation_of_differences_in_application_volumes_with_full_proposal_ratio.pdf", width = plot_width, height = plot_height)
ggsave("14_variation_of_differences_in_application_volumes_with_full_proposal_ratio.svg", width = plot_width, height = plot_height)


## chart showing difference in effort per unit cost varying with change in fpr

fpr_effort_ratio_change <- ldply(fpr_effort_ratio_change_list)

fpr_effort_ratio_change_plot <- ggplot(fpr_effort_ratio_change, aes(x = temp_full_proposal_ratio, y = median)) +
                geom_point(alpha = 0.2, shape = 21, colour = "black") +
                geom_smooth(colour = "red2", se = FALSE) +
                geom_hline(yintercept = 1, colour = "grey44", linetype = "dashed") +
				geom_vline(xintercept = full_proposal_ratio, colour = "black", linetype = "dotted") +
                theme_minimal() +
				scale_x_log10() +
                labs(
                      x = "\nfull_proposal_ratio",
                      y = "Ratio of effort per unit value\n('Straight-to-full'/'Outline + full')\n",
                      colour = ""
                    ) +
                		standard_theme
          
fpr_effort_ratio_change_plot

ggsave("15_variation_of_cost_ratios_with_full_proposal_ratio.pdf", width = plot_width, height = plot_height)
ggsave("15_variation_of_cost_ratios_with_full_proposal_ratio.svg", width = plot_width, height = plot_height)


## 95% ranges for cuts of the data


step_size <- 0.2
cuts <- seq(from = step_size, to = 10 - step_size, by = step_size/2)
cuts_list <- as.list(cuts)
cuts_list <- lapply(cuts_list, function(x) {


				temp <- fpr_effort_ratio_change[which(	fpr_effort_ratio_change$temp_full_proposal_ratio >= (x - step_size) &
														fpr_effort_ratio_change$temp_full_proposal_ratio < (x + step_size)), ]
				
				
				upper95 <- quantile(temp$median, probs = c(0.975))
				lower95 <- quantile(temp$median, probs = c(0.025))
				
				temp <- data.frame(upper95, lower95)
				temp$.id <- x
		
				return(temp)

				})

cuts_list <- ldply(cuts_list)



fpr_effort_ratio_change_plot_alt <- ggplot() +
                geom_point(data = fpr_effort_ratio_change, aes(x = temp_full_proposal_ratio, y = median), alpha = 0.2, shape = 21, colour = "black") +
				geom_ribbon(data = cuts_list, aes(x = .id, ymin = lower95, ymax = upper95), alpha = 0.3) +
                #geom_smooth(data = fpr_effort_ratio_change, aes(x = temp_full_proposal_ratio, y = median), colour = "red2", se = FALSE) +
				geom_segment(data = cuts_list, aes(x = .id, xend = .id, y = lower95, yend = upper95)) +
                geom_hline(yintercept = 1, colour = "grey44", linetype = "dashed") +
				geom_vline(xintercept = full_proposal_ratio, colour = "black", linetype = "dotted") +
                theme_minimal() +
				scale_x_log10() +
                labs(
                      x = "\nfull_proposal_ratio",
                      y = "Ratio of effort per unit value\n('Straight-to-full'/'Outline + full')\n",
                      colour = ""
                    ) +
                		standard_theme
          
fpr_effort_ratio_change_plot_alt

ggsave("15_alt_variation_of_cost_ratios_with_full_proposal_ratio.pdf", width = plot_width, height = plot_height)
ggsave("15_alt_variation_of_cost_ratios_with_full_proposal_ratio.svg", width = plot_width, height = plot_height)