######################################################################################
##                                                                                  ##
## A script to look at differences between epidemics and endemic diasease in space  ##
## and time to see if there are ways to differentiate between them.                 ##
## Starts with a simulation model(s) for cases, then looks at using R-INLA using    ##
## the position of cases with teh timing of cases as the modelled outcome to see if ##
## using an spde approach on the timing of a case distribution across space is of   ##
## any use whatsoever.                                                              ##
##                                                                                  ##
## Written by G.T. Innocent (giles.innocent@bioss.ac.uk)                            ##
## Written on 04/11/2015                                                            ##
## Filed @ ~/R_work/EFSA/epi_endemic_models.R                                       ##
##                                                                                  ##
######################################################################################

## Simulate some data. All cases will be in the unit square.
## The population is 1000, randomly dispersed throughout the area.

create.population <- function(n.pop = 1000) {
    x <- runif(n.pop, 0, 1)
    y <- runif(n.pop, 0, 1)
    return(data.frame(x=x, y=y, index=1:n.pop))
}

env.inf.model <- function(population, initial.n, t.max, beta, gamma=1, delta=1)  {
    ## model for constant rate across population irrespective of number infected or population etc.
    ## population is the x-y coordinates of the population
    ## initial.n is the initial number infected
    ## t.max is the time for which the simulation is to run
    ## beta is the rate S->I, gamma: rate I -> R and delta: rate R -> S
    ## In infection.state 0 = S, 1 = I, 2 = R.

    return.df <- data.frame(x=numeric(0), y=numeric(0), time=numeric(0), S=numeric(0), I=numeric(0), R=numeric(0))
    n.pop <- length(population$x)
    infection.state <- numeric(n.pop)
    infection.state[sample(n.pop, initial.n, replace = FALSE)] <- 1
    times <- rexp(n.pop)/((infection.state == 0)*beta +  (infection.state == 1)*gamma +
                          (infection.state == 2)*delta)
    next.event <- min(times)
    while(next.event < t.max) {
        which.next <- which(times == next.event)
        if(infection.state[which.next] == 0) { ## i.e. is an infection event
            return.df <- rbind(return.df, data.frame(x=population$x[which.next],
                                                     y=population$y[which.next],
                                                     index=population$index[which.next],
                                                     time=next.event,
                                                     S=sum(infection.state == 0),
                                                     I=sum(infection.state == 1),
                                                     R=sum(infection.state == 2)))
        }
        infection.state[which.next] <- (infection.state[which.next]+1) %% 3
        times[which.next] <- next.event + rexp(1)/((infection.state[which.next] == 0)*beta +
                                      (infection.state[which.next] == 1)*gamma +
                                      (infection.state[which.next] == 2)*delta)
        next.event <- min(times)
    }
    return(return.df)
}

calc.dist <- function(X=1, Y=1, points)  {
    ## function to calculate the distance between two points, using Pythagoras' rule
    ## Note X and Y are the indices of points that we wish to
    ## find the distance between. This is to allow it to be called using outer

    dist <- sqrt((points$x[X] - points$x[Y])^2 + (points$y[X] - points$y[Y])^2)
    return(dist)
}

kernel <- function(dist, beta, tau) {
    ##just a simple inverse square law kernel. Other kernels are available
    ## inverse square law doesn't work very well so try negative exponential
    ## however, in order to standardise it we are going to scale it so that
    ## the area under the curve between 0 and inf is beta, i.e. the
    ## expectation between 0 and inf is beta. Note integral(e^-nx) = -(1/n)e^-nx + C
    ## => area under curve[0,Inf] is 1/n => we need to scale by n (tau in our parameter set)
    
    ##return(beta/(dist^2))
    return(beta*tau*exp(-tau*dist))
}

calc.infection.rates <- function(pop, inf.state, beta, tau)  {
    ## Calculation of infection rates for each individual, using all infectious animals and
    ## their distances to each susceptible animal.
    ## first calculate the distance from:to each point using outer
    ## then apply kernel to these distances
    ## then sum kernels just for from all points to infectious sites using apply

    distances <- outer(1:length(pop$x), 1:length(pop$x), calc.dist, pop)
    distance.effect <- kernel(distances, beta, tau)
    if(sum(inf.state==1)>1) {
        return(apply(distance.effect[,inf.state==1], 1, sum))
    }else if(sum(inf.state==1)==1) {
        return(distance.effect[,inf.state==1])
    }else{
        return(rep(0, length(pop$x)))
    }
}

inf.transmission.model <- function(population, initial.n, t.max, beta, gamma=1, delta=1, tau=1, initial.x=NULL, initial.y=NULL)  {
    ## Model for constant rate across population irrespective of number infected or population etc.
    ## population is the x-y coordinates of the population
    ## initial.n is the initial number infected
    ## t.max is the time for which the simulation is to run
    ## beta is the rate S->I, gamma: rate I -> R and delta: rate R -> S
    ## note no birth process, but I->R->S ensures new susceptibles, if required.
    
    return.df <- data.frame(x=numeric(0), y=numeric(0), time=numeric(0), S=numeric(0), I=numeric(0), R=numeric(0))
    n.pop <- length(population$x)
    infection.state <- numeric(n.pop)
    if(is.null(initial.x)) {
        infection.state[sample(n.pop, initial.n, replace = FALSE)] <- 1
    }else{
        dists <- sqrt((initial.x - population$x)^2 + (initial.y- population$y)^2)
        infection.state[which(dists == min(dists))] <- 1  ## select closest point to nit.x, init.y
    }
    infection.rates <- calc.infection.rates(population, infection.state, beta, tau)
    times <- rexp(n.pop)
    times[infection.state == 0] <- times[infection.state == 0]/infection.rates[infection.state == 0]
    times[infection.state == 1] <- times[infection.state == 1]/gamma
    times[infection.state == 2] <- times[infection.state == 2]/delta
    next.event <- min(times)
    
    while(next.event < t.max) {
        which.next <- which(times == next.event)
        if(infection.state[which.next] == 0) { ## i.e. is an infection event
            return.df <- rbind(return.df, data.frame(x=population$x[which.next],
                                                     y=population$y[which.next],
                                                     index=population$index[which.next],
                                                     time=next.event,
                                                     S=sum(infection.state == 0),
                                                     I=sum(infection.state == 1),
                                                     R=sum(infection.state == 2)))
        }
        infection.state[which.next] <- (infection.state[which.next]+1) %% 3
        times[which.next] <- switch(infection.state[which.next]+1, 0, next.event + 1/gamma,
                                    next.event + 1/delta)
        ## this gives 0 if susceptible (will be calculated by the next 2 lines)
        ## gives next.event + 1/gamma if infected, I.E. will be infected for
        ## 1/gamma time units (fixed) and similarly next.event + 1/delta once recovered.
        infection.rates <- calc.infection.rates(population, infection.state, beta, tau)
        times[infection.state == 0] <- next.event + rexp(sum(infection.state == 0))/
            infection.rates[infection.state == 0]
        ## note previous line may be less efficient than recalculating times using previous
        ## random number
        next.event <- min(times)
    }
    return(return.df)
}
    
set.seed(1)

pop <- create.population()
for (i in 1:10) {
    print(paste("Endemic infection iteration", i, sep = " "))
    infections <- env.inf.model(pop, 0, 10, log(2)/10, 0.0000001, 1) # note making the parameter of the exponential log(2)/k results in the median time to infection k time units. In this case that means that we should end up with 500 cases (population size is 1000)
    save(infections, file = paste("results_endemic_new", i, "RData", sep = "."))
}

for(i in 16:20) {
##for(i in 1:5) {
    infections <- data.frame(x=numeric(0), y=numeric(0), time=numeric(0)) # just so we can check dim()
    while(dim(infections)[1] < 3) {  # need this as infection may die out immediately. Also doesn't work on a single infection!needs at least 3 cases to create a triangle!
        print(paste("Epidemic infection iteration", i, sep = " "))
        infections <- inf.transmission.model(pop, initial.n = 1, t.max = 10, beta = 0.005,
                                                 gamma = 0.5, delta = 0.1, tau = 20) # epidemic
    }
    save(infections, file = paste("results_new_", i, ".RData", sep = ""))
}

for (i in 21:25) {
##for (i in 6:10) {
    infections <- data.frame(x=numeric(0), y=numeric(0), time=numeric(0)) # just so we can check dim()
    while(dim(infections)[1] < 3) {  # need this as infection may die out immediately
        print(paste("Epi-endemic infection iteration", i, sep = " "))
        infections <- inf.transmission.model(pop, initial.n = 1, t.max = 10, beta = 0.004,
                                             gamma = 0.5, delta = 0.5, tau = 10) # endemic
    }
    save(infections, file = paste("results_new_", i, "RData", sep = ""))
}

for (i in 26:30) {
##for (i in 11:15) {
    infections <- data.frame(x=numeric(0), y=numeric(0), time=numeric(0)) # just so we can check dim()
    while(dim(infections)[1] < 3) {  # need this as infection may die out immediately
        print(paste("Epi-endemic infection iteration", i, sep = " "))
        infections <- inf.transmission.model(pop, initial.n = 1, t.max = 10, beta = 0.003,
                                             gamma = 0.5, delta = 0.9, tau = 10) # endemic
    }
    save(infections, file = paste("results_new_", i, ".RData", sep = ""))
}


## note that this takes a long time

## produce sub-sqamples of 500, 100 and 50 to see how well methods cope with reduced data sets.
########################################################################################
## Note that this is now commented out to ensure that the original data sets are used ##
########################################################################################

#for(i in 1:30) {
#    load(file = paste("results_", i, ".RData", sep = ""))
#    for(sample.size in c(500,100,50)) {
#        reduced.infections <- infections[sample(dim(infections)[1], sample.size),]
#        save(reduced.infections, file = paste("results_", sample.size, "_", i, ".RData", sep = ""))
#    }
#}

## now to analyse

library(INLA)

for(i in 1:30)  {
##for(i in 1:15)  {
##for(i in 16:20)  {
    ## data plots

    for(file.size in c("", "500_","100_","50_"))  {
        load(file = paste("results_", file.size, i, ".RData", sep = ""))
        if(file.size != "")  {
            infections <- reduced.infections
        }
                
        jpeg(filename = paste("caseplot_", file.size, i, ".jpg", sep = ""), width = 800, height = 800)
        plot(infections$x, infections$y, col = heat.colors(n=10)[ceiling(infections$time)],
             xlab = "", ylab = "")
        dev.off()
        
        jpeg(filename = paste("histogram_", file.size, i, ".jpg", sep = ""), width = 800, height = 800)
        hist(infections$time, xlab = "Time of infection", main = "Histogram of infection times")
        dev.off()
        
        jpeg(filename = paste("epiplot_", file.size, i, ".jpg", sep = ""), width = 800, height = 800)
        plot(c(0,10), c(0, max(c(infections$S, infections$I, infections$R))), type = "n",
             xlab = "Time", ylab = "Number of animals", main = "Progress of infection")
        lines(infections$time, infections$S, col = "blue")
        lines(infections$time, infections$I, col = "red")
        lines(infections$time, infections$R, col = "green")
        legend(0, 800, c("S", "I", "R"), lty = 1, col = c("blue", "red", "green"))
        dev.off()
        
        cases.points <- unique(infections[,1:2]) # x, y columns
        cases.points$new.index <- 1:length(cases.points$x)
        mesh = inla.mesh.2d(as.matrix(cases.points[,1:2]),
                            boundary = list(##inla.nonconvex.hull(points = as.matrix(cases.points[,1:2]), convex=-0.05, resolution = 150)),
                                inla.nonconvex.hull(points = as.matrix(cases.points[,1:2]), convex=-0.1, resolution = 100)),
                            max.edge=0.3)
        infections$index <- numeric(length(infections$x))
        for(case in 1:length(infections$x)) {
            infections$index[case] <- cases.points$new.index[(cases.points$x == infections$x[case]) &
                                                             (cases.points$y == infections$y[case])]
        }
        
        formula <- time ~ f(index, model = inla.spde2.matern(mesh,2))
        
        model <- inla(formula, family = "gaussian", data = infections)
        
        save(model, file = paste("model_", file.size, i, ".RData", sep = ""))
        save(mesh, file = paste("mesh_", file.size, i, ".RData", sep = ""))
        
        ## model plots
        
        jpeg(filename = paste("meshplot_", file.size, i, ".jpg", sep = ""), width = 800, height = 800, pointsize = 14)
        plot(mesh)
        points(cases.points, col = "red")
        dev.off()
        
        grid <- inla.mesh.projector(mesh)
        mean.for.plot <- inla.mesh.project(grid, model$summary.random$index$mean)
        sd.for.plot <- inla.mesh.project(grid, model$summary.random$index$sd)
        
        jpeg(filename = paste("meanplot_", file.size, i, ".jpg", sep = ""), width = 800, height = 800, pointsize = 14)
        image(mean.for.plot, main = "Posterior mean value")
        dev.off()
        
        jpeg(filename = paste("sdplot_", file.size, i, ".jpg", sep = ""), width = 800, height = 800, pointsize = 14)
        image(sd.for.plot, main = "Posterior standard deviation")
        dev.off()
        
        ## look at residuals and plot
        
        A <- inla.spde.make.A(mesh, loc=as.matrix(infections[,1:2]))
        post.mean.at.obs <- drop(A%*%model$summary.random$index$mean)
        resid <- post.mean.at.obs - infections$time
        jpeg(filename = paste("mean_residual_plot_", file.size, i, ".jpg", sep = ""), width = 800, height = 800, pointsize = 14)
        plot(post.mean.at.obs, resid, pch = 20, main = "mean-residual plot")
        dev.off()
    }
}

## Data from the Lange et al paper:


library(INLA)

filenames <- c("Cases_endemic_center.txt", "Cases_epidemic_center.txt" )

for(i in 1:length(filenames))  {
    infections <- read.table(file = filenames[i], sep = ";",
                             col.names = c("x", "y", "time"))

    ## rescale time to be on interval(0,10]

    infections$time <- infections$time - min(infections$time) + 1
    infections$time <- infections$time/max(infections$time)*10

    ## rescale x and y to be on interval [0,1]

    infections$x <- (infections$x - min(infections$x))/(max(infections$x) - min(infections$x))
    infections$y <- (infections$y - min(infections$y))/(max(infections$y) - min(infections$y))

    ## plots of raw data
    
    jpeg(filename = paste("caseplot_sim_lange_", i, ".jpg", sep = ""), width = 800,
         height = 800)
    plot(infections$x, infections$y,
         col = heat.colors(n=10)[ceiling(infections$time)],
         xlab = "", ylab = "")
    dev.off()

    jpeg(filename = paste("histogram_sim_lange_", i, ".jpg", sep = ""), width = 800, height = 800)
    hist(infections$time, xlab = "Time of infection", main = "Histogram of infection times")
    dev.off()

    infections$index <- numeric(length(infections$x))
    cases.points <- unique(infections[,1:2]) # x, y columns
    cases.points$index <- 1:length(cases.points$x)
    mesh = inla.mesh.2d(as.matrix(cases.points[,1:2]),
                    boundary = list(##inla.nonconvex.hull(points = as.matrix(cases.points[,1:2]), convex=-0.05, resolution = 150)),
                        inla.nonconvex.hull(points = as.matrix(cases.points[,1:2]), convex=-0.1, resolution = 100)),
                    max.edge=0.3)

    for(case in 1:length(infections$x)) {
        infections$index[case] <-
            cases.points$index[(cases.points$x == infections$x[case]) &
                               (cases.points$y == infections$y[case])]
    }

    formula <- time ~ f(index, model = inla.spde2.matern(mesh,2))

    model <- inla(formula, family = "gaussian", data = infections, control.inla = list(h = 1e-10))

    save(model, file = paste("model_sim_lange_", i, ".RData", sep = ""))
    save(mesh, file = paste("mesh_sim_lange_", i, ".RData", sep = ""))
    
    ## model plots
    
    jpeg(filename = paste("meshplot__sim_lange_", i, ".jpg", sep = ""), width = 800, height = 800, pointsize = 14)
    plot(mesh)
    points(cases.points, col = "red")
    dev.off()
    
    grid <- inla.mesh.projector(mesh)
    mean.for.plot <- inla.mesh.project(grid, model$summary.random$index$mean)
    sd.for.plot <- inla.mesh.project(grid, model$summary.random$index$sd)

    jpeg(filename = paste("meanplot__sim_lange_", i, ".jpg", sep = ""), width = 800, height = 800, pointsize = 14)
    image(mean.for.plot, main = "Posterior mean value")
    dev.off()
    
    jpeg(filename = paste("sdplot__sim_lange_", i, ".jpg", sep = ""), width = 800, height = 800, pointsize = 14)
    image(sd.for.plot, main = "Posterior standard deviation")
    dev.off()
    
    ## look at residuals and plot
    
    A <- inla.spde.make.A(mesh, loc=as.matrix(infections[,1:2]))
    post.mean.at.obs <- drop(A%*%model$summary.random$index$mean)
    resid <- post.mean.at.obs - infections$time
    jpeg(filename = paste("mean_residual_plot_lange_", i, ".jpg", sep = ""), width = 800, height = 800, pointsize = 14)
    plot(post.mean.at.obs, resid, pch = 20, main = "mean-residual plot")
    dev.off()


}


## Now to look at the real ASF data

library(INLA)

load(file = "ASF.RData")
dim(table2)
## [1] 76  3
names(table2)
## [1] "x" "y" "t"
infections <- table2
names(infections) <- c("y", "x", "time")  # just to make it identical to simulations, above
infections$y <- (infections$y - min(infections$y))/(max(infections$y) - min(infections$y))
infections$x <- (infections$x - min(infections$x))/(max(infections$x) - min(infections$x))
## rescale x and y to be on scale [0..1] as simulated data were

jpeg(filename = "caseplot_asf.jpg", width = 800, height = 800)
plot(infections$x, infections$y, col = heat.colors(n=10)[ceiling((infections$time - min(infections$time))/ (max(infections$time) - min(infections$time))*10)],
     xlab = "", ylab = "")
dev.off()

jpeg(filename = "histogram_asf.jpg", width = 800, height = 800)
hist(infections$time, xlab = "Time of infection", main = "Histogram of infection times")
dev.off()

cases.points <- unique(infections[,2:1]) # x, y columns note reversed in order in asf data
cases.points$new.index <- 1:length(cases.points$x)
mesh = inla.mesh.2d(as.matrix(cases.points[,1:2]),
                    boundary = list(##inla.nonconvex.hull(points = as.matrix(cases.points[,1:2]), convex=-0.05, resolution = 150)),
                        inla.nonconvex.hull(points = as.matrix(cases.points[,1:2]), convex=-0.12, resolution = 100)),
                    max.edge=0.3)
infections$index <- numeric(length(infections$x))
for(case in 1:length(infections$x)) {
    infections$index[case] <- cases.points$new.index[(cases.points$x == infections$x[case]) &
                                                     (cases.points$y == infections$y[case])]
}

formula <- time ~ f(index, model = inla.spde2.matern(mesh,2))

model <- inla(formula, family = "gaussian", data = infections)

save(model, file = "model_asf.RData")
save(mesh, file = "mesh_asf.RData")
load("model_asf.RData")
load("mesh_asf.RData")

## model plots

jpeg(filename = "meshplot_asf.jpg", width = 800, height = 800, pointsize = 14)
plot(mesh)
points(cases.points, col = "red")
dev.off()

grid <- inla.mesh.projector(mesh)
mean.for.plot <- inla.mesh.project(grid, model$summary.random$index$mean)
sd.for.plot <- inla.mesh.project(grid, model$summary.random$index$sd)

jpeg(filename = "meanplot_asf.jpg", width = 800, height = 800, pointsize = 14)
image(mean.for.plot, main = "Posterior mean value")
dev.off()

jpeg(filename = "sdplot_asf.jpg", width = 800, height = 800, pointsize = 14)
image(sd.for.plot, main = "Posterior standard deviation")
dev.off()
        
## look at residuals and plot

A <- inla.spde.make.A(mesh, loc=as.matrix(infections[,2:1]))
post.mean.at.obs <- drop(A%*%model$summary.random$index$mean)
resid <- post.mean.at.obs - infections$time
jpeg(filename = "mean_residual_plot_asf.jpg", width = 800, height = 800, pointsize = 14)
plot(post.mean.at.obs, resid, pch = 20, main = "Mean-residual plot", xlab= "Mean of posterior distribution", ylab = "Residual")
dev.off()

