
# http://www.petrkeil.com/?p=1050

# -----------------------------------------------------------------------------------------------------
# options

options(stringsAsFactors = FALSE)


# -----------------------------------------------------------------------------------------------------
# packages

packages <- c("texreg", "lme4", "nlme", "plyr", "MASS", "geosphere",
              "mgcv", "ncf", "spdep", "arm", "gstat", "ape", "automap", "MuMIn") 
sapply(packages, require, character.only = TRUE)


# --------------------------------------------------------------------------------------------------------------------------------
# load data

# Placeholder paths: replace with the real output (project) and input (data) directories.
project_dir <- "path-to-output-directory"
data_dir <- "path-to-input-directory"
# every relative path used below ("data/", "figures/", "tables/") resolves against project_dir
setwd(project_dir)

# Restores previously saved objects into the workspace. This script goes on to use
# combined_PP, logged_variables and responseColumns without defining them, so they
# presumably come from this file -- TODO confirm.
load("data/data_PP.Rdata")

# county data
countyDat <- read.csv(file.path(data_dir, "County Data/Master County Dataset.csv")) 
# lower-case copy of the FIPS column, used as the merge key below
countyDat$fips <- countyDat$FIPS

# merge in human variables
# all.x = TRUE keeps every row of combined_PP; rows whose fips has no match in
# countyDat get NA for pop2010 and popdensity
human_variables <- c("fips", "pop2010", "popdensity")
combined_PP <- merge(combined_PP, countyDat[, human_variables], by = "fips", all.x = TRUE)

# exponentiate logged variables
stay_logged <- c("species.i", "genus.i", "family.i", "MNTD.ALL", "MNTD.INV", "MNTD.NAT", "MNTD.NONINV",
    "MNTD.INTRO", "PD.NONINV", "PD.INTRO", "PD.ii.NII", "PD.ee", "PD.ei", "MPD.ne", "MPD.ei")
# NOTE(review): %!in% is not defined anywhere in this script; presumably it is
# supplied by the loaded workspace or a sourced helper -- confirm.
# exp_variables is only referenced by the commented-out line below, so it is
# currently unused.
exp_variables <- logged_variables[logged_variables %!in% stay_logged]                  
# (disabled) exponentiate only the variables that are not in stay_logged:
# combined_PP[, exp_variables] <- lapply(combined_PP[, exp_variables], exp)                 
# active version: every column listed in logged_variables is exponentiated,
# including those named in stay_logged
combined_PP[, logged_variables] <- lapply(combined_PP[, logged_variables], exp)  ##########  NEED TO EXPONENTIATE ALL

# number of counties in each state
# (one row per state with the number of fips entries; the result is only printed)
ddply(combined_PP, .(state), summarise, count = length(fips))


# -----------------------------------------------------------------------------------------------------
# standardize some predictor variables

# Add a z-scored copy (<name>_std) of each predictor listed in std_vars:
# (x - mean(x)) / sd(x).
# na.rm = TRUE is needed because the merge with the county data uses
# all.x = TRUE and can therefore leave NA in pop2010 / popdensity; without it a
# single NA would turn the whole standardized column into NA. Columns without
# NA give exactly the same values as before.
std_vars <- c("gmted2010.elev_mean.mn",
    "hwsd.t_clay.mn", "hwsd.t_ece.mn", "hwsd.t_gravel.mn", "hwsd.t_oc.mn",
    "hwsd.t_ph.mn", "hwsd.t_sand.mn",
    "bio01.mn", "bio07.mn", "bio08.mn", "bio12.mn", "bio15.mn",
    "pop2010", "popdensity", "area")
combined_PP[paste0(std_vars, "_std")] <- lapply(
    combined_PP[std_vars],
    function(x) (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE)
)

# Two-level factor: "no" where glaciation equals 1, "yes" for every other
# value (%in% never returns NA, so a missing glaciation value maps to "yes").
combined_PP$glaciation_binary <- factor(ifelse(combined_PP$glaciation %in% 1, "no", "yes"))


#######################################################################################################
# create autocovariate term

# EPSG:4326 = lat/long, WGS 84
# EPSG:2163 = onshore and offshore equal area projection, where coordinates are in meters
# ESRI:102008 = Albers continental US equal area projection, where coordinates are in meters
# https://source.opennews.org/articles/choosing-right-map-projection/
# https://groups.google.com/forum/#!topic/unmarked/KYrrgnxVzAg

# proj4 projection strings
# unprojected longitude/latitude on WGS84
proj4_lat_lon <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
# Albers equal-area conic, units in metres (the projection actually used below)
proj4_US_contig_Albers_eqArea_conic <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs" 
# Lambert azimuthal equal-area, units in metres; defined but not used in this
# part of the script (note the "Altas" spelling in the variable name)
proj4_US_Nat_Altas_eqArea <- "+proj=laea +lat_0=45 +lon_0=-100 +x_0=0 +y_0=0 +a=6370997 +b=6370997 +units=m +no_defs" 

# create SpatialPointsDataFrame
# work on a copy so combined_PP itself stays a plain data frame
coords_lat_lon <- combined_PP
# promote the copy to a spatial object using its longitude/latitude columns
coordinates(coords_lat_lon) <- c("longitude", "latitude")
proj4string(coords_lat_lon) <- CRS(proj4_lat_lon) 
# project coordinates
coords_eqArea <- spTransform(coords_lat_lon, CRS(proj4_US_contig_Albers_eqArea_conic)) 
# create a matrix of coordinates
# the columns keep the names "longitude"/"latitude" but now hold the projected
# coordinates of coords_eqArea
coords_mat_eqArea <- as.matrix(as.data.frame(coords_eqArea)[, c("longitude", "latitude")])
# (disabled) alternative that stores the projected coordinates in combined_PP
# and builds the matrix from those columns:
# store in data frame
#coords_eqArea_dat <- as.data.frame(coords_eqArea)
#combined_PP[, c("longitude_eqArea", "latitude_eqArea")] <- coords_eqArea_dat[, c("longitude", "latitude")]
# create a matrix of coordinates
#coords_mat_eqArea <- as.matrix(combined_PP[, c("longitude_eqArea", "latitude_eqArea")])


# -----------------------------------------------------------------------------------------------------
# determine range of autocorrelation

# range <- autofitVariogram(formula(paste(i, "~ 1")), input_data = coords_eqArea)$var_model[2, "range"]
# range <- 200000
# create neighbour list
# nb_list <- dnearneigh(coords_mat_eqArea, 0, range)
# create weights list
# nb_weights <- nb2listw(nb_list, zero.policy = TRUE)

# Fixed-effect predictors shared by the models below (the same vector is
# assigned again before the final models are fitted).
explain_predictors <- c("bio01.mn_std", "bio07.mn_std", "bio08.mn_std", "bio12.mn_std", "bio15.mn_std", "gmted2010.elev_mean.mn_std",
    "gmted2010.elev_mean.sd", "hwsd.t_clay.mn_std", "hwsd.t_ece.mn_std", "hwsd.t_gravel.mn_std", "hwsd.t_oc.mn_std", "hwsd.t_ph.mn_std", 
    "hwsd.t_sand.mn_std", "glaciation_binary", "area_std", "enow.coast") # "longitude", "latitude", "long_x_lat"
    
# get autocovariate ranges
# one entry per response: the range parameter of the variogram fitted to the
# residuals of the non-spatial mixed model
ranges <- list()

for (i in responseColumns) {
	print(i)
	# create formula for non-spatial model
    fixed <- paste(i, "~", paste(explain_predictors, collapse = " + "))
    # random intercept for each state
    random <- "+ (1 | state)"
    # fit non-spatial models and store residuals
    # na.exclude keeps the residual vector aligned with the rows of combined_PP
    # (NA where a row was dropped from the fit)
    nonSpatial_resid <- as.vector(resid(lmer(formula(paste(fixed, random)), data = combined_PP, na.action = "na.exclude")))
    # attach the residuals to the spatial object so the variogram formula can
    # refer to them by name; this overwrites the column on every iteration
    coords_eqArea@data$nonSpatial_resid <- nonSpatial_resid
    # exclude missing values for the residuals
    coords_eqArea_NA <- coords_eqArea[complete.cases(coords_eqArea@data$nonSpatial_resid), ]
	# determine range of autocorrelation
    # row 2 of var_model is read as the fitted structure; presumably row 1 is
    # the nugget component -- TODO confirm against automap
    ranges[[i]] <- autofitVariogram(formula(paste("nonSpatial_resid", "~ 1")), input_data = coords_eqArea_NA)$var_model[2, "range"]
}

# print the fitted ranges in ascending order, divided by 1000 (the projected
# coordinates are in metres, so this reads as kilometres)
round(sort(unlist(ranges))/1000)
# -----------------------------------------------------------------------------------------------------
# -----------------------------------------------------------------------------------------------------

# pad out predictions with NAs to length of original data (3066)
# Re-expand a vector that was computed on the non-missing residuals to the
# full length of `resid`, with NA wherever `resid` is NA.
#
# Args:
#   x:     values for the non-NA entries of `resid`, in the same order.
#   resid: vector (possibly containing NA) whose length and NA pattern define
#          the output; defaults to the global `nonSpatial_resid`.
#
# Returns:
#   An unnamed vector of length(resid) holding `x` at the non-NA positions of
#   `resid` and NA elsewhere.
pad_NA <- function(x, resid = nonSpatial_resid) {
    observed <- !is.na(resid)
    # a length mismatch would silently shift values onto the wrong rows
    stopifnot(length(x) == sum(observed))
    # NA vector of the same storage type as x (also when x is empty)
    padded <- rep(unname(x)[NA_integer_], length(resid))
    padded[observed] <- x
    padded
}


# Fit residual-autocovariate (RAC) mixed models, one per response.
#
# For each response:
#   1. fit the non-spatial model  response ~ predictors + (1 | state);
#   2. fit a variogram to its residuals and take the range, clamped to
#      `range_limits`, as the neighbourhood distance;
#   3. compute a distance-weighted autocovariate of the residuals within that
#      distance, once with inverse and once with inverse-squared weights;
#   4. refit the model with each standardized autocovariate added as a fixed
#      effect and keep whichever of the two fits has the lower AIC.
#
# Args:
#   response:     character vector of response column names in `dat`.
#   mat:          coordinate matrix, one row per row of `dat`.
#   coords_SPDF:  spatial points object with rows in the same order as `dat`.
#   dat:          data frame holding responses, predictors and `state`.
#   predictors:   character vector of fixed-effect terms.
#   range_limits: c(lower, upper) bounds applied to the fitted variogram range,
#                 in the units of `mat`.
#
# Returns:
#   A list of fitted lmer models named by response.
rac.model <- function(response = responseColumns, mat = coords_mat_eqArea,
                                    coords_SPDF = coords_eqArea, dat = combined_PP,
                                    predictors = explain_predictors,
                                    range_limits = c(70000, 250000)) {
    stopifnot(length(range_limits) == 2, range_limits[1] <= range_limits[2])
    models <- list()
    models_2 <- list()
    random <- "+ (1 | state)"
    for (i in response) {
        print(i)
        # create formula for non-spatial model
        fixed <- paste(i, "~", paste(predictors, collapse = " + "))
        # fit non-spatial model; na.exclude keeps residuals aligned with rows of dat
        nonSpatial_resid <- as.vector(resid(lmer(formula(paste(fixed, random)), data = dat, na.action = "na.exclude")))
        # the variogram formula looks the residuals up by name in the spatial object
        coords_SPDF@data$nonSpatial_resid <- nonSpatial_resid
        # restrict everything to rows that have a residual
        has_resid <- !is.na(nonSpatial_resid)
        resid_obs <- nonSpatial_resid[has_resid]
        coords_obs <- coords_SPDF[has_resid, ]
        # drop = FALSE keeps a matrix even if only one row remains
        mat_obs <- mat[has_resid, , drop = FALSE]
        # determine range of autocorrelation and clamp it to range_limits
        fitted_range <- autofitVariogram(nonSpatial_resid ~ 1, input_data = coords_obs)$var_model[2, "range"]
        nb_dist <- min(max(as.numeric(fitted_range), range_limits[1]), range_limits[2])
        print(round(nb_dist / 1000))
        # calculate residual autocovariates; autocov_dist() derives the
        # neighbours and weights itself from `nbs`, so no separate neighbour or
        # weights list is built here
        dat$rac <- pad_NA(autocov_dist(resid_obs, mat_obs, nbs = nb_dist, type = "inverse", zero.policy = TRUE), resid = nonSpatial_resid)
        dat$rac_std <- (dat$rac - mean(dat$rac, na.rm = TRUE)) / sd(dat$rac, na.rm = TRUE)
        dat$rac2 <- pad_NA(autocov_dist(resid_obs, mat_obs, nbs = nb_dist, type = "inverse.squared", zero.policy = TRUE), resid = nonSpatial_resid)
        dat$rac2_std <- (dat$rac2 - mean(dat$rac2, na.rm = TRUE)) / sd(dat$rac2, na.rm = TRUE)
        # fit model with residual autocovariate
        models[[i]] <- lmer(formula(paste(fixed, "+ rac_std", random)), data = dat, na.action = "na.exclude")
        models_2[[i]] <- lmer(formula(paste(fixed, "+ rac2_std", random)), data = dat, na.action = "na.exclude")
    }
    # keep the inverse-squared fit wherever it has the strictly lower AIC
    aic_inverse <- vapply(models, AIC, numeric(1))
    aic_squared <- vapply(models_2, AIC, numeric(1))
    use_squared <- which(aic_inverse > aic_squared)
    models[use_squared] <- models_2[use_squared]
    models
}



#######################################################################################################
######################### MIXED MODELS WITH SEPARATE AUTOCOVARIATE TERM #######################
#######################################################################################################

# -----------------------------------------------------------------------------------------------------
# which environmental variables explain diversity?

# lon, lat, enow.coast and area should be included as geographic controls. Glaciation should also be included as environmental variable. 

# Same predictor vector as the one assigned before the variogram-range loop;
# repeated here so this section can be run without that loop.
explain_predictors <- c("bio01.mn_std", "bio07.mn_std", "bio08.mn_std", "bio12.mn_std", "bio15.mn_std", "gmted2010.elev_mean.mn_std",
    "gmted2010.elev_mean.sd", "hwsd.t_clay.mn_std", "hwsd.t_ece.mn_std", "hwsd.t_gravel.mn_std", "hwsd.t_oc.mn_std", "hwsd.t_ph.mn_std", 
    "hwsd.t_sand.mn_std", "glaciation_binary", "area_std", "enow.coast")  # "longitude", "latitude", "long_x_lat"


# Fit the RAC models in four groups. The three non-taxon groups each add one
# extra predictor (totalOntree, totalOntree.n, totalOntree.int) to the shared set.
# taxon group: every response whose name contains "species", "genus" or "family"
explain_models_taxon <- rac.model(response = grep("species|genus|family", responseColumns, value = TRUE),
    predictors = explain_predictors)
explain_models_all <- rac.model(response = c("PD.ALL",  "MPD.ALL", "PD.s.ALL", "MPD.s.ALL"),
    predictors = c(explain_predictors, "totalOntree"))
explain_models_n <- rac.model(response = c("PD.NAT",  "MPD.NAT", "PD.s.NAT", "MPD.s.NAT"),
    predictors = c(explain_predictors, "totalOntree.n"))
explain_models_int <- rac.model(response = c("PD.INTRO",  "MPD.INTRO", "PD.s.INTRO", "MPD.s.INTRO"),
    predictors = c(explain_predictors, "totalOntree.int"))                      	

# one flat list of models, named by response
explain_models <- c(explain_models_taxon, explain_models_all, explain_models_n, explain_models_int)


# cache the fitted models; the commented load() restores them without refitting
save(explain_models, file = "data/explain_models_varImp_ALL_EXPLANATORY.Rdata", compress = "gzip")
# load("data/explain_models_varImp_ALL_EXPLANATORY.Rdata")

explain_models_summary <- lapply(explain_models, summary)

# AIC of each selected model (printed only)
sapply(explain_models, AIC)

# delta AIC
# (disabled: explain_models_2 is not created anywhere in this script)
# round(sort(sapply(explain_models, AIC) - sapply(explain_models_2, AIC), decreasing = TRUE))

# sorted coefs
# per model: fixed effects whose names contain "hwsd", "bio" or "gmted",
# rounded to 2 digits
explain_coefs <- lapply(explain_models, function(x) round(fixef(x)[grep("hwsd|bio|gmted", names(fixef(x)), value = TRUE)], digits = 2))
# one column per model, one row per coefficient
explain_coefs_df <- as.data.frame(explain_coefs)
# mean coefficient across models, then sort rows by its absolute value (largest first)
explain_coefs_df$mean <- rowMeans(explain_coefs_df)
explain_coefs_df <- explain_coefs_df[order(abs(explain_coefs_df$mean), decreasing = TRUE), ]
write.csv(explain_coefs_df, file = "data/explain_coefs_varImp_ALL_EXPLANATORY.csv")


# Write one diagnostic plot per fitted model to a single PDF, titled with the
# model's name.
pdf("figures/explain_models_varImp.pdf", height = 4, width = 4)
# NOTE(review): the explicit print() below suggests the plot method returns a
# lattice object; if so, par(mfrow) has no effect on it and every model is
# drawn on its own page -- confirm.
op <- par(mfrow = c(7, 10))
for (i in names(explain_models)) {
    print(plot(explain_models[[i]], main = i))
}
# restore the previous graphics parameters and close the PDF device
par(op)
dev.off()

# 48489 - Willacy, Texas is a major outlier for MPDs models (area = 1916.84)


# -----------------------------------------------------------------------------------------------------
# relative importance

# NOTE(review): this loads "data/explain_models.Rdata", not the
# "data/explain_models_varImp_ALL_EXPLANATORY.Rdata" file saved above, so it
# presumably replaces explain_models with a previously saved version -- confirm
# which set of models the rest of the script is meant to use.
load("data/explain_models.Rdata")

# from MuMIn - take out??? ##################################################################### TAKE OUT
# Exploratory model-subset search on the first model only.
# NOTE(review): the names passed to `fixed` ("glaciation", "area") differ from
# the predictor names used in the models ("glaciation_binary", "area_std"), so
# they may not match any model term -- confirm before relying on this.
# NOTE(review): the models are fitted with na.action = "na.exclude"; check
# whether dredge() accepts that setting.
test <- dredge(explain_models[[1]], 
    fixed = c("glaciation", "area", "enow.coast") # "longitude", "latitude", "long_x_lat"
    )
importance(test)
       

# -----------------------------------------------------------------------------------------------------
# final tables

responsesTOTAL <- c("species", "genus", "family", "PD.ALL", "MPD.ALL", "PD.s.ALL", "MPD.s.ALL")
responsesINTRO <- c("species.i", "genus.i", "family.i", "PD.INTRO", "MPD.INTRO", "PD.s.INTRO", "MPD.s.INTRO")
responsesNAT <- c("species.n", "genus.n", "family.n", "PD.NAT", "MPD.NAT", "PD.s.NAT", "MPD.s.NAT")

# Write one regression table for a named subset of models.
#
# Args:
#   responses: character vector of model names to select from `models`.
#   file:      output path handed to htmlreg().
#   caption:   table caption.
#   models:    named list of fitted models; defaults to explain_models.
#
# The column headers are taken from the names of the selected sub-list.
# (Indexing names(models) by name, as was done before, returns NA for every
# entry because the character vector returned by names() is itself unnamed.)
write_explain_table <- function(responses, file, caption, models = explain_models) {
    selected <- models[responses]
    invisible(htmlreg(
        l = selected,
        file = file,
        custom.model.names = names(selected),
        caption = caption,
        ci.force = TRUE,
        inline.css = FALSE
    ))
}

write_explain_table(responsesTOTAL, "tables/final_explain_table_TOTAL.doc",
    "Variables that explain taxonomic and alpha diversity")
write_explain_table(responsesINTRO, "tables/final_explain_table_INTRO.doc",
    "Variables that explain taxonomic and alpha diversity")
write_explain_table(responsesNAT, "tables/final_explain_table_NAT.doc",
    "Variables that explain taxonomic and alpha diversity")


# -----------------------------------------------------------------------------------------------------
# tables

# NOTE(review): taxonResponses, alphaResponses and betaResponses are not
# defined in this script; presumably they come from a loaded workspace -- confirm.

# taxonomic
write_explain_table(taxonResponses, "tables/explain_taxon_rac_raw_ALL_EXP.doc",
    "Variables that explain taxonomic diversity")

# alpha diversity 1-5: six responses per table (1:6, 7:12, ..., 25:30)
for (k in seq_len(5)) {
    write_explain_table(
        alphaResponses[(6 * k - 5):(6 * k)],
        sprintf("tables/explain_alpha%d_rac_raw_ALL_EXP.doc", k),
        paste("Variables that explain alpha diversity", k)
    )
}

# beta diversity 1-3: ten responses per table (1:10, 11:20, 21:30)
for (k in seq_len(3)) {
    write_explain_table(
        betaResponses[(10 * k - 9):(10 * k)],
        sprintf("tables/explain_beta%d_rac_raw_ALL_EXP.doc", k),
        paste("Variables that explain beta diversity", k)
    )
}



