#
# This code applies the moving window functions and saves the results in a data file
# that is then used by the main paper to make the figures.
#
# AUTHOR: Jonathan S Reeves and Shannon P. McPherron
#
# Last modified November 14, 2018
#
############################

## This code requires the following libraries or packages
##    sp, spdep
## Additionally, the code is optimized for multiple threads using these packages
##    doParallel and foreach
## If threading causes problems, they can easily be written out of the moving_window_par function


## The following three functions are the core of the moving window analysis.
## The first computes the test statistic for the window around each artifact.
## The second determines what spatial scale gives the most statistically signicant clustering results.
## The third one computes the clustering at that scale.

moving_window_par <- function(dat, eq, neighborhood, colname, ncores = ncores) {
  ## dat = the spatial dataframe that holds the data to be tested
  ## eq = the function to be applied to each neighborhood
  ## neighborhood = the size of the moving window
  ## colname = the name given to the column that holds the result
  ## ncores = the number of CPU cores to be used (careful - a number larger than the available cores with crash the program)  

  ## The par in the function name refers to the parallelization done here.
  library(doParallel)
  library(foreach)
  
  print(paste("  Starting moving_window_par using",ncores,"CPU cores."))
  cl <- makeCluster(ncores)
  registerDoParallel(cl)
  results <- foreach(i = 1:nrow(dat@data), .combine = c, .packages= c('rgeos', "sp")) %dopar% {
    buff <- gBuffer(spgeom = dat[i,], width = neighborhood, capStyle = "ROUND", joinStyle = "ROUND")
    eq(subset(dat, over(dat, buff) == 1), neighborhood)
  }
  dat$eq = results
  names(dat)[names(dat)=="eq"] <- colname
  stopCluster(cl)
  return(dat)
}  

## This function is not used in this version of our code.  It was an earlier version
## that is now more optimized with the mi.dist.par() function that follows.  However,
## we have left it here because it returns a data.frame that is more useful for
## examining the effects of scale on the Moran I's index.
mi.dist.plot <- function(dat, var, lower, icr, n) {
  ## dat = the spatial dataframe that holds the data that you want to test 
  ## var = the variable from the moving window results that you are testing
  ## lower = the lower bound for your neighborhood
  ## icr = the increment that you want your neighborhood to increase by 
  ## n = the number of increments to run
  
  q <- data.frame("band" = 1:n,
                  "neighborhood" = 1:n,
                  "morans.I" = 1:n,
                  "p.value" = 1:n,
                  "class" = 1:n)
  dmax <- lower
  for (i in 1:n){
    print(i) ## a progress bar to make sure its actually doing something and not crashed. 
    dmax <- dmax + icr
    q$neighborhood[i] <- dmax
    nb <- dnearneigh(dat, d1 = lower, d2 = dmax)
    wt <- nb2listw(nb, zero.policy = TRUE)
    mt <- moran.test(var, listw = wt, zero.policy = TRUE, na.action = na.exclude)
    q$morans.I[i] <- mt$estimate[1]
    q$p.value[i] <- mt$p.value
  }

  f <- list(correlo.dat = q, scale.band = q$neighborhood[q$morans.I == max(q$morans.I)])
  return(f)
  
}

## This function is the same as the one just above except that it has been optimized
## to use multiple cores and to return just the statistic that we need for the 
## analysis that follows after.
mi.dist.par <- function(dat, var, lower, icr, n, ncores = ncores) {
  ## dat = the spatial dataframe that holds the data that you want to test 
  ## var = the variable from the moving window results that you are testing
  ## lower = the lower bound for your neighborhood
  ## icr = the increment that you want your neighborhood to increase by 
  ## n = the number of increments to run

  ## The par in the function name refers to the parallelization done here.
  library(doParallel)
  library(foreach)

  cl <- makeCluster(ncores)
  registerDoParallel(cl)
  
  print(paste("  Starting mi.dist.par using",ncores,"CPU cores."))
  
  results <- foreach(dmax = seq(lower + icr, by = icr, length.out = n),
                     .combine = c, .packages= c('spdep')) %dopar% {
                       nb <- dnearneigh(dat, d1 = lower, d2 = dmax)
                       wt <- nb2listw(nb, zero.policy = TRUE)
                       mt <- moran.test(var, listw = wt, zero.policy = TRUE, na.action = na.exclude)
                       mt$estimate[1]
                     }
   stopCluster(cl)
  return(seq(lower + icr, by = icr, length.out = n)[which.max(results)])
}



local.moransI <- function(dat, lower, upper, var, colname) {
  ## dat <- must be a matrix of coordinates. SpatialPointDataFrames work on their own. If you are using a SpatialPolygon then you need to the coordinates() function retrieve its coordinate matrix. 
  ## lower <- minimum neighborhood distance. 
  ## upper <- maximum neighborhood distance.
  ## var <- a continuous variabile you are interested in.
  ## colname <- the name of the column for the results

  print("  Computing local.moransI.")
  
  nb <- dnearneigh(x = dat, d1 = lower, d2 = upper)
  wt <- nb2listw(nb, zero.policy = TRUE)
  lm <- localmoran(var, wt, zero.policy = TRUE, p.adjust.method = "bonferroni", na.action = na.exclude)
  lmI <- as.data.frame(lm)
  dat$LMI_Z <- lmI$Z.Ii 
  dat$S_COT <- "Nonsig" #### Where Cluster Outlier Data will be stored
  dat$S_COT[dat$LMI_Z >= 1.96 & var > mean(var)] <- "HH"
  dat$S_COT[dat$LMI_Z >= 1.96 & var < mean(var)] <- "LL"
  dat$S_COT[dat$LMI_Z <= -1.96 & var < mean(var)] <- "LH"
  dat$S_COT[dat$LMI_Z <= -1.96 & var > mean(var) ] <- "HL"
  names(dat)[names(dat)=="LMI_Z"] <- paste0(colname,"_Z")
  names(dat)[names(dat)=="S_COT"] <- paste0(colname,"_COT")
  return(dat)
} 

############################
#
# What follows are a series of functions that are used to characterize each neighborhood.
# These functions are passed to the moving.window.par() function listed above.
#
############################

# Density of Artifacts
GPF.Density <- function(dat, neighborhood) {
  nrow(dat)  
} 


#Weight ratio
Weight2layer <- function(dat, neighborhood){
  median(dat$WEIGHT)   #layer median is .1, mean is 2.69
}

#Microdebris 2D ratio 
Micro2D <- function(dat, neighborhood){
  length(subset(dat, CUTOFF == FALSE)) / nrow(dat)
}

# Microdebris no quemado contra todo no quemado
ub.micro <- function(dat, neighborhood){
  dat <- subset(dat, BURNT==FALSE)
  length(subset(dat, CUTOFF == FALSE)) / nrow(dat)
} 

# Densidad de microdebris
dens.microdeb <- function(dat, neighborhood){
  length(subset(dat, CUTOFF == FALSE))
} 

# Densidad de microdebris no quemado
dens.ub.micr <- function(dat, neighborhood){
  length(subset(dat, CUTOFF == FALSE & BURNT == FALSE))
}




#Large artifact 2D ratio (DC5 stands for remains >200 mm2, that represent 26% of the assemblage)
Large2D <- function(dat, neighborhood){
  length(subset(dat, DIM_CLASS == "DC5")) / nrow(dat)
}




## % of burnt elements, accounting for everything
burnt.elements <- function(dat, neighborhood) {
  length(subset(dat, BURNT == TRUE)) / nrow(dat)
}

# Burning Ratio (Standardized by flakes)
# Name stands for burned flakes to unburned flakes
bflk2ubflk <- function(dat, neighborhood) {
  (length(subset(dat, CATEGORY == "PROX_FLAKE" & BURNT == TRUE)) + (length(subset(dat, CATEGORY == "FLAKE" & BURNT == TRUE))))/ 
    (length(subset(dat, CATEGORY == "PROX_FLAKE")) + (length(subset(dat, CATEGORY == "FLAKE"))))
}

#Burnt complete elements filtered
burntcomplete <- function(dat, neighborhood) {
  dat <- subset(dat, CATEGORY=="FLAKE"|CATEGORY=="PROX_FLAKE"|CATEGORY=="CORE"|CATEGORY=="RETOUCHED"|CATEGORY=="BLADELET")
  length(subset(dat, BURNT == TRUE)) / nrow(dat)
}  

#Burnt mass ratio (Acounting for all the lithic remains in the neighborhood)
burnt.mass <- function(dat, neighborhood) {
  sum(dat$WEIGHT[dat$BURNT == TRUE]) / sum(dat$WEIGHT)
}






#Patiantion ratio (for every remain in the neighborhood, considering the partially patinated as patinated and accounting just for the remains completely free of patina)
Patin.ratio <- function(dat, neighborhood){
  dat <- subset(dat, CATEGORY=="FLAKE"|CATEGORY=="PROX_FLAKE"|CATEGORY=="CORE"|CATEGORY=="RETOUCHED"|CATEGORY=="BLADELET")
  length(subset(dat, PATINA == "NP")) / nrow(dat)
}

# Retouched to flake Ratio (with size cutoff that exclude the non-geometric microliths)
retouched2flake2 <- function(dat, neighborhood) {
  (length(subset(dat, CATEGORY == "RETOUCHED")) + (length(subset(dat, CATEGORY == "RETOUCHED_FRAG")))) / 
    (length(subset(dat, CATEGORY == "FLAKE")) + (length(subset(dat, CATEGORY == "PROX_FLAKE"))))
}

# Flake to core ratio (with size cutoff for the flakes and proximal flakes)
flk2core <- function(dat, neighborhood) {
  length(subset(dat, CATEGORY == "CORE")) / 
    (length(subset(dat, CATEGORY == "FLAKE" & CUTOFF == TRUE)) + (length(subset(dat, CATEGORY == "PROX_FLAKE" & CUTOFF == TRUE))))
}




# Cortex To Mass ratio (filtered categories)
cortex2mass <- function(dat, neighborhood){
  dat <- subset(dat, CATEGORY=="FLAKE"|CATEGORY=="BLADELET"|CATEGORY=="BLADELET_FRAG"|CATEGORY=="FLAKE_FRAG"|CATEGORY=="PROX_FLAKE")
  sum(dat$CTX_SURF_AREA) / sum(dat$WEIGHT)
}

# Cortex to area ratio (filtered categories)
cortex2area <- function(dat, neighborhood){
  dat <- subset(dat, CATEGORY=="FLAKE"|CATEGORY=="BLADELET"|CATEGORY=="BLADELET_FRAG"|CATEGORY=="FLAKE_FRAG"|CATEGORY=="PROX_FLAKE")
  sum(dat$CTX_SURF_AREA) / sum(dat$AREA)
}

# Mean pctg ctx without restriction
meanctxpctg <- function(dat, neighborhood){
  dat <- subset(dat, CATEGORY=="FLAKE"|CATEGORY=="BLADELET"|CATEGORY=="BLADELET_FRAG"|CATEGORY=="FLAKE_FRAG"|CATEGORY=="PROX_FLAKE")
  mean(dat$CTX_PCTG)
}

# Mean pctg ctx restricted to BP and BPF
meanctxpctgrestr <- function(dat, neighborhood){
  dat <- subset(dat, CATEGORY=="FLAKE"|CATEGORY=="PROX_FLAKE")
  mean(dat$CTX_PCTG)
}

# Restricted ctx area to area
ctx2arearestr <- function(dat, neighborhood){
  dat <- subset(dat, CATEGORY=="FLAKE"|CATEGORY=="PROX_FLAKE"|CATEGORY=="BLADELET")
  sum(dat$CTX_SURF_AREA) / sum(dat$AREA)
}


# Breakage ratio (Name stands for unburned flake breakage) With size cutoff
ubflkbreak <- function(dat, neighborhood) {
  length(subset(dat, CATEGORY == "PROX_FLAKE" & BURNT == FALSE & CUTOFF == TRUE)) / 
    (length(subset(dat, CATEGORY == "PROX_FLAKE" & BURNT == FALSE & CUTOFF == TRUE)) + (length(subset(dat, CATEGORY == "FLAKE" & BURNT == FALSE & CUTOFF == TRUE))))
}






# Flake Weight compared to layer
med.weight.comp.ub.flakes <- function(dat, neighborhood){
  dat <- subset(dat, CATEGORY == "FLAKE")
  median(dat$WEIGHT, na.rm = TRUE)
}

# Flake area compared to layer
med.area.comp.ub.flakes <- function(dat, neighborhood){
  dat <- subset(dat, CATEGORY == "FLAKE")
  median(dat$AREA, na.rm = TRUE)
}

# Median flake weigth within the neighborhood, without contrasting to anything
med.flake.weight <- function(dat, neighborhood) {
  dat <- subset(dat, CATEGORY == "FLAKE")
  median(dat$WEIGHT, na.rm = TRUE)
}

# Mean flake weigth within the neighborhood, without contrasting to anything
mea.flake.weight <- function(dat, neighborhood) {
  dat <- subset(dat, CATEGORY == "FLAKE")
  mean(dat$WEIGHT, na.rm = TRUE)
}

# Median flake area within the neighborhood, without contrasting to anything
med.flake.area <- function(dat, neighborhood) {
  dat <- subset(dat, CATEGORY == "FLAKE")
  median(dat$AREA, na.rm = TRUE)
}

# Mean flake area within the neighborhood, without contrasting to anything
mea.flake.area <- function(dat, neighborhood) {
  dat <- subset(dat, CATEGORY == "FLAKE")
  mean(dat$AREA, na.rm = TRUE)
}



#Burnt debris among all debris
b.ub.debris <- function(dat, neighborhood) {
  length(subset(dat, CUTOFF == FALSE & BURNT == TRUE))/ 
    length(subset(dat, CUTOFF == FALSE))
}


# äreas de fracturación térmica: coger todo lo roto y ver burnt true
fract.term <-function(dat, neighborhood){
  dat <- subset(dat, CATEGORY=="FLAKE_FRAG"|CATEGORY=="PROX_FLAKE"|CATEGORY=="CORE_FRAG"|CATEGORY=="RETOUCHED_FRAG"|CATEGORY=="BLADELET_FRAG"|CATEGORY=="<10mm"|CATEGORY=="FRAG"|CATEGORY=="THERM_DEB")
  length(subset(dat, BURNT==TRUE)) / nrow(dat)
}

fract.term.restrc <-function(dat, neighborhood){
  dat <- subset(dat, CATEGORY=="FLAKE_FRAG"|CATEGORY=="PROX_FLAKE"|CATEGORY=="CORE_FRAG"|CATEGORY=="RETOUCHED_FRAG"|CATEGORY=="BLADELET_FRAG"|CATEGORY=="FRAG"|CATEGORY=="THERM_DEB")
  length(subset(dat, BURNT==TRUE)) / nrow(dat)
}


############################
#
# Here begins the main code.
#
############################

#### Load libraries ####
library(sp)
library(spdep)
library(dplyr)

#### Load data ####
## Minimally the data consist of XY coordinates, layers, and columns with attributes on each 
## artifact that go into characterizing each neighborhood using the functions list above.
xdata <- readRDS("Data/AV_2clusters.rds")
xdata$SITE = 'AV'
#xdata$LEVEL <- as.factor(xdata$CLUSTER)


xdata = xdata %>%
  select(ID, LEVEL, SITE, SQUARE, MATERIAL, CATEGORY, XABS, YABS, WEIGHT, PATINA, CTX_SURF_AREA, CTX_PCTG, BURNT, DIM_CLASS, CUTOFF, AREA)

#### Script Parameters #####
moranIstartband <- .05
neighborhood.size = .56 # Set the size of the neighborhood
ncores = 2  # The moving window function is set up to parallelize work. This should be set to the specifications of your computer. It is safest to level this at 1. Inappropriate allocation of proccessor cores can result in catastrophic crashes.
sample.size <- 3000 # This is for subsampling when using the moran's I for levels that are very big.

# The name of the moving window functions that you wish to have applied to the data set.
fun.names <- c( 
  #GPF.Density,
  #Weight2layer,
  #Micro2D,
  #ub.micro,
  #dens.microdeb,
  #dens.ub.micr
  #Large2D,
  #burnt.elements,
  #bflk2ubflk,
  #burntcomplete,
  #burnt.mass,
  #Patin.ratio,
  #retouched2flake2,
  #flk2core,
  #cortex2mass,
  #cortex2area,
  #meanctxpctg,
  #meanctxpctgrestr,
  #ctx2arearestr
  #ubflkbreak,
  #med.weight.comp.ub.flakes,
  #med.area.comp.ub.flakes,
  #med.flake.weight,
  #mea.flake.weight,
  #med.flake.area,
  #mea.flake.area,
  #b.ub.debris,
  fract.term,
  fract.term.restrc
)

# The names of the columns where the results from the above functions will be stored.
result.cols <- c( 
  #"density",
  #"weight_layer",
  #"microdebris",
  #"microdeb.ub",
  #"microdensity",
  #"ubmicrodens"
  #"large.elements",
  #"burning.elements",
  #"burning.flakes",
  #"burnt.compl",
  #"burning.mass",
  #"patination",
  #"retouched2flake",
  #"core2flake",
  #"cortex.to.mass",
  #"cortex.to.area",
  #"mean.pctg",
  #"mean.pctg.restr",
  #"rest.ctx.area"
  #"breakage",
  #"flake.weight",
  #"flake.area",
  #"median.flake.w",
  #"mean.flake.w",
  #"median.flake.a",
  #"mean.flake.a",
  #"burnt.microdebris"
  "termic.fract",
  "rest.termic.fract"
)

# The archaeological layers for which the analysis will be applied
levels <- c("A", "B")
sites <- c("AV")

#### Analysis Script ####

RESULT = NA

xdata = data.frame(xdata)

for(i in sites) {
  site <- i
  print(paste('Working on Site',site))
  AV <- subset(xdata, SITE == site) ## Get just the level we are working on
  level.size = nrow(AV)
  
  coordinates(AV) <- c("XABS", "YABS") ## Turns it into a spatial data set

  ## Loop through each of the functions
  for(j in 1:length(fun.names)){

    print(paste(' Looking at',result.cols[j]))
    
    
    ## Apply the moving window analysis
    AV <- moving_window_par(dat = AV,
                             eq = fun.names[[j]],
                             neighborhood = neighborhood.size,
                             colname = result.cols[j],
                             ncores = ncores )
    
    ## Replace infinte results (like dividing by zero) with zero
    AV@data[!is.finite(AV@data[,result.cols[j]]), result.cols[j]] <- 0
    
    ## Adjust neighborhood flake weight by the layer median flake weight
    if (result.cols[j]=="flake.weight") {
      AV@data[,"flake.weight"] = AV@data[,"flake.weight"] / median(AV@data$WEIGHT, na.rm = TRUE)
    }
    
    ## Adjust neighborhood flake area by the layer median flake area
    if (result.cols[j]=="flake.area") {
      AV@data[,"flake.area"] = AV@data[,"flake.area"] / median(AV@data$AREA, na.rm = TRUE)
    }
    
    ##Adjust neighborhood median weight by the layer median weight
    if (result.cols[j]=="weight_layer") {
      AV@data[,"weight_layer"] = AV@data[,"weight_layer"] / median(AV@data$WEIGHT, na.rm = TRUE)
    }
    
    
    ## Before computing spatial scale for the analysis,
    ## if the sample is large, subsample it.
    #if (level.size > sample.size) {
     # AV.sample <- AV[sample(nrow(AV), sample.size),] }
    #else {
    #  AV.sample <- AV }

    ## Get the scale at which there is the most clustering
    ##mi.dist.par does it using parallel processes and returning just the value of higher spatial autocorrelation
    ##mi.dist.plot besides selecting the value of higher spatial autocorrelation, provides the graphic
    #correlation  <- mi.dist.par(dat = AV.sample,
    #                            var = AV.sample@data[,result.cols[j]],
    #                            lower = moranIstartband,
    #                            icr = .05,
    #                            n = 30,
    #                            ncores = ncores)
    
    #After calculating the metrics for all the material, loop the Moran's I through the sectors to have more accuracy in the correspondence.
    
  }
}


    for (k in levels) {
      level <- k
      print(paste('Working on Level',level))
      AV2 <- subset(AV, LEVEL == level) ## Get just the level we are working on
      
      for(j in 1:length(fun.names)){
        
        print(paste(' Looking at',result.cols[j]))
        
   
    band_analysis  <- mi.dist.plot(dat = AV2,
                                   var = AV2@data[,result.cols[j]],
                                   lower = moranIstartband,
                                   icr = .05,
                                   n = 15)
    
    png(filename = paste("mi.dist.plot_", level,"_", result.cols[j], ".png", sep = ""))
    plot(band_analysis[["correlo.dat"]][["neighborhood"]], band_analysis[["correlo.dat"]][["morans.I"]], tck = 1, pch = 20, xlab = "Neighborhood", ylab = "Morans.I", main = paste("Metric:", result.cols[j], "_", level))
    dev.off()
    
    correlation = band_analysis$scale.band
    print(correlation)
   

    ## Computer MoransI at the scale determined in the previous step
    AV2 <- local.moransI(dat = AV2,
                         lower = moranIstartband,
                         upper = correlation,
                         var = AV2@data[,result.cols[j]], 
                         colname = result.cols[j])
    options(warn=2)
    
} 


  ## After repeating each of the test functions for a particular layer
  ## save the results.  Then repeat for the next layer and append
  ## the new results to the of the results output.
  if (length(RESULT)<2) {
    print("Creating Results table")
    RESULT <- AV2 }
  else {
      print("Appending Results Table")
      RESULT <- rbind(RESULT,AV2) }

    }

## After doing all of the test functions for all of the layers
## write the results in an ASCII file that will be read and plotted
## by the main paper.
write.csv(RESULT, "AV_MovingWindow_Results.csv")
