#Title      : SWEEP023 Habitat Classification Tool
# License       : CC BY-NC-SA 4.0 - Attribution-NonCommercial-ShareAlike 4.0 International
# Objective : To use ESA Sentinel-2 imagery to create a land cover map
#             with associated accuracy map and metrics
# Created by: Naomi Gatis
# Created on: 16/12/2021

#----------- Project Set Up -----------------------------------------------------------------------------------------------
# Check that the required packages are installed
list.of.packages <- c("caret", "doParallel", "diffeR", "e1071", "here", "MASS", "packrat",
                      "randomForest", "raster", "rgdal", "rgeos", "splitstackshape", "svDialogs")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
remove(list.of.packages, new.packages)

library(caret)        # Classification and Regression Training - RF with raster
library(doParallel)   # Foreach Parallel Adapter for the parallel Package
library(diffeR)       # Metric of Difference for Comparing Pairs of Maps
library(e1071)        # Misc Functions of the Department of Statistics, Probability Theory Group
library(here)         # A Simpler Way to Find Your Files
library(MASS)         # Support Functions and Datasets for Venables and Ripleys Modern Applied Statistics
                      # with S - logistical regression
library(packrat)      # A Dependency Management System for Projects and their R Package Dependencies
library(randomForest) # Breiman and Cutler's Random Forests for Classification and Regression
library(raster)       # Geographic Data Analysis and Modeling
library(rgeos)        # Interface to Geometry Engine Open Source ('GEOS')
library(rgdal)        # Bindings for the Geospatial Database Abstraction Library
library(splitstackshape) # Stack and Reshape Datasets After Splitting Concatenated Values
                      # enables random stratified sampling of training/testing pixels
library(svDialogs)    #'SciViews' - Standard Dialogue Boxes for Windows, MacOs and Linuxes

#----------- Get Inputs ------------------------------------------------------------------------------------------
year<- dlgInput("Year of interest", Sys.info()["2019"])$res
aoiFilename <- dlg_open("Inputs/DNPA_Extent", "Shapefile of area of interest")$res
springImgFilename <- dlg_open("Inputs", "Select Spring Image File")$res
summerImgFilename <- dlg_open("Inputs", "Select Summer Image File")$res
pixelsFilename  <- dlg_open("Training_Data", "Select training/testing photo-interepreted pixels .shp file")$res
classNamesFilename <- dlg_open("Inputs", "Select Table (.csv) of class names")$res

#----------- Load Images  ----------------------------------------------------------

img <- stack(springImgFilename, summerImgFilename)

aoiExtent <- shapefile(aoiFilename)
aoiExtent <- spTransform(aoiExtent, crs(img))
aoiExtentBuff <- gBuffer(aoiExtent, width = 20, quadsegs = 10)

img <- crop(img,aoiExtentBuff)

#----------- DTM  --------------------------------------------------------

if(file.exists("./Inputs/DTM/DTM_WGS.tif")) {
    DTM <- raster("./Inputs/DTM/DTM_WGS.tif")} else{
    DTMFilename <- dlg_open("Inputs/DTM", "DTM GeoTiff, if not availble click cancel")$res
    uplandsDefinition <- dlgInput("Minimum elevation of Uplands (m)", Sys.info()["300"])$res
    DTM <- raster(DTMFilename)

    cores=detectCores()
    cl <- makePSOCKcluster(cores[1]-1)
    registerDoParallel(cl)

    uplandsDefinition<- as.numeric(uplandsDefinition)
    slope <- setValues(raster(img[[1]]), 0)
    aspect <- slope
    uplandsExtent <- slope

    DTM <- projectRaster(DTM, crs=crs(img))
    DTM <- resample(DTM, img[[1]], method='bilinear')
    DTM <- crop(DTM,extent(img))

    slope <- terrain(DTM, opt= "slope", unit = "degrees", neighbours = 8)
    aspect <- terrain(DTM, opt= "aspect", unit = "degrees", neighbours = 8)
    uplandsExtent <- DTM > uplandsDefinition

    stopCluster(cl)

    writeRaster(DTM, paste0("./Inputs/DTM/DTM_WGS", projsep = ""), format = "GTiff", overwrite = TRUE)
    writeRaster(slope, paste0("./Inputs/DTM/slope", projsep = ""), format = "GTiff", overwrite = TRUE)
    writeRaster(aspect, paste0("./Inputs/DTM/aspect", sep = ""), format = "GTiff", overwrite = TRUE)
    writeRaster(uplandsExtent, paste0("./Inputs/DTM/uplandExtent", sep = ""), format = "GTiff", overwrite = TRUE)

    remove(DTMFilename, uplandsDefinition)
}

#----------- Slope  ----------------------------------------------------------

if(file.exists("./Inputs/DTM/slope.tif")) {
    slope <- raster("./Inputs/DTM/slope.tif")} else {
      DTM <- raster("./Inputs/DTM/DTM_WGS.tif")

      cores=detectCores()
      cl <- makePSOCKcluster(cores[1]-1)
      registerDoParallel(cl)

      slope <- setValues(raster(img[[1]]), 0)
      slope <- terrain(DTM, opt= "slope", unit = "degrees", neighbours = 8)

      stopCluster(cl)

      writeRaster(slope, paste0("./Inputs/DTM/slope", projsep = ""), format = "GTiff", overwrite = TRUE)

       }

#----------- Aspect  ----------------------------------------------------------
if(file.exists("./Inputs/DTM/aspect.tif")) {
    aspect <- raster("./Inputs/DTM/aspect.tif")} else {
      DTM <- raster("./Inputs/DTM/DTM_WGS.tif")

      cores=detectCores()
      cl <- makePSOCKcluster(cores[1]-1)
      registerDoParallel(cl)

      aspect <- setValues(raster(img[[1]]), 0)
      aspect <- terrain(DTM, opt= "aspect", unit = "degrees", neighbours = 8)

      stopCluster(cl)

      writeRaster(aspect, paste0("./Inputs/DTM/aspect", sep = ""), format = "GTiff", overwrite = TRUE)

      }

#----------- Uplands Extent  ----------------------------------------------------------
if(file.exists("./Inputs/DTM/uplandExtent.tif")) {
      uplandsExtent <- raster("./Inputs/DTM/uplandExtent.tif")} else {
      DTM <- raster("./Inputs/DTM/DTM_WGS.tif")
      uplandsDefinition <- dlgInput("Minimum elevation of Uplands (m)", Sys.info()["300"])$res

      cores=detectCores()
      cl <- makePSOCKcluster(cores[1]-1)
      registerDoParallel(cl)

      uplandsDefinition<- as.numeric(uplandsDefinition)
      uplandsExtent <- setValues(raster(img[[1]]), 0)
      uplandsExtent <- DTM > uplandsDefinition

      stopCluster(cl)

      writeRaster(uplandsExtent , paste0("./Inputs/DTM/uplandExtent", sep = ""), format = "GTiff", overwrite = TRUE)

      remove(uplandsDefinition)
       }


#----------- PeatExtent  ----------------------------------------------------------

if(file.exists("./Inputs/Peat_Extent/peatExtent.tif")) {
    peatExtent <- raster("./Inputs/Peat_Extent/peatExtent.tif")} else {
        peatExtentFilename <- dlg_open("Inputs/Peat_Extent", "Shapefile of peat extent, if not availble click cancel")$res
        peatExtent <-setValues(raster(img[[1]]), 0)

        if(exists('peatExtentFilename')) {
          peatExtentShp <- shapefile(peatExtentFilename)

          cores=detectCores()
          cl <- makePSOCKcluster(cores[1]-1)
          registerDoParallel(cl)

          peatExtentShp <- spTransform(peatExtentShp, crs(img))
          peatExtent <- mask(peatExtent, peatExtentShp, updatevalue=1, inverse = TRUE)

          stopCluster(cl)

          writeRaster(peatExtent, paste0("./Inputs/Peat_Extent/peatExtent", sep = ""), format = "GTiff", overwrite = TRUE)

          remove(peatExtentFilename, peatExtentShp)
          }
    }

#----------- Derive Vegetation Indicies ----------------------------------------------------------

names(img) <- c("B2_Sp", "B3_Sp", "B4_Sp", "B5_Sp", "B6_Sp", "B7_Sp", "B8_Sp", "B11_Sp", "B8a_Sp",
                 "B2_Sum", "B3_Sum", "B4_Sum", "B5_Sum", "B6_Sum", "B7_Sum", "B8_Sum", "B11_Sum", "B8a_Sum")

ExG_Sp <- overlay(img$B3_Sp,img$B4_Sp,img$B2_Sp, fun = function (x,y,z){(2*x)-y-z})
DVI_Sp <- overlay(img$B8_Sp, img$B4_Sp, fun = function (x,y){x-y})
NDWI_Sp <-overlay(img$B3_Sp,img$B8_Sp, fun = function(x,y){(x-y)/(x+y)})
MGVRI_Sp <- overlay(img$B3_Sp,img$B4_Sp, fun = function(x,y){((x*x)-(y*y))/((x*x)+(y*y))})

ExG_Sum <- overlay(img$B3_Sum,img$B4_Sum,img$B2_Sum, fun = function (x,y,z){(2*x)-y-z})
DVI_Sum <- overlay(img$B8_Sum, img$B4_Sum, fun = function (x,y){x-y})
NDWI_Sum <-overlay(img$B3_Sum,img$B8_Sum, fun = function(x,y){(x-y)/(x+y)})
MGVRI_Sum <- overlay(img$B3_Sum,img$B4_Sum, fun = function(x,y){((x*x)-(y*y))/((x*x)+(y*y))})


VI<- stack(ExG_Sp, DVI_Sp, NDWI_Sp, MGVRI_Sp, ExG_Sum, DVI_Sum, NDWI_Sum, MGVRI_Sum, slope, aspect)
names(VI) <- c("ExG_Sp","DVI_Sp","NDWI_Sp", "MGVRI_Sp", "ExG_Sum","DVI_Sum","NDWI_Sum","MGVRI_Sum","Slope", "Aspect")

img <- stack(img, VI)
img <- brick(img)

#----------- Load Training Data, derive band info for all photo-interpreted points-----------------------------
#----------- Stratified Random split Training (75%) and Testing (25%) Data-------------------------------------------------

remove(ExG_Sp,DVI_Sp,NDWI_Sp, MGVRI_Sp, ExG_Sum, DVI_Sum, NDWI_Sum, MGVRI_Sum, slope, aspect,
       VI, springImgFilename, summerImgFilename, aoiFilename, aoiExtentBuff, DTM)

trainData <- shapefile(pixelsFilename)
trainData <- spTransform(trainData, crs(img))

responseCol <- "Class"

dfTrain = data.frame(matrix(nrow = nrow(trainData), ncol = length(names(img)) + 1))
colnames(dfTrain) <- c(names(img),"Class")
for (i in 1:nrow(trainData)){
  imglist <- extract(img, trainData[i,])
  dfTrain[i,1:28]<- data.frame(matrix(unlist(imglist), nrow=1, ncol = length(names(img))))
  dfTrain[i,29] <- trainData$Class[i]
}

remove(pixelsFilename,i, imglist, responseCol, trainData)

dfTrain$Class <- as.factor(dfTrain$Class)
dfTrain$Class <- droplevels(dfTrain$Class)


#group Upland and Lowland acid grassland
levels(dfTrain$Class)[levels(dfTrain$Class)=="3"] <- "1"
levels(dfTrain$Class)[levels(dfTrain$Class)=="303"] <- "1"
#group Upland and Lowland Calcareous grassland
levels(dfTrain$Class)[levels(dfTrain$Class)=="5"] <- "4"
#group Upland and Lowland meadows
levels(dfTrain$Class)[levels(dfTrain$Class)=="7"] <- "6"
#group Upland and Lowland heathland
levels(dfTrain$Class)[levels(dfTrain$Class)=="13"] <- "14"
levels(dfTrain$Class)[levels(dfTrain$Class)=="314"] <- "14"
#group reservoirs and rivers
levels(dfTrain$Class)[levels(dfTrain$Class)=="30"] <- "29"

dfTrain$Class <- droplevels(dfTrain$Class)
dfTrain[is.na(dfTrain)] <- 99999

id <- rownames(dfTrain)
dfTrain <- cbind(id=id, dfTrain)
trainPix <- stratified(dfTrain, "Class", 0.75)
testPix <- subset(dfTrain,!(dfTrain$id%in%trainPix$id))
trainPix <- trainPix[,2:30]
testPix <- testPix[,2:30]

#----------- Random Forest Classifier ----------------------------------------------------------------------------
remove(dfTrain, id)

###  10 fold cross-validation repeated 10 times with grid search to tune mtry
control <- trainControl(method="repeatedcv", number=10, repeats=10, search = "grid")

cores=detectCores()
cl <- makePSOCKcluster(cores[1]-1) #not to overload your computer
registerDoParallel(cl)

modFit_rf <- train(Class~.,
                   method = "rf",
                   data = trainPix,
                   metric = "Accuracy",
                   tuneGrid = expand.grid(.mtry=c(1:15)),
                   trControl=control)
stopCluster(cl)


saveRDS(modFit_rf, paste0("./Models/",year,"RF_model.rds", sep = ""))

#----------- Classify Unknown Areas using trained Random Forest Classifier----------------------------------------
remove (cl, control, trainPix)

n <- as.numeric(nlevels(testPix$Class))

cl2 <- makePSOCKcluster(cores[1]-1) #not to overload your computer
registerDoParallel(cl2)
img <- reclassify(img, cbind(NA, 99999)) #removes NA from rasters to match RF - using na.omit would leave unwanted gaps
preds_rf <- predict(img, modFit_rf)
stopCluster(cl2)
remove(cl2, n)

preds_rf <- mask(preds_rf,aoiExtent)

#----------- Confusion Matrix by Pixel Count ---------------------------------------------------------------------

predictedPixels <- predict(modFit_rf, testPix)
conMatrix <- confusionMatrix(predictedPixels, testPix$Class)
overallAccuracy <- round(sum(diag(conMatrix$table))/sum(conMatrix$table)*100, digits = 1)

pixelClasses <- matrix(row.names(conMatrix$table), ncol = 1)
pixelClasses <- as.data.frame(pixelClasses)
names(pixelClasses) <- "Class"
popnCount <- as.data.frame(table(preds_rf[]))
names(popnCount) <- c("Class", "Count")
pixelCount <- merge (pixelClasses, popnCount, by = "Class", all.x = TRUE)

popnConMatrix <-sample2pop(conMatrix$table, population = pixelCount)
overallAccuracyPopn <- sum(diag(popnConMatrix))/sum(popnConMatrix)*100
overallAccuracyPopn <- round(overallAccuracyPopn, digits = 1)
conMatrixTable <- as.data.frame.matrix(conMatrix$table)

classesUsed<- names(conMatrixTable)
classesUsed<- as.data.frame(as.numeric(unlist(classesUsed)))
names(classesUsed) <- "Class"

classNames<-read.csv(classNamesFilename)
classesUsed <- merge(classesUsed, classNames, by = "Class", all.x = TRUE)

classesUsed$ref <- paste(as.character('Photo-Interpreted as '), as.character(classesUsed$Class.Names), sep = "")
classesUsed$pred <- paste(as.character('Mapped as '), as.character(classesUsed$Class.Names), sep = "")

names(conMatrixTable) <- classesUsed$ref
row.names(conMatrixTable) <-classesUsed$pred

Class_Based_Users_Accuracy<- round(diag(conMatrix$table)/rowSums(conMatrix$table)*100, digits = 1)
conMatrixTable <- cbind(conMatrixTable, Class_Based_Users_Accuracy)

emptyRows=matrix(c(rep.int(NA,length(conMatrixTable))),nrow=2,ncol=length(conMatrixTable))
colnames(emptyRows) <-  colnames(conMatrixTable)
rownames(emptyRows) <- c("Overall User's Accuracy", "Population Adjusted Overall User's Accuracy")
conMatrixTable <- rbind(conMatrixTable, emptyRows)

conMatrixTable[nrow(conMatrixTable)-1,ncol(conMatrixTable)] <- overallAccuracy
conMatrixTable[nrow(conMatrixTable),ncol(conMatrixTable)]<- overallAccuracyPopn

write.csv(conMatrixTable, paste0("./Habitat_Classification_Maps/", year, "ConfusionMatrix.csv"))

#----------- Reclassify lowlands/uplands and classes overlying degraded blanket bog ------------------------------

remove(conMatrix, conMatrixTable, Class_Based_Users_Accuracy, overallAccuracy,
       overallAccuracyPopn, modFit_rf, emptyRows, pixelClasses, popnCount, aoiExtent)

reclassfun <- function(x) {
   ifelse((x[1] == 1  && x[2] == 1), 303,   #303 Acid grass over degraded blanket bog
      ifelse((x[1] == 14 && x[2] == 1), 314,  #314 Heathland over degraded blanket bog
        ifelse((x[1] == 1 && x[3] == 1), 3,     #3 Upland acid grassland
          ifelse((x[1] == 4 && x[3] == 1), 5,     #5 Upland Calcareous grasslands
           ifelse((x[1] == 6 && x[3] == 1), 7,     #7 Upland hay meadows
            ifelse((x[1] == 14 && x[3] != 1), 13, x[1]))))))    #13 Lowland heathland
}


cl3 <- makePSOCKcluster(cores[1]-1) #not to overload your computer
registerDoParallel(cl3)
reclass <- overlay(stack(preds_rf, peatExtent, uplandsExtent), fun=reclassfun)
stopCluster(cl3)

writeRaster(reclass, paste0("./Habitat_Classification_Maps/", year, "HabitatClass", sep = ""),
                          format = "GTiff", overwrite = TRUE)


remove(cl3, peatExtent, uplandsExtent, reclassfun, preds_rf, reclass, classesUsed, classNames, pixelCount, classNamesFilename,
       popnConMatrix)


#----------- Spatial Accuracy Assessment via Global Logistic Regression ------------------------------------------

testPixPredRef <- cbind(testPix,predictedPixels)

Correct <- ifelse(testPixPredRef$predictedPixels==testPixPredRef$Class,1,0)
binaryCorrect <- cbind(testPixPredRef, Correct)


#Standardise (z-scores) so can compare strength of variables in logistc regression
TestVectorStd <- binaryCorrect

for (i in 1:(length(TestVectorStd)-3)){
  TestVectorStd[,i] <- scale(TestVectorStd[,1], center = TRUE, scale = TRUE)
}

#logistical regression model using standardised vector data to select variables

TestVectorStd <- TestVectorStd[-c(29:30)] #remove class and predicted columns
LRmodel.std.full <- glm(TestVectorStd$Correct ~ ., data = TestVectorStd,
                        family = binomial)

step.model.Both <- stepAIC(LRmodel.std.full, direction = "both",
                           trace = FALSE)

####logistical regression on selected variables then map results of logisitcal regression####
InputVar <- names(step.model.Both$coefficients)
InputVar[1] <- "Correct"

TestVector.select <- TestVectorStd[InputVar]
LRmodel <- glm(TestVector.select$Correct ~ ., data = TestVector.select,
               family = "binomial")

saveRDS(LRmodel, paste0("./Models/",year,"LR_model.rds", sep = ""))

cl4 <- makePSOCKcluster(cores[1]-1) #not to overload your computer
registerDoParallel(cl4)
preds_LR <- predict(img, LRmodel, type = "response", na.rm = TRUE)
stopCluster(cl4)

writeRaster(preds_LR, paste0("./Habitat_Classification_Maps/", year, "HabitatClassAccuracy", sep = ""),
            format = "GTiff", overwrite = TRUE)

remove(cl4, binaryCorrect, LRmodel,img, preds_LR, testPix, testPixPredRef, cores, Correct,
       predictedPixels, year, LRmodel.std.full, step.model.Both, TestVector.select, TestVectorStd,
       i, InputVar)

print("Finished")

