# This script allows reproducing the graphics and data for the article.
# Load the lczexplore package (it must already be installed), plus dplyr and ggplot2
library(lczexplore)
library(dplyr)
library(ggplot2)

# IMPORTANT
# Replace the path below with the path to the folder into which you copied the source data,
# including the proper bdtopo_2_2 and osm subfolders.
# If the data seem to be missing, the function will try to fetch them from the Geomanum Foundation cloud,
# but with no guarantee of availability.

# Folder holding the input data. It is reused below to build file paths
# (note the trailing slash, which paste0() relies on) and is also made the
# working directory for the rest of the script.
folderPath <- "/home/decide/Documents/CloudS/ZaclysOnlyOffice/Boulot/Articles/LCZ_bdt_osm/Data_and_code_zenodo/Data/input/"
setwd(dir = folderPath)

##############################################################################
# Set locations for which data is stored and can be fetched and analyzed
##############################################################################

# Names of the locations to process, one per line, in the order in which
# they are analysed by the loop below.
locations <- c(
  "Allaire",
  "Annecy",
  "Avignon",
  "Blagnac",
  "Bourgneuf",
  "Charnay-lès-Mâcon",
  "Corbonod",
  "Dijon",
  "Gratentour",
  "La-Haie-Fouassiere",
  "La-Rochelle",
  "Lathuile",
  "Meudon",
  "Nanterre",
  "Nantes",
  "Paris",
  "Pont-de-Veyle",
  "Rennes",
  "Saint-Ganton",
  "Saint-Nicolas-de-Redon",
  "Staffelfelden",
  "Toulouse"
)
##############################################################################
# Produce the comparison data and plots
##############################################################################
# Run the bdtopo_2_2 / osm comparison once per entry of `locations`.
# `saveG` receives the current location name: the previous call passed
# `location`, an object that is not defined anywhere in this script (the loop
# variable is `i`), so the argument could not be evaluated reliably.
for (i in locations) {
  produceAnalysis(
    location = i,
    wf1 = "bdtopo_2_2",
    wf2 = "osm",
    refYear1 = "2022",
    refYear2 = "2022",
    repr = "brut",
    saveG = i
  )
}
##############################################################################
# retrieve the comparison data and produce the general confusion matrix
##############################################################################

# Load the semicolon-separated comparison table for all locations
# (presumably written by the produceAnalysis() calls above -- confirm) and
# drop exact duplicate rows. Strings are read as factors; TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
toutesLocations <- read.csv(
  paste0(folderPath, "bdtopo_2_2_osm.csv"),
  sep = ";",
  stringsAsFactors = TRUE
) %>%
  unique()

# Build the general confusion matrix from the table already in memory
# (inputDf), so no file path is given. Columns without suffix belong to the
# first workflow, columns suffixed ".1" to the second one.
# NOTE(review): "Open High" and "Sparsely Built" are capitalised differently
# from the other labels in `niveaux` -- confirm they match what
# matConfLCZGlob() expects.
matConfLCZGlob(
  filePath = "",
  inputDf = toutesLocations,
  wf1 = "BDTopo V2",
  wf2 = "OSM (year = 2022)",
  geomID1 = "ID_RSU",
  column1 = "LCZ_PRIMARY",
  confid1 = "LCZ_UNIQUENESS_VALUE",
  geomID2 = "ID_RSU.1",
  column2 = "LCZ_PRIMARY.1",
  confid2 = "LCZ_UNIQUENESS_VALUE.1",
  sep = ";",
  repr = "brut",
  niveaux = c(
    "Compact high", "Compact mid", "Compact low",
    "Open High", "Open mid", "Open low",
    "Lightweight low", "Large low", "Sparsely Built",
    "Heavy industry", "Dense trees", "Scattered trees",
    "Bush scrub", "Low plants", "Bare rock paved",
    "Bare soil sand", "Water"
  )
)

##############################################################################
## Compare the distribution of RSU areas according to the input data source
###############################################################################
# BD TOPO data
# One row per (location, ID_RSU): aireGeomBDT is the sum of `aire` over all
# rows of toutesLocations sharing that location and ID_RSU. Only the grouping
# columns, the summed area and LCZ_PRIMARY are kept, then duplicates dropped.
# The result is ungrouped so that later dplyr verbs do not silently operate
# per group.
airesBDT <- toutesLocations %>%
  group_by(location, ID_RSU) %>%
  mutate(
    aireGeomBDT = sum(aire),
    LCZ_PRIMARY = LCZ_PRIMARY,
    .keep = "none"
  ) %>%
  unique() %>%
  ungroup()

# OSM data
# One row per (location, ID_RSU.1): aireGeomOSM is the sum of `aire` over all
# rows of toutesLocations sharing that location and ID_RSU.1. LCZ_PRIMARY is
# overwritten with the OSM value (LCZ_PRIMARY.1); only the grouping columns,
# the summed area and LCZ_PRIMARY are kept, then duplicates dropped.
# The result is ungrouped so that later dplyr verbs do not silently operate
# per group.
airesOSM <- toutesLocations %>%
  group_by(location, ID_RSU.1) %>%
  mutate(
    aireGeomOSM = sum(aire),
    LCZ_PRIMARY = LCZ_PRIMARY.1,
    .keep = "none"
  ) %>%
  unique() %>%
  ungroup()

# Stack the three area samples into one long table: the summed areas from
# airesBDT, the summed areas from airesOSM, and the raw `aire` column of
# toutesLocations. `source` labels each value with the sample it came from.
pourHist <- data.frame(
  aire = c(
    airesBDT$aireGeomBDT,
    airesOSM$aireGeomOSM,
    toutesLocations$aire
  ),
  source = rep(
    c("BDT", "OSM", "Intersect"),
    times = c(
      length(airesBDT$aireGeomBDT),
      length(airesOSM$aireGeomOSM),
      length(toutesLocations$aire)
    )
  )
)

# Empirical cumulative distribution of the areas, one step curve per source.
# coord_cartesian() only zooms the x axis to [0, 15000]; it does not drop the
# larger values from the computation of the curves.
ggplot(data = pourHist, mapping = aes(x = aire, color = source)) +
  stat_ecdf(geom = "step") +
  coord_cartesian(xlim = c(0, 15000))

##############################################################################
#### Sensitivity analysis on confidence (LCZ uniqueness value)
##############################################################################

# Sensitivity analysis on the confidence columns (LCZ_UNIQUENESS_VALUE for the
# first workflow, LCZ_UNIQUENESS_VALUE.1 for the second), read from the same
# semicolon-separated comparison file as above, with nPoints = 10.
# Plotting is enabled and folderPath is passed as saveG.
confidSensib(
  filePath = paste0(folderPath, "bdtopo_2_2_osm.csv"),
  nPoints = 10,
  wf1 = "bdtopo_2_2",
  wf2 = "osm",
  geomID1 = "ID_RSU",
  column1 = "LCZ_PRIMARY",
  confid1 = "LCZ_UNIQUENESS_VALUE",
  geomID2 = "ID_RSU.1",
  column2 = "LCZ_PRIMARY.1",
  confid2 = "LCZ_UNIQUENESS_VALUE.1",
  sep = ";",
  repr = "brut",
  plot = TRUE,
  saveG = folderPath
)


