library(maptools);
library(raster);
library(lattice)
library(latticeExtra)
library(here)
library(RColorBrewer)
library(dplyr)
library(rgdal)
library(sf)
library(ggplot2)


crs_w <- "+proj=longlat +datum=WGS84 +no_defs"
Peru_country <- getData('GADM', country='PER', level=1);	#load country
crs(Peru_country)

Peru_adm <- getData('GADM', country='PER', level=2);		#load admin areas
Peru_dist <- getData('GADM', country='PER', level=3);	#load districts
Peru_alt<-getData('alt',country="PER", level=2);			#grab all altitude data
Peru_alt4000 <- Peru_alt;
    
Peru_alt4000[Peru_alt4000 >=4000] <- NA;
plot(Peru_alt4000)
    
unique(Peru_adm$NAME_1)	
unique(Peru_dist$NAME_1);
APUR <- Peru_dist[Peru_dist$NAME_1 =='Apurímac',];	
View(APUR@data)
CUSCO <- Peru_dist[Peru_dist$NAME_1 =='Cusco',];	
AYA <- Peru_dist[Peru_dist$NAME_1 =='Ayacucho',];
DATA <- rbind(APUR,CUSCO,AYA);
plot(DATA)

data1 <- aggregate(DATA, dissolve=TRUE)
plot(data1)

el_aac <- crop(Peru_alt4000, data1) ##remove districts not sampled 
el_aac2 <- mask(el_aac, data1)
plot(el_aac2)
    
##remove the areas not sampled and with no rabies outbreaks
##Districts from Ayacucho and Cusco
AYA_prov <- unique(AYA$NAME_2);
#need to remove Paucar del Sara Sara and Parinacochas
AYA_1 <- AYA_prov[AYA_prov != "Parinacochas" & AYA_prov != "Paucar del Sara Sara"];
AYA_2 <- AYA[which(AYA$NAME_2 %in% AYA_1),];
CUS_prov <- unique(CUSCO$NAME_2);
#need to remove Espinar
CUS_1 <- CUS_prov[CUS_prov != "Espinar"] ;
CUS_2 <- CUSCO[which(CUSCO$NAME_2 %in% CUS_1),];
DATA1 <- rbind(APUR,CUS_2,AYA_2);
plot(DATA1)
text(DATA1, DATA1@data$NAME_3, cex=0.4)
colors1 <- c("#00A600FF", "#3EBB00FF", "#8BD000FF", "#E6E600FF", "#E8C32EFF");
lucanas_loc <- AYA_2$NAME_2=="Lucanas";
locs_luc <- which(lucanas_loc %in% TRUE);
LUCANAS <- AYA_2[locs_luc,];
keeper_list <- c("Aucara", "Cabana", "Carmen Salcedo", "Chipao", "Santa Ana de Huaycahuacho" );
AYA_4 <- LUCANAS[which(LUCANAS$NAME_3 %in% keeper_list),];
LUCA_2 <- AYA[which(AYA$NAME_2 %in% AYA_1),];
all_but_lucanas <- AYA_2$NAME_2!="Lucanas";
locs_not_luc <- which(all_but_lucanas %in% TRUE);
AYA_3 <- AYA_2[locs_not_luc,];
AYA_5 <- rbind(AYA_3,AYA_4);
DATA2 <- rbind(APUR,CUS_2,AYA_5)
plot(DATA2, col="yellow")
text(DATA2, DATA2@data$NAME_2, cex=0.4)
colors1 <- c("#00A600FF", "#3EBB00FF", "#8BD000FF", "#E6E600FF", "#E8C32EFF")

#Lets remove the empty CUSCO districts
CUS_names <- CUSCO$NAME_3
remove <- c("Velille", "Marangani")
CUS_newnames <- CUS_names[which(!CUS_names %in% remove)];
#this has the 2 removed districts
CUS_2 <- CUSCO[which(CUSCO$NAME_3 %in% CUS_newnames),];
plot(CUS_2)
text(CUS_2, CUS_2@data$NAME_3, cex=0.4)
    
#remove CANAS from this now
CUS_7 <- CUS_2[which(!CUS_2$NAME_2 == "Canas"),]
Canas_loc <- CUSCO$NAME_2=="Canas";
locs_can <- which(Canas_loc %in% TRUE);
CANAS <- CUSCO[locs_can,];
keeper_list1 <- c("Tupac Amaru", "Pampamarca");
CUS_3 <- CANAS[which(CANAS$NAME_3 %in% keeper_list1),];
all_but_canas <- CUS_2$NAME_2!="Canas";
locs_not_can <- which(all_but_canas %in% TRUE);
CUS_4 <- CUS_2[locs_not_can,];
CUS_5 <- rbind(CUS_3,CUS_7)
CUS_9 <- CUS_5[which(CUS_5$NAME_2 != "Espinar"),]
DATA3 <- rbind(APUR,CUS_9,AYA_5)
plot(DATA3, col="yellow")
text(DATA3, DATA3@data$NAME_3, cex=0.4)
    
    
political_cusco <- unique(CUSCO$NAME_2);
newdat <- DATA3[which(DATA3$NAME_2 != "Quispicanchi"),]
plot(newdat, col="yellow")
newdat1 <- newdat[which(newdat$NAME_2 != "Canchis"),]
plot(newdat1, col="yellow")

lala_rem <- c("Echarate", "Huayopata", "Maranura" ,"Ocobamba", "Quellouno", "Santa Ana"); ##this is going to remove Ocobamba in Cusco and Apurimac, so I will add Ocobamba in Apurimac again 
#lala_rem <- c("Echarate", "Huayopata", "Maranura", "Quellouno", "Santa Ana");
newdat2 <- newdat1[!newdat1$NAME_3 %in% lala_rem, ]
plot(newdat2, col="yellow")
text(newdat2, newdat2@data$NAME_3, cex=0.4)

##add Ocobamba in Apu as it was removed too (in addition to Ocobamba in Cusco)
oco <- APUR[which(APUR$NAME_3 == "Ocobamba"),]
newdat21 <- rbind(newdat2, oco)
plot(newdat21, col="yellow")
text(newdat21, newdat21@data$NAME_3, cex=0.4)


newdat3 <- newdat21[which(newdat21$NAME_2 != "Paucartambo"),]
newdat4 <- newdat3[which(newdat3$NAME_2 != "Calca"),]
plot(newdat4, col="yellow")
newdat5 <- newdat4[which(newdat4$NAME_2 != "Urubamba"),]
plot(newdat5, col="yellow")
Ant_all <- newdat5[which(newdat5$NAME_2=="Anta"),]
remove_ant <- c("Huarocondo","Ancahuasi","Zurite","Pucyura", "Cachimayo","Anta")
Aco_all <- newdat5[which(newdat5$NAME_2=="Acomayo"),]
Can_all <- newdat5[which(newdat5$NAME_2=="Canas"),]
remove_aco_can <- c("Sangarara", "Acopia", "Mosoc Llacta","Pampamarca")
remove_3 <- c(remove_ant,remove_aco_can)
newdat6 <- newdat5[!newdat5$NAME_3 %in% remove_3, ]
plot(newdat6, col="yellow")
text(newdat6, newdat6@data$NAME_3, cex=0.4)
Cus_all <- newdat6[which(newdat6$NAME_2=="Cusco"),]

Cus_rem <- c("Cusco","Poroy","San Jeronimo","San Sebastian","Santiago","Saylla","Wanchaq") ## the same will happen in here - remove san Jeronimo from Apurimac 
newdat7 <- newdat6[!newdat6$NAME_3 %in% Cus_rem, ]
plot(newdat7, col="yellow")
text(newdat7, newdat7@data$NAME_3, cex=0.4)

##add San Jeronimo again to Apur
sj <- APUR[which(APUR$NAME_3 == "San Jeronimo"),]
newdat71 <- rbind(newdat7, sj)
plot(newdat71, col="yellow")
text(newdat71, newdat71@data$NAME_3, cex=0.4)

    
Cus_rem1 <- c("Ayahuanco", "Santillana")
newdat8 <- newdat71[!newdat71$NAME_3 %in% Cus_rem1, ]
plot(newdat8, col="yellow")
Hua_all <- newdat8[which(newdat8$NAME_2=="Huamanga"),]
rm_hua <- c("Acocro","Acos Vinchos","Ayacucho","Carmen Alto","Pacaycasa","Quinua","San Jose de Ticllas", "San Juan Bautista", "Santiago de Pischa","Socos","Tambillo", "Vinchos" )
newdat11 <- newdat8[!newdat8$NAME_3 %in% rm_hua, ]
    
plot(newdat11, col='yellow')
text(newdat11, newdat11@data$NAME_3, cex=0.4)
newdat11@data
    
    
ele <- crop(Peru_alt4000,extent(newdat11));		#this gets the min/max lats/longs - still need to mask though
aac_ele <- mask(ele,newdat11);

plot(newdat11, col="yellow")
plot(aac_ele, add=TRUE)

ele[ele<4000]<-1
ele[ele>=4000]<-NA
ppol1<-rasterToPolygons(ele,dissolve = T)
plot(ppol1)
    
# match projections
crs(ppol1)<-crs(newdat11)
    
## merge livestock rabies data with map data - now with new data 
d <- rab20032021_29July2021_outbreaklevel
##check for duplicates, extract only AAC area/ then the rabies positives
dup_trabies <-
  duplicated(pull(d, cod_muestra)) |
  duplicated(pull(d, cod_muestra), fromLast=TRUE)
dup_trabies2 <-
  d[dup_trabies, c('anno', 'enfermedad', 'desc_inte_are', 'nomb_dpto_dpt', 'nomb_prov_tpr', 'nomb_dist_tdi','cod_muestra', 'localidad.x', 'lat.loc.dist', 'long.loc.dist','fecha_inicio2', 'Obs')]

View(dup_trabies2) ##no duplicates at the outbreak level 

aac<-subset(d,d$nomb_dpto_dpt=="APURIMAC"|d$nomb_dpto_dpt=="AYACUCHO"|d$nomb_dpto_dpt=="CUSCO")
dim(aac) ##2101
aac<-droplevels(aac) 
aac$desc_inte_are <- as.factor(aac$desc_inte_are)
levels(aac$desc_inte_are)
aac$enfermedad <- as.factor(aac$enfermedad)
levels(aac$enfermedad) ## we only have rabies 

##select rabies positive cases
aac.p<-subset(aac,aac$desc_inte_are=="POSITIVO") ##1231
dim(aac.p)
names(aac.p)

aac.tab<-table(aac.p$nomb_dist_tdi)
aac.dur.tab<-table(aac.p$nomb_dist_tdi,aac.p$anno)

##to transform for upper and lower cases 
newdat11$NAME_3<-toupper(newdat11$NAME_3)
tempdf.o1<-data.frame(aac.tab)

## record the first year that rabies was detected in each district
fy<-c()
for (i in 1:nrow(aac.dur.tab)){
  fy[i]<-min(which(aac.dur.tab[i,]>0))
}
fy<-ifelse(fy=="Inf",0,fy)
fy.df<-data.frame(cbind(rownames(aac.dur.tab),fy,fy+2002))
m.fy<-merge(newdat11,fy.df,by.x="NAME_3",by.y="V1",incomparables=NULL)

plot(m.fy)
clip3<-raster::intersect(ppol1, m.fy)
plot(clip3, col="yellow")

valleys <- aggregate(clip3, dissolve=TRUE)
plot(valleys)


crs_km <- sp::CRS("+proj=utm +zone=18 +south +ellps=GRS80 +units=km +no_defs") 
valleys1 <- spTransform(valleys, crs_km, passthrough = FALSE)


##Kernel smoothing - applies Gaussian kernel regression to the x and y coordinates. Prior to smoothing, additional vertices are added via densification with smooth_densify(). For polygons (and closed lines), method = "periodic" is used to avoid getting a kink at the start/end of the curve defining the boundary.

##Kernel smoothing simultaneously smooths and generalizes curves, and can be tuned to produce drastically smoothed curves.  
##smooth_ksmooth() has parameters specifying the number of points in the resulting feature (either the number of sub-segments to split each line segment into or the maximum distance between points) and the bandwidth of the Gaussian kernel. Choosing a suitable bandwidth is critical for correctly smoothing features using this algorithm and typically users will want to explore a range of bandwidth to find a value that works for their particular scenario.
library(smoothr)
valleys2 <- smooth(valleys1, method = "ksmooth", smoothness=3)
##smoothness: a positive number controlling the smoothness and level of generalization. At the default value of 1, the bandwidth is chosen as the mean distance between adjacent vertices. Values greater than 1 increase the bandwidth, yielding more highly smoothed and generalized features, and values less than 1 decrease the bandwidth, yielding less smoothed and generalized features


plot(valleys1, col = "grey40", border = NA)
plot(valleys2, col = NA, border = "#E41A1C", lwd = 1, add = TRUE) 


#drop_crumbs() removes small lines or polygons based on a length or area threshold. 
#For multipart features, this function dives into the individual component features and applies the threshold. 
#The threshold can either be provided as a number, assumed to be in the units of the coordinates reference system (meters for unprojected coordinates), or as a units object. This latter approach provides more flexibility because a threshold can be given in any type of units and it will be converted to the correct units automatically.

area_thresh <- units::set_units(400, km^2)
p_dropped <- drop_crumbs(valleys2, threshold = area_thresh)

par(mfrow=c(1, 2)) 
plot(valleys1, col = "black", main = "Original")
plot(p_dropped, col = "black", main = "After drop_crumbs()") 


##save as a shapefile to work as a mesh 
shapefile(p_dropped, filename = 'valleys_aac_4000m_smooth_f')


##Important for figures if we want to tidy up the small points inside the shapefile 
##################code delete holes in spatial polygon dataframe 
m <- st_read("valleys_aac_4000m_smooth_f.shp")
class(m)
plot(m)
crs(m)

library(nngeo)
m2 <- nngeo::st_remove_holes(m, max_area = 1)
plot(m2)


#(st_union(m$geom)) -> m1
as(st_geometry(m2), "Spatial") -> nc_sp
as(nc_sp, "SpatialPolygonsDataFrame") -> X1
class(X1)
plot(X1) 

##save as rds 

saveRDS(X1, "boundaries.rds")
bound <- readRDS("boundaries.rds")