
# Set the working directory to the project folder.
# The project lives at a different path on each machine and setwd() errors on
# a path that does not exist, so only switch to a candidate that is present.
# If more than one exists, the last one wins (as with two consecutive setwd()).
project_dirs <- c(
  "C:/Users/Mike Williamson/Dropbox/ReefShark_RESET/R",
  "/Users/michaelwilliamson/Dropbox/ReefShark_RESET/R"
)
project_dir <- project_dirs[dir.exists(project_dirs)]
if (length(project_dir) == 0) {
  stop("None of the project directories exist: ",
       paste(project_dirs, collapse = ", "), call. = FALSE)
}
setwd(project_dir[length(project_dir)])

######## PACKAGES REQUIRED ######### 
library(asnipe)
library(igraph)
library(AER)
library(performance)
library(TMB)
library(glmmTMB)
library(MuMIn)
library(lme4)
library(nlme)
library(mgcv)
library(lmerTest)
library(gamm4)
library(car)
library(DHARMa)
library(gamlss)
library(raster)
library(asnipe)
library(igraph)
library(chron)
library(sp)
library(lubridate)
library(tidyverse)
library(plyr)# warning can block some dplyr functions if laoded after dplyr
library(tidytext)
library(lubridate)
library(data.table)
library(reshape2)
library(dplyr)
library(viridis)
library(emmeans)
library(ggplot2)

############ PREPARE DATA SET ############ 

# ---- Load raw inputs ----

# All acoustic detections (comma-separated text file)
ALL <- read.delim("Chagos_ALL_acoustic_2021.txt", header = TRUE, sep = ",")

# Tag metadata
META <- read.csv("Chagos_ALL_tag_metadata_May2019.csv", header=TRUE, stringsAsFactors = FALSE)

# Receiver metadata
RECS <- read.csv("Receivers_NorthernAtolls.csv", header = TRUE)


# ---- Attach tag metadata to every detection ----
# Look up the metadata row for each detection once and reuse it per column.
meta_row <- match(ALL$code, META$idcode)

ALL$species <- META$commonname[meta_row]
ALL$sex <- META$sex[meta_row]
ALL$TL <- as.numeric(META$len1[meta_row]) # total length, stored as numeric
ALL$taggingdate <- META$taggingdate[meta_row]


# ---- Correct station names ----
# Station labels that are shared by more than one receiver position are split
# using the receiver latitude; "SA3" is relabelled "SA4G01".
# Each step works on the result of the previous one.
ALL$station <- with(ALL, ifelse(station == "SA02" & receiver_lat == -5.35018, "SA05", station))
ALL$station <- with(ALL, ifelse(station == "SA02" & receiver_lat == -5.316745, "SA02b", station))
ALL$station <- with(ALL, ifelse(station == "NI01" & receiver_lat == -5.6869, "NI01b", station))
ALL$station <- with(ALL, ifelse(station == "SA3", "SA4G01", station))

# Zero-pad PB1 and PB4 so they read PB01 and PB04 (whole-word matches only)
ALL$station <- ALL$station %>%
  gsub('\\bPB1\\b', 'PB01', .) %>%
  gsub('\\bPB4\\b', 'PB04', .)


###### CREATE GRS DATA AND FILTER DATA #####
# Keep grey reef shark detections from receivers at latitude -5.85 or north of it
GRS_ALL <- dplyr::filter(ALL, species == "Grey Reef Shark", receiver_lat >= -5.85)

# Interactive checks: class of every column and of the object itself
lapply(GRS_ALL, class)
class(GRS_ALL)

# Atoll (receiver region) for each detection, looked up by station id
GRS_ALL$atoll <- RECS$region[match(GRS_ALL$station, RECS$id)]

# Drop detections recorded on the calendar date of tagging, then drop the
# tagging date column
GRS_ALL <- GRS_ALL %>%
  dplyr::filter(as.Date(ymd_hms(detect_date)) != dmy(taggingdate)) %>%
  dplyr::select(-c("taggingdate"))


# Store detect_date as character (it is a factor in the 2019 data set);
# later steps rely on this
GRS_ALL$detect_date <- as.character(GRS_ALL$detect_date)
class(GRS_ALL$detect_date)


# Parse detect_date into a date-time column.
# An earlier as.POSIXct() version (kept below for reference) produced NAs
# around daylight-saving changes when no time zone was given.

#GRS_ALL$datetime <- as.POSIXct(GRS_ALL$detect_date , format = "%Y-%m-%d %H:%M:%S" , tz = "") #Indian/Chagos
#class(GRS_ALL$datetime)

GRS_ALL$datetime <- lubridate::ymd_hms(GRS_ALL$detect_date)
class(GRS_ALL$datetime)

# Rows whose detect_date could not be parsed
na_rows <- which(is.na(GRS_ALL$datetime))


# Sort by individual and then by time so the network analysis sees each
# animal's detections in chronological order
GRS_ALL$code <- as.factor(GRS_ALL$code)
GRS_ALL <- arrange(GRS_ALL, code, datetime)


# Derive calendar columns from datetime, one strftime() pattern per column.
# These must be created while datetime is still a date-time object.
datetime_parts <- c(
  time = "%H:%M:%S",
  year = "%Y",
  month = "%m",
  month1 = "%b",
  week = "%W",
  day = "%j",
  hour = "%H",
  date = "%Y/%m/%d",
  monthyear = "%Y/%m"
)
for (part in names(datetime_parts)) {
  GRS_ALL[[part]] <- strftime(GRS_ALL$datetime, format = datetime_parts[[part]])
}
rm(part)

# month is used numerically below; month1 keeps the abbreviated month name
GRS_ALL$month <- as.numeric(GRS_ALL$month)

unique(GRS_ALL$year)

# Season: April to September is the dry season, every other month is wet
GRS_ALL$season <- ifelse(GRS_ALL$month %in% 4:9, "dry.season", "wet.season")

# Day/night: "day" from 06:00:00 up to (not including) 18:00:00.
# The numeric hour is used instead of comparing "HH:MM:SS" strings with
# "060000"/"180000", whose result depended on how the locale collates ":"
# relative to digits.
GRS_ALL$daynight <- with(GRS_ALL, {
  hour_of_day <- as.numeric(hour)
  ifelse(hour_of_day >= 6 & hour_of_day < 18, "day", "night")
})

# El Nino phase. `date` is still a "YYYY/MM/DD" string here, so the
# comparisons below are lexicographic, which matches chronological order for
# this zero-padded format. Dates outside the three ranges (including anything
# after 2019/03/25) stay NA.
GRS_ALL <- within(GRS_ALL, {
  ElNino <- NA
  ElNino[date >= "2013/02/10" & date <= "2014/06/30"] <- "ElNino_Before"
  ElNino[date >= "2014/07/01" & date <= "2016/06/30"] <- "ElNino_During"
  ElNino[date >= "2016/07/01" & date <= "2019/03/25"] <- "ElNino_After"
})

summary(GRS_ALL$datetime)
min(GRS_ALL$datetime)

# Season and year combined, e.g. "dry.season.2015"
GRS_ALL$seasonyear <- paste0(GRS_ALL$season, ".", GRS_ALL$year)

str(GRS_ALL)

# Convert datetime to a "dd/mm/YYYY HH:MM:SS" string to match earlier code.
# NOTE: the calendar columns above cannot be derived from this string format,
# so this conversion must stay after them.
GRS_ALL$datetime <- strftime(GRS_ALL$datetime, format="%d/%m/%Y %H:%M:%S")

# ---- Transition flags, time differences, distances and speeds ----
# All of these compare each row with the row before it, so GRS_ALL must
# already be sorted by code and then datetime.

n_det <- nrow(GRS_ALL)

# firstrec: 1 on the first detection of each animal, 0 elsewhere.
# code_change holds the positions i where row i and row i + 1 belong to
# different animals, so the new animal starts at i + 1.
code_change <- which(GRS_ALL$code[-n_det] != GRS_ALL$code[-1])
GRS_ALL$firstrec <- rep(0, n_det)
GRS_ALL$firstrec[code_change + 1] <- 1
if (n_det > 0) {
  GRS_ALL$firstrec[1] <- 1
}

# selfloop: 1 when the station differs from the previous row, otherwise 0
station_change <- which(GRS_ALL$station[-n_det] != GRS_ALL$station[-1])
GRS_ALL$selfloop <- rep(0, n_det)
GRS_ALL$selfloop[station_change + 1] <- 1
if (n_det > 0) {
  GRS_ALL$selfloop[1] <- 1
}

rm(code_change, station_change)

# T: detection time in seconds; time_diff: minutes since the previous row.
# The strings are parsed as UTC so that no time falls into a local
# daylight-saving gap and becomes NA.
# NOTE(review): the format stops at minutes, so seconds are ignored and
# detections within the same minute get time_diff = 0 (and a non-finite
# speed below) - confirm this is intended.
GRS_ALL$T <- as.numeric(strptime(GRS_ALL$datetime, format = "%d/%m/%Y %H:%M", tz = "UTC"))
GRS_ALL$time_diff <- c(0, diff(GRS_ALL$T) / 60)
#GRS_ALL$time_diff_2 <- c(0, diff(GRS_ALL$datetime, lag = 1))

# The differences above run across animals, so blank the first record of
# each individual
is_first <- GRS_ALL$firstrec == 1
GRS_ALL$selfloop[is_first] <- NA
GRS_ALL$time_diff[is_first] <- NA

# Distance (m) between the previous and the current receiver position, and
# the implied speed (m/s; time_diff is in minutes)
library(geosphere)

prev_pos <- cbind(dplyr::lag(GRS_ALL$receiver_lon), dplyr::lag(GRS_ALL$receiver_lat))
curr_pos <- cbind(GRS_ALL$receiver_lon, GRS_ALL$receiver_lat)
GRS_ALL$diff_dist <- distHaversine(prev_pos, curr_pos)
GRS_ALL$speed_ms <- GRS_ALL$diff_dist / (GRS_ALL$time_diff * 60)

GRS_ALL$diff_dist[is_first] <- NA
GRS_ALL$speed_ms[is_first] <- NA

GRS_ALL$diff_dist <- round(GRS_ALL$diff_dist, digits = 2)
GRS_ALL$speed_ms <- round(GRS_ALL$speed_ms, digits = 2)

rm(n_det, is_first, prev_pos, curr_pos)

# Number of distinct animals detected in each year
dplyr::ungroup(
  dplyr::summarise(dplyr::group_by(GRS_ALL, year), codeCount = n_distinct(code))
)


# Number of distinct animals detected in each year and month
dplyr::ungroup(
  dplyr::summarise(dplyr::group_by(GRS_ALL, year, month), codeCount = n_distinct(code))
)

# Convert the "YYYY/MM/DD" date strings to Date
GRS_ALL$date <- lubridate::ymd(GRS_ALL$date)
class(GRS_ALL$date)

# List the distinct detection dates, oldest first and then newest first
unique_dates <- unique(GRS_ALL$date)
sorted_unique_dates <- sort(unique_dates)
print(sorted_unique_dates)
sorted_unique_dates_desc <- sort(unique_dates, decreasing = TRUE)
print(sorted_unique_dates_desc)

# ---- Filter detections ----
# Conditions are applied per animal (grouped by code); rows where a condition
# evaluates to NA are dropped as well.
GRS_filt <- GRS_ALL %>%
  dplyr::group_by(code) %>%
  dplyr::filter(
    n() > 1,                    # drop animals with a single detection
    speed_ms < 6.9,             # speed cut-off: 6.9 for GRS (7.3 for ST)
    time_diff >= 0,
    date >= ymd("2013-03-01"),  # March 2013 is the first full month of detections
    date < ymd("2020-12-01")    # keep detections up to the end of November 2020
  ) %>%
  dplyr::ungroup()

unique(GRS_filt$year)

# Later code does not work on a tibble, so convert back to a data.frame
GRS_filt <- as.data.frame(GRS_filt)


# Resident atoll: the atoll with the most detections for each animal
GRS_res_atoll <- GRS_filt %>%
  dplyr::count(code, atoll) %>%
  dplyr::group_by(code) %>%
  dplyr::top_n(1, n) %>%
  dplyr::ungroup()

# Attach the resident atoll to every detection of that animal
GRS_filt$res_atoll <- GRS_res_atoll$atoll[match(GRS_filt$code, GRS_res_atoll$code)]

unique(GRS_filt$res_atoll)

# Keep only animals resident to one of the five key atolls, and only their
# detections made at those atolls
atolls <- c("Peros Banhos", "Benares", "Saloman", "Blenheim", "Victory Bank")

in_key_atolls <- GRS_filt$res_atoll %in% atolls & GRS_filt$atoll %in% atolls
GRS_filt <- GRS_filt[in_key_atolls, ]
rm(in_key_atolls)

########## PREPARING RESET DATA ##########

# Load the RESET records
# NOTE: data from 3 receivers that had no detections (AR01, GCB04 and NI01)
# have been removed
RESET_rec <- read.csv("RESET_reproject_2013-2020_update.csv", header=TRUE, stringsAsFactors = FALSE)

# The date column uses a "T" separator; replace it with a space to get
# "%Y-%m-%d %H:%M:%S" and convert to POSIXct
RESET_rec$date <- as.POSIXct(gsub("T", " ", RESET_rec$date),
                             format = "%Y-%m-%d %H:%M:%S", tz = "")
class(RESET_rec$date)

# Year/month key used to join with the detection data
RESET_rec$monthyear <- strftime(RESET_rec$date, format="%Y/%m")

unique(RESET_rec$stress_category)

### CREATE RESET AT EACH RECEIVER ####

# Variables in the "Reducer" category count negatively towards the score
RESET_rec <- dplyr::mutate(
  RESET_rec,
  SE_score = if_else(stress_category == "Reducer", SE_score * -1, SE_score)
)

# A score without an underlying variable value is treated as missing
RESET_rec$SE_score <- replace(RESET_rec$SE_score, is.na(RESET_rec$variable_mean), NA)


sapply(RESET_rec, class)

# Number of complete (NA-free) records per receiver and month
reset_complete <- dplyr::mutate_if(na.omit(RESET_rec), is.character, as.factor)
num_vars <- dplyr::summarise(
  dplyr::group_by(reset_complete, id, monthyear),
  var_num = n()
)
rm(reset_complete)

# Attach that count as the denominator
RESET_rec <- merge(RESET_rec, num_vars, by = c('id', 'monthyear'), all.x = TRUE)

#write.csv(BIOT.Dat,"BIOT.Dat.csv", row.names = TRUE)

# Each record's share of the score: SE_score divided by the record count
RESET_rec$SE_mean <- RESET_rec$SE_score/RESET_rec$var_num


# Score per receiver and month: sum of the shares over complete records
reset_complete <- dplyr::mutate_if(na.omit(RESET_rec), is.character, as.factor)
REC.RESET <- dplyr::summarise(
  dplyr::group_by(reset_complete, id, monthyear),
  SE_score = sum(SE_mean)
)
rm(reset_complete)

# Negative scores are set to zero
REC.RESET$SE_score <- pmax(REC.RESET$SE_score, 0)

#round score
#REC.RESET$SE_score <- round(REC.RESET$SE_score, digits=2)


# Atoll of each receiver
REC.RESET$atoll <- RECS$region[match(REC.RESET$id, RECS$id)]

# Keep the five key atolls only
atolls <- c("Peros Banhos", "Benares", "Saloman", "Blenheim", "Victory Bank")
REC.RESET <- REC.RESET[REC.RESET$atoll %in% atolls, ]

# Rename id to station so it matches the detection data
REC.RESET <- dplyr::rename(REC.RESET, station = id)

# Summary statistics: overall, per station, and the overall standard deviation
summary(REC.RESET$SE_score)

tapply(REC.RESET$SE_score, REC.RESET$station, summary)

sd(REC.RESET$SE_score)


##### ADD IN RESET DATA TO EACH SPECIES DATA #####

# Inner join: only detections with a RESET score for the same station, month
# and atoll are kept
GRS_fin <- merge(GRS_filt, REC.RESET, by = c('station', 'monthyear', 'atoll'))

str(GRS_fin)
str(GRS_filt)

# Convert the "dd/mm/YYYY HH:MM:SS" strings back to date-times.
# The source must be GRS_fin itself (the script previously referred to an
# undefined object, GRS_fin1). UTC is given explicitly so that no time falls
# into a local daylight-saving gap and becomes NA.
GRS_fin$datetime <- as.POSIXct(GRS_fin$datetime, format = "%d/%m/%Y %H:%M:%S", tz = "UTC")

# merge() reorders rows, so sort by animal and time again
GRS_fin <- GRS_fin %>%
  dplyr::arrange(code, datetime)

########## MAKE MOVEMENT DATASET FOR NETWORK ##########

library("asnipe")
library("igraph")
library("chron")
library("sp")
library("lubridate")

GRS_NET <- GRS_fin

# Animal id, receiver station and detection time of every row
AID <- GRS_NET$code
LOC <- GRS_NET$station
ping <- GRS_NET$datetime # name of the detection-time column may vary

# Row-to-row change in animal and in station, as differences of the integer
# factor codes: 0 means "same as the previous row". The first row has no
# previous row and is set to 0.
GRS_NET$diff_ID <- c(0, diff(as.integer(as.factor(AID))))
GRS_NET$diff_loc <- c(0, diff(as.integer(as.factor(LOC))))
# T and time_diff were already computed earlier

# A movement is a row where the animal is unchanged but the station differs.
# Columns are addressed by name so this keeps working if columns are added or
# reordered upstream.
move <- GRS_NET$diff_ID == 0 & GRS_NET$diff_loc != 0
move_rows <- which(move)

a <- rep(NA, length(move)) # station the animal came from
b <- rep(NA, length(move)) # station the animal moved to
c <- rep(NA, length(move)) # running movement number
d <- rep(NA, length(move)) # same values as b; not used below
e <- rep(NA, length(move)) # same values as a; not used below

b[move] <- GRS_NET$station[move]
a[move] <- GRS_NET$station[move_rows - 1] # station of the preceding row
c[move] <- seq_along(move_rows)
d[move] <- GRS_NET$station[move]
e[move] <- GRS_NET$station[move_rows - 1]

GRS_NET$From <- a
GRS_NET$To <- b
GRS_NET$Movement <- c

######### Creating non-proportional, simple edge count matrices ########

# Edge list: one row per movement (rows that are not movements have NA and
# are dropped)
el_dat <- na.omit(GRS_NET[, c("From", "To")])

# Edge list with the time difference of each movement
el_time_dat <- na.omit(GRS_NET[, c("From", "To", "time_diff")])

# Build an undirected graph from the edge list and convert it to an
# adjacency matrix
G <- graph_from_data_frame(el_dat,directed=FALSE)
netG <- as_adjacency_matrix(G)
netG <- as.matrix(netG, header=TRUE, row.names=1)

# Weighted, undirected network from the adjacency matrix, without self-loops
dat_NET <- graph_from_adjacency_matrix(netG, mode = "undirected", diag = FALSE, weighted = TRUE)


# Summary table of the tagged animals for the supplementary material

# One row per animal present in the final data set
shark_metadat <- data.frame(code = unique(GRS_fin$code))

# Attach tag metadata
shark_meta_row <- match(shark_metadat$code, META$idcode)
shark_metadat$species <- META$commonname[shark_meta_row]
shark_metadat$sex <- META$sex[shark_meta_row]
shark_metadat$TL <- as.numeric(META$len1[shark_meta_row])
shark_metadat$Tag_date <- META$taggingdate[shark_meta_row]
rm(shark_meta_row)

# First and last detection date per animal and the number of days between
# them. The units are fixed to days; with automatic units difftime() chooses
# the unit from the size of the difference.
detections <- GRS_fin %>%
  dplyr::group_by(code) %>%
  dplyr::summarise(first_detection = min(date), final_detection = max(date),
            days_detected = as.numeric(difftime(final_detection, first_detection,
                                                units = "days")))

# Add 1 so an animal detected on a single day counts as 1 day, not 0
detections$days_detected <- detections$days_detected +1
detections$days_detected <- round(detections$days_detected, 0)

# Add the detection summary to the metadata (all metadata rows are kept)
shark_metadat <- merge(shark_metadat, detections, by = "code", all.x = TRUE)

# Drop animals of unknown sex
shark_metadat <- shark_metadat %>%
  dplyr::filter(sex != "U")

summary(shark_metadat$TL)
sd(shark_metadat$TL)

# Number of animals per sex
shark_metadat %>%
  dplyr::count(sex)

write.csv(shark_metadat ,"shark_metadat.csv")



library(MuMIn)



########### GRS RESIDENCY DATA ##########

#use BASIC_V2 data

# Detections from a single month (August 2015), in the shape nodeRes_func()
# expects - presumably kept for trying the function out interactively
dat <- dplyr::filter(GRS_filt, monthyear == "2015/08")

#LAtest residency analysis - 24/07/23

# Residency index per station and animal for one block of detections.
#
# dat: detections (the script passes one month at a time); needs the columns
#      station, code, date and monthyear.
#
# Returns a data.frame with one row per station/code combination:
#   ud        number of distinct dates with a detection
#   days_lib  1 + number of days between the earliest and the latest date in
#             `dat` (taken over all rows of `dat`, not per animal or station)
#   ri        ud / days_lib, rounded to 4 decimal places
#   monthyear the first monthyear value found in `dat`
nodeRes_func <- function(dat){
  # Length of the period covered by `dat`, in days. One is added, otherwise
  # data confined to a single day would give 0. This is the same for every
  # station/code pair, so it is computed once here rather than per group.
  if (nrow(dat) > 0) {
    all_dates <- as.Date(dat$date)
    span_days <- 1 + as.numeric(max(all_dates) - min(all_dates))
  } else {
    span_days <- NA_real_ # nothing to measure; avoids max()/min() warnings
  }

  # Distinct detection days per station and animal
  res_index <- dat %>%
    dplyr::group_by(station, code) %>%
    dplyr::summarise(ud = dplyr::n_distinct(date))

  # Period length and residency index for the same station/animal pairs
  days_lib <- res_index %>%
    dplyr::ungroup() %>%
    dplyr::transmute(station, code, days_lib = span_days, ri = ud / days_lib)

  res_index <- merge(res_index, days_lib, by = c('station', 'code'))

  res_index$ri <- round(res_index$ri, digits = 4)

  res_index <- as.data.frame(res_index)
  # Label every row with the month of the input (base indexing is used so the
  # result does not depend on which package's first() is on the search path)
  res_index$monthyear <- rep(dat$monthyear[1], nrow(res_index))

  res_index
}

# Every month from March 2013 to November 2020, as "YYYY/MM"
month_list <- strftime(seq(as.Date("2013/03/01"), as.Date("2020/11/30"), by = "month"),format="%Y/%m")

# Residency index for each month, one data.frame per month
GRSrI_datalist <- lapply(month_list, function(month_i) {
  nodeRes_func(dplyr::filter(GRS_fin, monthyear == month_i))
})

GRS_rI <- do.call(rbind, GRSrI_datalist)


# Attach the RESET score of the same station and month (inner join)
GRS_rI <- merge(GRS_rI, REC.RESET, by = c('station', 'monthyear'))

# Add sex and total length. TL is converted to numeric here, as it is
# wherever else len1 is used, because it is scaled as a numeric covariate in
# the models.
GRS_rI$sex <- META$sex[match(GRS_rI$code, META$idcode)]
GRS_rI$TL <- as.numeric(META$len1[match(GRS_rI$code, META$idcode)])

str(GRS_rI)

# Number of stations, for the summary statistics
n_distinct(GRS_rI$station)

# Remove animals of unknown sex
GRS_rI <- GRS_rI %>%
  dplyr::filter(sex != "U")

# Number of animals, for the summary statistics
n_distinct(GRS_rI$code)

# Summary statistics of the residency index
summary(GRS_rI$ri)
sd(GRS_rI$ri)

#### DJ additions ####

# Trial plot for a single half-year (January to June 2014)
in_test_window <- GRS_rI$monthyear %in% c('2014/01', '2014/02', '2014/03',
                                          '2014/04', '2014/05', '2014/06')
GRS_rI_test <- GRS_rI[in_test_window, ]
rm(in_test_window)
GRS_rI_test$period <- 1

# Per animal: the highest residency index (stored in a column called `mean`)
# and the number of rows
RImeans <- dplyr::summarise(
  dplyr::group_by(GRS_rI_test, code),
  mean = max(ri),
  n = n()
)
RImeans$period <- 1

# Bubble plot: one point per animal, sized by its number of rows
ggplot(RImeans, aes(x = period, y = mean, size = n)) +
  geom_point(alpha = 0.7)

#### DJ additions ####
#install.packages(c("hrbrthemes","dplyr"))
library(hrbrthemes)
library(dplyr)

# Distribution of mean residency index per animal in 14 consecutive half-year
# periods: period 1 = Jul-Dec 2013, period 2 = Jan-Jun 2014, ...,
# period 14 = Jan-Jun 2020.
#
# For each period k this creates the same objects as before:
#   GRS_rI_k    rows of GRS_rI in the period, with a `period` column
#   RImeansk    mean ri per animal (rounded to 0.1) merged with all bins
#   RIMk        number of animals in each bin
#   RIM_propsk  RIMk plus the proportion of animals in each bin
#
# Changes from the copy-pasted version:
#   * period 9 is computed from its own months (it used the period 10 data);
#   * dplyr verbs are namespaced, because plyr is attached after dplyr and
#     its summarise() ignores groups;
#   * the bin values are rounded so that they match the rounded means exactly
#     (seq(0, 1, by = 0.1) alone does not give exactly 0.3, 0.6 or 0.7).

# The 11 bins 0, 0.1, ..., 1
Dum <- data.frame(mean = round(seq(0, 1, by = 0.1), digits = 1))

# "YYYY/MM" labels of the six months in each period
ri_period_start <- seq(as.Date("2013-07-01"), by = "6 months", length.out = 14)
ri_period_months <- lapply(seq_along(ri_period_start), function(p) {
  format(seq(ri_period_start[p], by = "month", length.out = 6), "%Y/%m")
})

# Summarise the residency index for one period.
#
# ri_dat: residency data with columns monthyear, code and ri
# months: character vector of "YYYY/MM" labels belonging to the period
# period: period number, stored in the `period` column of every result
# bins:   data.frame with a `mean` column listing every bin to report
#
# Returns a list with elements data, means, counts and props (see above).
summarise_ri_period <- function(ri_dat, months, period, bins) {
  period_dat <- ri_dat[ri_dat$monthyear %in% months, ]
  period_dat$period <- rep(period, nrow(period_dat))

  # Mean ri per animal, rounded to one decimal so it falls on a bin
  ri_means <- period_dat %>%
    dplyr::group_by(code) %>%
    dplyr::summarise(mean = round(mean(ri), digits = 1), n = dplyr::n())

  # Full outer join with the bins so that empty bins appear (with n = NA)
  ri_means <- merge(ri_means, bins, by = "mean", all = TRUE)
  ri_means$period <- rep(period, nrow(ri_means))

  # Animals per bin; rows that came only from `bins` have n = NA and count 0
  ri_counts <- ri_means %>%
    dplyr::group_by(mean) %>%
    dplyr::summarise(n = sum(!is.na(n)))

  ri_props <- dplyr::mutate(ri_counts, proportion = n / sum(n))
  ri_props$period <- rep(period, nrow(ri_props))

  list(data = period_dat, means = ri_means, counts = ri_counts, props = ri_props)
}

for (p in seq_along(ri_period_months)) {
  ri_period <- summarise_ri_period(GRS_rI, ri_period_months[[p]], as.numeric(p), Dum)
  assign(paste0("GRS_rI_", p), ri_period$data)
  assign(paste0("RImeans", p), ri_period$means)
  assign(paste0("RIM", p), ri_period$counts)
  assign(paste0("RIM_props", p), ri_period$props)
}
rm(p, ri_period)


#ggplot(RIM_props, aes(x=period, y=mean, size = proportion)) +
#  geom_point(alpha=0.6, color = "blue") +
#  scale_y_continuous(limits = c(0, 1.0), breaks = seq(0,1, by =0.2)) +
#  scale_size(range = c(0, max(RIM_props$proportion*40)), name="Proportion of n") +
#  theme_ipsum() +
#  theme(legend.position="bottom") +
#  theme(axis.title.x=element_blank())

#plot together
# Bubble plot of the share of animals in each residency-index bin, one column
# of points per half-year period. Every period is its own layer with a fixed
# colour: periods 3-6 and 11-12 use "#CC3517", all others "#0084CC".
ri_period_props <- list(
  RIM_props1, RIM_props2, RIM_props3, RIM_props4, RIM_props5,
  RIM_props6, RIM_props7, RIM_props8, RIM_props9, RIM_props10,
  RIM_props11, RIM_props12, RIM_props13, RIM_props14
)
ri_period_cols <- c(
  "#0084CC", "#0084CC",
  "#CC3517", "#CC3517", "#CC3517", "#CC3517",
  "#0084CC", "#0084CC", "#0084CC", "#0084CC",
  "#CC3517", "#CC3517",
  "#0084CC", "#0084CC"
)

ri_period_layers <- Map(
  function(props, point_col) {
    geom_point(data = props, aes(x = period, y = mean, size = proportion),
               alpha = 0.6, color = point_col)
  },
  ri_period_props,
  ri_period_cols
)

ggplot() +
  ri_period_layers +
  scale_y_continuous(limits = c(0, 1.0), breaks = seq(0, 1, by = 0.2)) +
  scale_x_continuous(limits = c(0, 14), breaks = seq(1, 14, by = 1)) +
  # point sizes are scaled relative to the largest proportion in period 4
  scale_size(range = c(0, max(RIM_props4$proportion * 50)), name = "Proportion of n") +
  #scale_y_continuous(limits = c(0, 1.0), breaks = seq(0,1, by =0.2)) +
  #scale_size(range = c(0, max(RIM_props2$proportion*40)), name="Proportion of n") +
  theme_ipsum() +
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = -45))

#blue: #4B83B4FF
#red: darkred
#####################

#####################

#model!!
  
  #plot
  
  library(ggpubr)

# Residency index against RESET score, with a regression line and the
# Spearman correlation coefficient
ggscatter(
  GRS_rI,
  x = "ri", y = "SE_score",
  xlab = "residency index", ylab = "RESET",
  color = "#666666",
  add = "reg.line",
  add.params = list(color = "red"),
  cor.coef = TRUE,
  cor.method = "spearman"
)

# Same relationship, coloured by atoll with a fitted line per atoll, to see
# whether it varies between atolls
library(viridis)

ggplot(data = GRS_rI, aes(x = ri, y = SE_score, colour = atoll)) +
  geom_point() +
  geom_smooth(method = 'glm', se = FALSE) +
  scale_color_viridis_d() +
  theme_bw() +
  labs(x = "Residency Index", y = "RESET")



########## MODEL DATA ###########

# Month number: the last n_last characters of the "YYYY/MM" label
n_last <- 2
monthyear_len <- nchar(GRS_rI$monthyear)
GRS_rI$month <- as.numeric(substr(GRS_rI$monthyear,
                                  monthyear_len - n_last + 1, monthyear_len))
rm(monthyear_len)

# Season (April to September is dry, the rest wet), then turn every character
# column into a factor
GRS_rI$season <- ifelse(GRS_rI$month %in% 4:9, 'dry.season', 'wet.season')
GRS_rI <- mutate_if(GRS_rI, is.character, as.factor)


# Year: the first four characters of the label, as a factor
GRS_rI$year <- as.factor(substr(GRS_rI$monthyear,1,4))


# Shuffle the rows reproducibly
set.seed(2347723)

GRS_rI <- GRS_rI[sample(nrow(GRS_rI)), ]



library(TMB)
library(glmmTMB)
library(lme4)
library(lmerTest)

# ud is used as a numeric count in the models
GRS_rI$ud <- as.numeric(GRS_rI$ud)


# Global model: binomial GLMM of the proportion of days detected
# (ud/days_lib, weighted by days_lib) on the scaled RESET score, season,
# year, sex and scaled total length, with random intercepts for animal (code)
# and station. na.action=na.fail makes the fit stop on missing values instead
# of silently dropping rows.
mRI <- glmmTMB(ud/days_lib  ~ scale(SE_score) + season + year + sex + scale(TL) +
               (1|code) + (1|station), weights=days_lib , family=binomial(),  
             data=GRS_rI, na.action=na.fail)

summary(mRI)

# collinearity check on the global model
check_collinearity(mRI)


# candidate models derived from the global model
mRI_dredge <- MuMIn::dredge(mRI)

# keep the candidates with delta <= 2 and recalculate their weights
mRI_cs <- subset(mRI_dredge, delta <= 2, recalc.weights = TRUE)

# check nesting among the retained candidates
mRI_nested <- as.data.frame(nested(mRI_cs))

# two models remain after the nesting check, so average over them:
# keep the top two rows of the selection table

mRI_cs <- mRI_cs[1:2, ]

summary(model.avg(mRI_cs))
# sum of weights of the terms across the two retained models
RI_sw <- as.data.frame(sw(mRI_cs))

str(GRS_rI)

# Final model: same structure as the global model but without scale(TL)
mRI_fin <- glmmTMB(ud/days_lib  ~ scale(SE_score) + season + sex + year + 
                     (1|code) + (1|station), weights=days_lib , family=binomial(), data=GRS_rI, na.action=na.fail)

summary(mRI_fin)

MuMIn::r.squaredGLMM(mRI_fin)


# get confidence intervals (printed, then stored rounded to 3 decimals)

confint(mRI_fin)
mRI_fin_confint <- as.data.frame(round(confint(mRI_fin), 3))

# Diagnostic plots for the final model, written to a TIFF file:
# residuals against fitted values, histogram of residuals, and the
# autocorrelation function of the residuals.
Res <- resid(mRI_fin, type = 'response')
Fit <- fitted(mRI_fin)

tiff('Het_test_V3.tiff', units = "in", width = 13.5, height = 11, res = 300)

# 2 x 2 layout on one page (three panels are drawn)
par(mfrow = c(2, 2))

plot(Fit, Res,
     xlab = "Fitted Values", ylab = "Residuals",
     main = "Residuals Vs Fitted values for Final Model",
     cex.main = 2, cex.lab = 1.5, cex.axis = 1.5)

hist(Res, nclass = 15,
     xlab = "Residuals",
     main = "Histogram of Residuals for Final Model",
     cex.main = 2, cex.lab = 1.5, cex.axis = 1.5)

acf(residuals(mRI_fin),
    main = "ACF of Residuals for Final Model",
    cex.main = 2, cex.lab = 1.5, cex.axis = 1.5)

dev.off()


########## LAG EFFECTS ###########

# Monthly means of the residency index and of the RESET score
lag_dat <- dplyr::summarise(
  dplyr::group_by(GRS_rI, monthyear),
  mean_ri = mean(ri),
  mean_SE = mean(SE_score)
)

# Cross-correlation between the two monthly series, to look for lags
ccf(lag_dat$mean_SE, lag_dat$mean_ri)

# Same plot up to a lag of 18, saved to file
tiff('LagPlot.tiff', units = "in", width = 13.5, height = 11, res = 300)
ccf(lag_dat$mean_SE, lag_dat$mean_ri, lag.max = 18,
    main = "Combined environmental SE index & residency index")
dev.off()

# Print the cross-correlation values
print(ccf(lag_dat$mean_SE, lag_dat$mean_ri))


########## TESTING DATA WITH OUT EL NINO YEARS ##########

# Represent each month by the date of its first day
GRS_rI$Date <- as.Date(paste0(GRS_rI$monthyear, "/01"), "%Y/%m/%d")

str(GRS_rI)


# Label every row as falling inside or outside an El Nino window.
# Windows are inclusive at both ends; dates outside all of them stay NA.
elnino_from <- as.Date(c("2013/02/10", "2014/07/01", "2016/07/01", "2018/07/01", "2019/07/01"))
elnino_to <- as.Date(c("2014/06/30", "2016/06/30", "2018/06/30", "2019/06/30", "2020/11/30"))
elnino_label <- c("Non-ElNino", "ElNino", "Non-ElNino", "ElNino", "Non-ElNino")

GRS_rI$ElNino <- NA
for (w in seq_along(elnino_label)) {
  in_window <- GRS_rI$Date >= elnino_from[w] & GRS_rI$Date <= elnino_to[w]
  GRS_rI$ElNino[in_window] <- elnino_label[w]
}
rm(w, in_window, elnino_from, elnino_to, elnino_label)

# Data set restricted to the non-El-Nino windows
GRS_ElNino <- dplyr::filter(GRS_rI, ElNino == "Non-ElNino")



# Run the residency models on the non-El Nino subset

# Global model: same structure as the main analysis, fitted to GRS_ElNino.
# na.action = na.fail so that every candidate model uses the same rows.
mRI_EN <- glmmTMB(
  ud/days_lib ~ scale(SE_score) + season + year + sex + scale(TL) +
    (1|code) + (1|station),
  weights = days_lib,
  family = binomial(),
  data = GRS_ElNino,
  na.action = na.fail
)

summary(mRI_EN)

check_collinearity(mRI_EN)


# Model selection table over the sub-models of the global model
mRI_EN_dredge <- MuMIn::dredge(mRI_EN)

# Candidate set: models with delta <= 2, weights recalculated within the set
mRI_cs_EN <- subset(mRI_EN_dredge, delta <= 2, recalc.weights = TRUE)

# Check nesting among the candidate models
mRI_EN_nested <- as.data.frame(nested(mRI_cs_EN))

# Two models remain following the nesting checks, so model-average them:
# keep the top two rows of the selection table
mRI_cs_EN <- mRI_cs_EN[1:2, ]

summary(model.avg(mRI_cs_EN))
#RI_sw <- as.data.frame(sw(mRI_cs))


# Final model for the non-El Nino subset (sex and TL dropped)
mRI_fin_EN <- glmmTMB(
  ud/days_lib ~ scale(SE_score) + season + year +
    (1|code) + (1|station),
  weights = days_lib,
  family = binomial(),
  data = GRS_ElNino,
  na.action = na.fail
)

summary(mRI_fin_EN)

MuMIn::r.squaredGLMM(mRI_fin_EN)

# Get confidence intervals: print unrounded, store rounded to 3 dp

confint(mRI_fin_EN)
mRI_EN_confint <- confint(mRI_fin_EN) %>%
  round(3) %>%
  as.data.frame()

########## EFFECT PLOTS ##########


# Effects plot of the station-level random effects from mRI_fin, one row per
# station, filled by atoll

# Data
station <- GRS_rI


# Extract random effects as a long data frame
dd <- as.data.frame(ranef(mRI_fin))
# Subset to the station grouping variable; column 4 is renamed to "station"
# so it can be joined against the station/atoll lookup below
dd_st <- subset(dd, grpvar == "station")
names(dd_st)[4] <- "station"
#dd_st[,4]<-as.numeric(as.character(dd_st[,4]))
head(dd_st)

# Unique data frame of station / atoll pairs
head(station)
station_unique <- unique(station[, c("station", "atoll")])
dim(station_unique)

# Copy across atoll data

dd_st <- left_join(dd_st, station_unique, "station")
dd_st$station <- as.factor(dd_st$station)

# Order the station levels by conditional value so the plot is sorted
dd_st$station <- factor(dd_st$station, levels = dd_st$station[order(dd_st$condval)])

# Add ~95% intervals (conservatively set to 2 not 1.96)
head(dd_st)
dd_st$l95 <- with(dd_st, condval - 2 * condsd)
dd_st$u95 <- with(dd_st, condval + 2 * condsd)

# Colour variable based on atoll
library(viridis)
atoll_cols <- viridis(5)
# NOTE(review): "Benerares" is tested twice, so the branch that assigns
# atoll_cols[4] can never be reached. The second test was presumably meant
# to name a different atoll - confirm the intended name against the data.
# The logic is left unchanged here because the correct value is not visible.
dd_st$col <- ifelse(dd_st$atoll == "Benerares", atoll_cols[1],
                    ifelse(dd_st$atoll == "Blenheim", atoll_cols[2],
                           ifelse(dd_st$atoll == "Peros Banhos", atoll_cols[3],
                                  ifelse(dd_st$atoll == "Benerares", atoll_cols[4],
                                         atoll_cols[5]))))

# Set colour for those crossing zero to grey
dd_st$cross_zero <- with(dd_st, l95 <= 0 & u95 >= 0)
dd_st$col[which(dd_st$cross_zero == TRUE)] <- "gray77"

# Set alpha for those crossing zero
dd_st$alphaval <- 1
dd_st$alphaval[which(dd_st$cross_zero == TRUE)] <- 0.5

# Make plot: point = conditional value, horizontal bar = ~95% interval,
# dashed vertical line = zero

cat1 <- ggplot(dd_st, aes(y = station, x = condval)) +
  scale_fill_viridis_d() +
  geom_vline(xintercept = 0, linetype = 2) +
  geom_errorbarh(aes(xmin = l95,
                     xmax = u95), alpha = dd_st$alphaval, height = 0) +
  geom_point(aes(fill = atoll), alpha = dd_st$alphaval, shape = 21, size = 5) +
  labs(x = "Departure from Global Intercept (Logits)", y = "Receiver")



cat2 <- cat1 + theme_bw() +
  theme(axis.text.x = element_text(size = 18), axis.text.y = element_text(size = 14),
        axis.title = element_text(size = 20), strip.text = element_text(size = 20),
        legend.text = element_text(size = 20), legend.title = element_blank(),
        legend.position = "top")

pdf('Fig3_a.pdf', width = 12, height = 10)
#png('Station_RandomEffects.png', units="in", width=12, height=10, res=600)
# Explicit print(): a bare ggplot object is only drawn by top-level
# auto-printing, so the PDF would be empty when the script is run via source()
print(cat2)
dev.off()

# How many station intervals do / do not include zero
dd_st %>%
  dplyr::count(cross_zero)

n_distinct(GRS_rI$station)


##### RECEIVER MAPS #####

library(rgdal)
# Select receivers just from the right atolls
RECS_atolls <- RECS[RECS$region %in% atolls, ]


#chagos <- readOGR("C:/Users/Mike Williamson/Dropbox/ReefShark_RESET/R","Chagos_v6")
#chagos <- readOGR("/Users/michaelwilliamson/Dropbox/ReefShark_RESET/R","Chagos_v6")

library(sf)
chagos <- st_read("Chagos_v6.shp")

library(broom)
library(sf)# need so can use ggplot on shape file
# NOTE(review): the polygon layer below maps `long`, `lat` and `group`.
# chagos is now read with st_read() and converted with as_tibble(); verify
# that the resulting table really has those three columns.
chagos_tidy <- as_tibble(chagos)

# Get bathymetry data
library(marmap)

Bathy <- getNOAA.bathy(lon1 = 71.3, lon2 = 72.8,
                       lat1 = -5.65, lat2 = -5.08, resolution = 1)



library(ggsn)
library(legendMap)
library(extrafont)

fonts()
loadfonts()

# can't get the legend to work in ggsn or the north arrow to work for legendMap so using both.
# Only one output device is opened for this figure ('Figure 1.png', below).
# The earlier 'receiver_bathy.png' device was opened here but never closed,
# so it stayed current after dev.off() and captured later plots.
#tiff('receiver_bathy.tiff', units="in", width=10.5, height=6, res=300)
#png('receiver_bathy.png', units="in", width=10.5, height=6, res=300)

#pdf('receiver_bathy.pdf', width=10.5, height=6)

site_map <- autoplot.bathy(Bathy, geom=c("tile"), coast = FALSE) +
  scale_fill_gradient2(low="dodgerblue4", mid = "lightblue", high="gainsboro", name = "Depth (m)") +
  geom_polygon(data = chagos_tidy, aes(x = long, y = lat, group = group), color = "#333333", linewidth = 0.25, fill = "lightgrey", alpha = 0) +
  geom_point(data=RECS_atolls, aes(x = longitude, y = latitude, group = region), color = "red", alpha=0.75, size = 3) +
  scale_x_continuous(limits = c(71.3, 72.8))+
  scale_y_continuous(limits = c(-5.65, -5.08), breaks=seq(-5.6, -5.2, 0.2))+
  coord_equal() +
  theme_minimal() +
  theme(panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.title.x = element_text(colour = "black", size=22),
        axis.title.y = element_text(colour = "black", size=22),
        axis.text.x = element_text(colour = "black", size=20),
        axis.text.y = element_text(colour = "black", size=20),
        legend.text = element_text(colour="black", size = 20),
        legend.title = element_text(colour="black", size=22))+
  xlab("Longitude") + ylab("Latitude")+
  north(chagos_tidy, scale = 0.03, symbol = 3, 
        anchor = c(x = 72.79, y = -5.1)) +
  scale_bar(lon = 71.4, lat = -5.19, #-4.76
            distance_lon = 25, distance_lat = 2.5,
            distance_legend = 8, dist_unit = "km", legend_size=5)+
  geom_text(label="Blenheim Reef", x=72.61, y=-5.22,color = "black", family = "Arial")+
  geom_text(label="Salomon Islands", x=72.42, y=-5.33,color = "black", family = "Arial")+
  geom_text(label="Benares Shoal", x=71.52, y=-5.25,color = "black",family = "Arial")+
  geom_text(label="Peros Banhos", x=71.85, y=-5.5,color = "black", family = "Arial")+
  geom_text(label="Victory Bank", x=72.25, y=-5.57,color = "black", family = "Arial")

png('Figure 1.png', units="in", width=10.5, height=6, res=600)

# Explicit print() so the figure is also drawn when the script is source()d
print(site_map + theme(text = element_text(family = "Arial")))

dev.off()

# New plot for mapping each station's departure from the global intercept
# onto the receiver locations
# need so can use ggplot on shape file
library(broom)
library(sf)

chagos <- st_read("Chagos_v6.shp")
# Fill colours, matched in order to the `breaks` of the manual scale below:
# sig-pos, sig-neg, non-sig
atoll_cols2 <- c("#0827F5", "#FFA400", "gray77")

# Classify stations by the sign of their conditional value ...
dd_st$glob_av <- ifelse(dd_st$condval > 0, "sig-pos", "sig-neg")

# ... and override those whose interval includes zero
dd_st$glob_av[which(dd_st$cross_zero == TRUE)] <- "non-sig"

# Add lat lon data from the receiver table

dd_st$longitude <- RECS$longitude[match(dd_st$station, RECS$id)]
dd_st$latitude <- RECS$latitude[match(dd_st$station, RECS$id)]

# Only one device is opened. Previously png() and pdf() were both opened but
# a single dev.off() followed, so the plot went to the PDF and the PNG device
# stayed open and captured later plots.
#png('receiver_globav.png', units="in", width=12, height=6, res=600)
pdf('Fig2_b.pdf', width=12, height=6)


cat3 <- ggplot() +
  geom_sf(data = chagos, color = "#333333", fill = "lightgrey", alpha = 0) +
  # Station points, filled by significance class
  geom_point(data=dd_st, aes(x = longitude, y = latitude, fill=glob_av), alpha=dd_st$alphaval,shape=21,size=5) +
  scale_fill_manual(values = atoll_cols2,
                    name="",
                    breaks=c("sig-pos", "sig-neg", "non-sig"),
                    labels=c("Significantly greater", "Significantly less", "Non-significant"))+
  # Map extent
  scale_x_continuous(limits = c(71.5, 72.6))+
  scale_y_continuous(limits = c(-5.62, -5.1), breaks=seq(-5.6, -5.2, 0.2))+
  coord_sf() +  # Use coord_sf() for sf objects
  theme_minimal() +
  theme(panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.text = element_text(colour="black", size = 20),
        legend.title = element_text(colour="black", size=22),
        legend.position="bottom",
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())+
  # Atoll name labels at fixed coordinates
  geom_text(aes(label="Blenheim", x=72.55, y=-5.22), color = "black")+
  geom_text(aes(label="Salomon", x=72.36, y=-5.33), color = "black")+
  geom_text(aes(label="Benares", x=71.58, y=-5.25), color = "black")+
  geom_text(aes(label="Peros Banhos", x=71.85, y=-5.5), color = "black")+
  geom_text(aes(label="Victory Bank", x=72.25, y=-5.58), color = "black")

# Explicit print() so the PDF is not empty when the script is source()d
print(cat3)

dev.off()

# Combine the two plots (station effects above, map below)

library(patchwork)

cat2 / cat3

## new scatter plot for supplementary

# El Nino periods, drawn as full-height shaded rectangles (y from -Inf to Inf)
# NOTE(review): the last rectangle ends on 2019-01-01, whereas the ElNino
# classification of GRS_rI uses 2019/06/30 for the same period - confirm
# which end date is intended.
ElNino1 <- data.frame(
  x1 = as.Date(c("2014-07-01", "2015-07-01", "2018-07-01")),
  x2 = as.Date(c("2015-06-30", "2016-06-30", "2019-01-01")),
  y1 = rep(-Inf, 3),
  y2 = rep(Inf, 3)
)

# Add a Date column (first day of each monthyear) to GRS_rI

GRS_rI$Date <- as.Date(paste0(GRS_rI$monthyear, "/01"), "%Y/%m/%d")

# Same for REC.RESET, plus the calendar year as a factor
REC.RESET$Date <- as.Date(paste0(REC.RESET$monthyear, "/01"), "%Y/%m/%d")
REC.RESET$year <- as.factor(strftime(REC.RESET$Date, format = "%Y"))


# Combine: shaded El Nino periods, SE scores as points, smoother on top.
# The rectangle layer brings its own data and does not inherit the default
# aesthetics.

ggplot(REC.RESET, aes(x = Date, y = SE_score)) +
  geom_rect(
    data = ElNino1,
    mapping = aes(xmin = x1, ymin = y1, xmax = x2, ymax = y2),
    fill = "orange", alpha = 0.5,
    inherit.aes = FALSE
  ) +
  geom_point() +
  geom_smooth(colour = "blue") +
  theme_bw() +
  # Six-monthly breaks; axis starts at 2013/01/01, upper limit from the data
  scale_x_date(
    date_breaks = "6 month", date_labels = "%Y/%m",
    limits = c(as.Date("2013/01/01"), NA)
  ) +
  xlab("Date") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_y_continuous("combined environmental SE score") +
  theme(
    axis.title.x = element_text(colour = "black", size = 22),
    axis.title.y = element_text(colour = "black", size = 22),
    axis.text.x = element_text(colour = "black", size = 20),
    axis.text.y = element_text(colour = "black", size = 20)
  )

# temporal GAM model of RESET

date_base <- as.Date("2013/03/01")
class(date_base)
class(REC.RESET$Date)

# Change date to number of days since date_base, to look at whether there is
# a significant effect of day number over time.
# `units` is spelled out in full: the original `unit = "days"` only worked
# through partial argument matching.

REC.RESET$Date_num <- as.numeric(difftime(REC.RESET$Date, date_base, units = "days"))

# Change atoll to factor so the model runs (needed for the bs = 're' term)

REC.RESET$atoll <- as.factor(REC.RESET$atoll)

# Smooth of day number plus a random-effect smooth for atoll
RESET_Mod <- gam(SE_score ~ s(Date_num) + s(atoll, bs = 're'),
                 data = REC.RESET)

summary(RESET_Mod)

plot(RESET_Mod, shade = TRUE, residuals = TRUE)


# spatial model of RESET

# Plot data: SE score by atoll

ggplot(REC.RESET, aes(x = atoll, y = SE_score)) +
  geom_boxplot()


# betareg
# Install only when the package is missing, instead of re-installing on
# every run of the script
if (!requireNamespace("betareg", quietly = TRUE)) {
  install.packages("betareg")
}
library(betareg)

# Beta regression of SE score on atoll, year and their interaction
SE_mod <- betareg(SE_score ~ atoll*year, data = REC.RESET)

summary(SE_mod)

# Distribution of the response
ggplot(data = REC.RESET, aes(x = SE_score)) +
  geom_histogram()


# Get interaction results



# emmeans for interaction: pairwise contrasts between atolls within each year
emm4.1 <- emmeans(SE_mod, specs = pairwise ~ atoll|year)
#emm4.2 <- emmeans(SE_mod , specs = pairwise ~ atoll|year, type = "response")#type = "response"

# Contrast table with confidence limits and tests, as a plain data frame
emm4.2 <- emm4.1$contrasts %>%
  summary(infer = TRUE) %>%
  as.data.frame()


emm4.2$p.value <- as.numeric(emm4.2$p.value)
class(emm4.2$p.value)

emm4.2$p.value <- round(emm4.2$p.value, digits = 5)

# Drop the df column before export
emm4.2 <- subset(emm4.2, select = -c(df))

write.csv(emm4.2, "emm_atoll_withinYear.csv")

citation("betareg")


########### PLOTTING KUD DATA ##########

# Half-year group label ("Jan - Jun YYYY" / "Jul - Dec YYYY") for dates.
#
# date: a Date (scalar or vector).
# Returns a character vector of the same length. Elements are NA when the
# date is missing or its year lies outside 2013-2020.
#
# Vectorised and NA-safe: the previous scalar if/else raised an error on a
# missing date (the condition evaluated to NA). It still works element-wise
# through sapply().
generate_group_label <- function(date) {
  year <- as.integer(format(date, "%Y"))
  month <- as.integer(format(date, "%m"))

  # Months 7-12 belong to the second half of the year
  half <- ifelse(month >= 7, "Jul - Dec ", "Jan - Jun ")
  label <- paste0(half, year)

  # Blank out missing dates and years outside the study period
  label[is.na(year) | is.na(month) | year < 2013 | year > 2020] <- NA_character_

  label
}

# Apply the function to create a new column of half-year group labels,
# one label per row of GRS_fin (computed from its date column)
GRS_fin$yr_grps <- sapply(GRS_fin$date, generate_group_label)


# Working copy that receives the UTM coordinates below
GRS_ade <- GRS_fin

# Need to make UTM coordinates: a copy is promoted to a spatial object using
# the receiver lon/lat columns, given a WGS84 long/lat CRS, then transformed

coords1 <- GRS_ade
coordinates(coords1) <-c("receiver_lon", "receiver_lat")
proj4string (coords1) <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")# initial coordinate system
coords2 <- spTransform(coords1, CRS("+proj=utm +zone=42 +south +datum=WGS84 +units=m +no_defs"))# transform to UTM zone 42 south, metres
proj4string(coords2)

# Add UTM coordinates to the main file: first coordinate column -> X_UTM,
# second -> Y_UTM
GRS_ade$X_UTM <-coordinates(coords2)[,1]
GRS_ade$Y_UTM<-coordinates(coords2)[,2]

# All half-year group labels in chronological order:
# "Jul - Dec 2013", then both halves of 2014-2019, then "Jan - Jun 2020".
# rbind() + as.vector() interleaves the two halves year by year.
yr_groups <- c(
  "Jul - Dec 2013",
  as.vector(rbind(
    paste0("Jan - Jun ", 2014:2019),
    paste0("Jul - Dec ", 2014:2019)
  )),
  "Jan - Jun 2020"
)

# Kernel utilisation distribution (KUD) areas for one half-year group.
# Despite its name this function neither draws nor saves a plot: it returns
# a data frame.
#
# yr_group: one label matching the yr_grps column of the global GRS_ade.
# Returns: the kernel.area() table for 50-95% (steps of 5, km2) divided by
# the number of distinct codes in the group, with the percentage level in a
# numeric KUD column and the group label in yr_group.
create_and_save_plot <- function(yr_group) {
  # Rows of GRS_ade belonging to this group
  grp_rows <- dplyr::filter(GRS_ade, yr_grps == yr_group)

  # Number of distinct codes in the group, used to standardise the areas
  n_GRS <- dplyr::n_distinct(grp_rows$code)

  # Spatial points in UTM zone 42 south, keeping only the species column
  GRS_sp <- dplyr::select(grp_rows, species, X_UTM, Y_UTM)
  coordinates(GRS_sp) <- c("X_UTM", "Y_UTM")
  proj4string(GRS_sp) <- CRS("+proj=utm +zone=42 +south +datum=WGS84 +units=m +no_defs")

  # href = the reference bandwidth
  kud <- kernelUD(GRS_sp[, 1], h = "href")

  ii <- kernel.area(kud, percent = seq(50, 95, by = 5), unout = "km2")

  # Standardise by number of codes, then move the row names (percentage
  # levels) into a numeric KUD column
  ii_std <- rownames_to_column(ii / n_GRS, var = "KUD")
  ii_std$KUD <- as.numeric(ii_std$KUD)

  # Tag the table with the group it belongs to
  cbind(ii_std, yr_group = yr_group)
}

# Run the KUD calculation for every half-year group; one data frame per group
all_data <- lapply(yr_groups, create_and_save_plot)

# Stack the per-group tables into a single table
combined_data <- bind_rows(all_data)

# Quick look at the result
head(combined_data)

# Make plot

# Put the facets in chronological order
combined_data$yr_group <- factor(combined_data$yr_group, levels = yr_groups)

# KUD area against KUD percentage, one panel per half-year group
p <- ggplot(combined_data, aes(x = KUD, y = Grey.Reef.Shark, group = 1)) +
  geom_line() +
  geom_point() +
  xlab("KUD %") +
  ylab(expression("KUD Area (km"^2*")")) +
  facet_wrap(~ yr_group, ncol = 3) +
  theme_bw() +
  #scale_x_continuous(breaks = seq(0, max(ii_std$KUD), by = 10)) +
  theme(panel.grid.minor = element_blank())

p

# Create plot
# group by KUD %
# time on x-axis

cb3 <- c("#E69F00", "#009E73", "#CC79A7")

# Filter KUD data for the 50, 75 and 95 % levels and make KUD a factor

combined_data_filt <- combined_data %>%
  dplyr::filter(KUD %in% c(50, 75, 95)) %>%
  dplyr::mutate(KUD = as.factor(KUD))

str(combined_data_filt)

# Add a date column for each year group.
# yr_group is handled as trimmed character while the date is derived:
# "Jan - Jun YYYY" -> 1 April YYYY, "Jul - Dec YYYY" -> 1 October YYYY.
combined_data_filt <- combined_data_filt %>%
  dplyr::mutate(
    yr_group = str_trim(as.character(yr_group)),
    middle_date = dmy(case_when(
      str_detect(yr_group, "Jan - Jun") ~ paste0("01-04-", sub("Jan - Jun ", "", yr_group)),
      str_detect(yr_group, "Jul - Dec") ~ paste0("01-10-", sub("Jul - Dec ", "", yr_group))
    ))
  )

str(combined_data_filt)

# Back to a factor in chronological order
combined_data_filt$yr_group <- factor(combined_data_filt$yr_group, levels = yr_groups)

# KUD plot: area over time, one line per KUD level.
# The x axis is middle_date, so it is labelled "Date" (the previous "KUD %"
# label was carried over from the per-group plot).
ggplot(combined_data_filt, aes(x = middle_date, y = Grey.Reef.Shark, group = KUD, colour = KUD)) +
  geom_line() +
  geom_point() +
  xlab("Date") + ylab(expression("KUD Area (km"^2*")")) +
  scale_color_manual(values = cb3) +
  theme_bw() +
  theme(panel.grid.minor = element_blank())

# RI plot

# Plotting copy of the residency data
RI_plot_DAT <- GRS_rI

str(RI_plot_DAT)

# Add the half-year group label as a factor in chronological order

RI_plot_DAT$yr_group <- factor(
  sapply(RI_plot_DAT$Date, generate_group_label),
  levels = yr_groups
)

# RI plot: residency index over time with a smoother
ggplot(RI_plot_DAT, aes(x = Date, y = ri)) +
  geom_point() +
  geom_smooth(colour = "blue") +
  theme_bw()


#combining plots


# Scale a numeric value (or vector) by a factor of 100
trans <- function(x) {
  100 * x
}

# Combined figure: KUD area (primary axis) and residency index (secondary
# axis). ri is multiplied by 100 to share the primary axis, and the secondary
# axis divides by 100 to show it on its own scale again.
p1 <- ggplot() +
  geom_point(data = RI_plot_DAT, aes(x = Date, y = ri * 100), size = 1, alpha = 0.2) + # Multiply ri by 100
  geom_smooth(data = RI_plot_DAT, aes(x = Date, y = ri * 100), colour = "blue") + # Multiply ri by 100
  geom_point(data = combined_data_filt, aes(x = middle_date, y = Grey.Reef.Shark, group = KUD, colour = KUD),
             size = 2) +
  # linewidth (not size) sets line thickness, matching the polygon layer
  # elsewhere in this script
  geom_line(data = combined_data_filt, aes(x = middle_date, y = Grey.Reef.Shark, group = KUD, colour = KUD),
            linewidth = 1.5, alpha = 0.7) +
  scale_y_continuous(name = expression("KUD Area (km"^2*")"), sec.axis = sec_axis(trans = ~./100, name = "Residency Index")) + # Divide by 100 for secondary axis
  scale_color_manual(values = cb3,labels = paste0(levels(factor(combined_data_filt$KUD)), "%")) +
  scale_x_date(date_breaks = "6 month", date_labels = "%Y/%m",
               limits = c(as.Date("2013/01/01"), NA)) +  # Axis starts 2013/01/01; upper limit from the data
  theme_bw()+
  theme(legend.position = "bottom")+
  theme(axis.title.x = element_text(colour = "black", size=22),
        axis.title.y = element_text(colour = "black", size=22), 
        axis.text.x = element_text(colour = "black", size=20), 
        axis.text.y = element_text(colour = "black", size=20))+
  theme(axis.text.x = element_text(angle = 45, vjust=1, hjust = 1))

pdf('Fig1_v1.pdf',width=12, height=10)
# Explicit print() so the PDF is not empty when the script is source()d
print(p1)
dev.off()

# Rescaling: same figure as p1 but ri is multiplied by 200 (and the secondary
# axis divides by 200), with the visible y range fixed to 0-110

p2 <- ggplot() +
  geom_point(data = RI_plot_DAT, aes(x = Date, y = ri * 200), size = 1, alpha = 0.2) + # Multiply ri by 200
  geom_smooth(data = RI_plot_DAT, aes(x = Date, y = ri * 200), colour = "blue") + # Multiply ri by 200
  geom_point(data = combined_data_filt, aes(x = middle_date, y = Grey.Reef.Shark, group = KUD, colour = KUD),
             size = 2) +
  # linewidth (not size) sets line thickness, matching the polygon layer
  # elsewhere in this script
  geom_line(data = combined_data_filt, aes(x = middle_date, y = Grey.Reef.Shark, group = KUD, colour = KUD),
            linewidth = 1.5, alpha = 0.7) +
  scale_y_continuous(name = expression("KUD Area (km"^2*")"), sec.axis = sec_axis(trans = ~./200, name = "Residency Index")) + # Divide by 200 for secondary axis
  scale_color_manual(values = cb3,labels = paste0(levels(factor(combined_data_filt$KUD)), "%")) +
  scale_x_date(date_breaks = "6 month", date_labels = "%Y/%m",
               limits = c(as.Date("2013/01/01"), NA)) +  # Axis starts 2013/01/01; upper limit from the data
  theme_bw()+
  theme(legend.position = "bottom")+
  coord_cartesian(ylim = c(0, 110))+
  theme(axis.title.x = element_text(colour = "black", size=22),
        axis.title.y = element_text(colour = "black", size=22), 
        axis.text.x = element_text(colour = "black", size=20), 
        axis.text.y = element_text(colour = "black", size=20))+
  theme(axis.text.x = element_text(angle = 45, vjust=1, hjust = 1))

pdf('Fig1_v2.pdf',width=12, height=10)
# Explicit print() so the PDF is not empty when the script is source()d
print(p2)
dev.off()

 ########## GRS DETECTION GAPS DATA ##########
 
# Gap in days between consecutive detections of the same code within the same
# monthyear. Rows are ordered by code and datetime, and each row is compared
# with the one before it. day_diff is NA whenever the previous row belongs to
# a different code or monthyear (and on the very first row).
# lag() is namespace-qualified so that stats::lag() can never be picked up by
# accident; on a plain vector stats::lag() does not shift the values.
dayDiff <- GRS_fin %>%
  dplyr::arrange(code, datetime) %>%
  dplyr::mutate(
    code_changed = code != dplyr::lag(code),
    month_changed = monthyear != dplyr::lag(monthyear),
    day_diff = ifelse(code_changed | month_changed, NA,
                      as.numeric(date - dplyr::lag(date), units = "days"))
  )

# Distribution of the detection gaps
ggplot(dayDiff, aes(x = day_diff)) + geom_histogram()


# Half-year group label ("Jan - Jun YYYY" / "Jul - Dec YYYY") for dates.
# This re-defines the helper of the same name declared earlier in the script.
#
# date: a Date (scalar or vector).
# Returns a character vector of the same length. Elements are NA when the
# date is missing or its year lies outside 2013-2020.
#
# Vectorised and NA-safe: the previous scalar if/else raised an error on a
# missing date (the condition evaluated to NA). It still works element-wise
# through sapply().
generate_group_label <- function(date) {
  year <- as.integer(format(date, "%Y"))
  month <- as.integer(format(date, "%m"))

  # Months 7-12 belong to the second half of the year
  half <- ifelse(month >= 7, "Jul - Dec ", "Jan - Jun ")
  label <- paste0(half, year)

  # Blank out missing dates and years outside the study period
  label[is.na(year) | is.na(month) | year < 2013 | year > 2020] <- NA_character_

  label
}

# Apply the function to create a new column of half-year group labels
dayDiff$yr_grps <- sapply(dayDiff$date, generate_group_label)
#dayDiff1$yr_grps <- sapply(dayDiff1$date, generate_group_label)

# Create box plot of detection gaps per yr group
# Reorder: half-year groups in chronological order
yr_groups <- c("Jul - Dec 2013", 
               "Jan - Jun 2014", "Jul - Dec 2014",
               "Jan - Jun 2015", "Jul - Dec 2015",
               "Jan - Jun 2016", "Jul - Dec 2016",
               "Jan - Jun 2017", "Jul - Dec 2017",
               "Jan - Jun 2018", "Jul - Dec 2018",
               "Jan - Jun 2019", "Jul - Dec 2019",
               "Jan - Jun 2020")

# Filter just for the appropriate groups (rows with other or NA labels are
# dropped). filter() is namespace-qualified like the other dplyr verbs in this
# script, so it cannot be confused with stats::filter().
dayDiff <- dayDiff %>%
  dplyr::filter(yr_grps %in% yr_groups)

dayDiff$yr_grps <- factor(dayDiff$yr_grps, levels = yr_groups)

unique(dayDiff$yr_grps)


## Maybe do on summary stats

unique(dayDiff$monthyear)

# Per code and monthyear: median, mean and maximum detection gap, using only
# complete rows with a gap of at least one day. The first yr_grps and date of
# each group are carried along.
day_diffSum <- dayDiff %>%
  dplyr::select(code, date, monthyear, yr_grps, day_diff) %>%
  na.omit() %>%
  dplyr::filter(day_diff >= 1) %>%
  dplyr::group_by(code, monthyear) %>%
  dplyr::summarise(
    median_dd = median(day_diff),
    mean_dd = mean(day_diff),
    max_dd = max(day_diff),
    yr_grps = first(yr_grps),
    Date = first(date)
  ) %>%
  ungroup()

# Add El Nino period to the summary data frame.
# Start from NA and fill one date window at a time; rows whose Date falls
# outside every window keep NA.

day_diffSum$ElNino <- NA
day_diffSum$ElNino[day_diffSum$Date >= "2013/02/10" & day_diffSum$Date <= "2014/06/30"] <- "Non-ElNino"
day_diffSum$ElNino[day_diffSum$Date >= "2014/07/01" & day_diffSum$Date <= "2016/06/30"] <- "ElNino"
day_diffSum$ElNino[day_diffSum$Date >= "2016/07/01" & day_diffSum$Date <= "2018/06/30"] <- "Non-ElNino"
day_diffSum$ElNino[day_diffSum$Date >= "2018/07/01" & day_diffSum$Date <= "2019/06/30"] <- "ElNino"
day_diffSum$ElNino[day_diffSum$Date >= "2019/07/01" & day_diffSum$Date <= "2020/11/30"] <- "Non-ElNino"

str(day_diffSum)

# Factor for the grouped test
day_diffSum$ElNino <- as.factor(day_diffSum$ElNino)

# Summary data: distribution of the median gaps and number of codes

summary(day_diffSum$median_dd)

n_distinct(day_diffSum$code)

# Statistical tests

# Signed rank test not used because the two groups are of unequal size;
# use the Brunner-Munzel test instead

library(brunnermunzel)

# Median gaps split by El Nino status
ElNino_values <- with(day_diffSum, median_dd[ElNino == "ElNino"])
NonElNino_values <- with(day_diffSum, median_dd[ElNino == "Non-ElNino"])

# Two-vector interface
brunnermunzel.test(ElNino_values, NonElNino_values)

# Formula interface on the same data
brunnermunzel.test(median_dd ~ ElNino, data = day_diffSum)
