# Libraries------------------------------------------------------------------
library(R.utils)
# tidyverse bliss
library(tidyverse)
library(magrittr)
library(timetk)
library(tidyquant)  # Loads tidyverse, financial pkgs, used to get data
library(tsibble)
library(dplyr)

#library(tidymodels)
library(feasts)
library(pracma)     # for repmat
library(tsbox)      # convert everything to anything
library(reshape2)
library(tibbletime)

library(quantreg)
library(limSolve)

# file navigation
library(here)

#raster
library(raster)

# access python
library(reticulate)

#visualisation
library(RColorBrewer)

#geographical distance
library(geosphere)

#changepoint detection
library(changepoint)
library(changepoint.np)
library(pastecs)

library(robustbase)
library(scam)
# Explanation ------------------------------------------------------------------
# his script can be used, when the icesat download has been run
# before, but the filtering and/or fitting process of 
# icesat elevation data to surface occurence shall be improved

# 1. USER INPUT------------------------------------------------------------------
## Working directories
dir_3 <- paste0(here(),"/")
dir_2 <- paste0(dirname(here()), "/STEP_2/")
## path to SWOP maps exported from GEE in Step 1:
dir_surf_occ <- paste0(dirname(here()), "/STEP_1/SurfWaterFreq_S2_17to22_Lakes_combination_v5/")

setwd(dir_2)

# 2. Filter method ------------------------------------------------------------------
# FILTER METHOD 1 and 2
# based on rolling mean elevation difference (implementation see below)
# filter 1: dh_min=0.1, filter 2: dh_min=0.05
dh_min = 0.1

# FILTER method 3
# based on inherent icesat slope property
# minimum absolute slope to classify icesat atlas 2 photon data as land instead of water surface (at the water surface the slope is small)
min_slope <- 1
# maximum absolute slope, otherwise photon segment is considered an outlier
max_slope <- 20

# WEIGHTED FIT
# the weight which will be applied to the upper left and lower right datapoints
weighted <- 4

# STRING TO DEFINE FIT AND FILTER FOR STORING PLOTS
str_to_store <- "SR_atl08and13_hOrtho_b"

# 3. Data ------------------------------------------------------------------
# load csv files with icesat data ATL13
all_icesat_data_files <- list.files(paste0(dir_2, 'Bathymetries/icesat_data_atl13'))
all_icesat_data_files<-all_icesat_data_files[(which(grepl('csv', all_icesat_data_files)))]

# load csv files with icesat data ATL08
all_icesat_data_files_atl08 <- list.files(paste0(dir_2, 'Bathymetries/icesat_data_atl08'))
all_icesat_data_files_atl08<-all_icesat_data_files_atl08[(which(grepl('csv', all_icesat_data_files_atl08)))]

ids_processed<-NULL
# identify the IDs that have already been processed
for (i in all_icesat_data_files_atl08){#
  id0 <- strsplit(i, "ID_")[[1]][2]
  ids_processed <-  rbind(ids_processed,strsplit(id0, ".csv")[[1]][1])
}
# load surface occurence percentiles map
all_surface_occ_prob_maps <- list.files(dir_surf_occ)

# 4. Functions------------------------------------------------------------------
source('functions.R')

# 5. Main Code------------------------------------------------------------------

# tibble to summarize status of the DEM creation process for all lakes
message = tibble(f_map = all_surface_occ_prob_maps, status='' )
df_for_geeupload = tibble('id_no'= as.character(NA), 'lake_id'=as.character(NA),
                          'min_elev_data_filtered' = as.numeric(NA), 'max_elev_data_filtered' = as.numeric(NA),
                          # 'min_elev_data_filtered2' = as.numeric(NA), 'max_elev_data_filtered2' = as.numeric(NA),
                          # 'min_elev_fit' = as.numeric(NA), 'max_elev_fit' = as.numeric(NA),
                          'min_elev_data_all' =  as.numeric(NA),
                          'max_elev_data_all' =  as.numeric(NA),
                          'max_elev_perc_fit' = as.numeric(NA),
                          'min_elev_perc_fit' = as.numeric(NA),
                          # 'elev_range_covered' = as.numeric(NA),
                          # 'elev_range_covered_ATL13' = as.numeric(NA),
                          'min_occ' = as.numeric(NA), 'max_occ'= as.numeric(NA), 'occ_range_covered'=as.numeric(NA),
                          'data_pts' = as.numeric(NA),'elevation_intervals' = as.numeric(NA),'rsq'=as.numeric(NA),
                          'rsq_all' = as.numeric(NA),
                          'rmse'=as.numeric(NA),
                          # 'frac_below'=as.numeric(NA),
                          'sill'=as.numeric(NA),
                          'max_date'=as.Date(NA),'min_date'=as.Date(NA),'Tier'=as.numeric(NA))

# This map allows to convert 'height above the WGS84 ellipsoid' to 'orthometric heights'
# Define local file path
local_file <- file.path(here(), "us_nga_egm2008_1.tif")
# Download the file if it doesn't exist
if (!file.exists(local_file)) {
  download.file("https://grid-partner-share.s3.amazonaws.com/egm2008/us_nga_egm2008_1.tif", 
                destfile = local_file, 
                mode = "wb")
}

# Load the raster from the local file
proj2hortho <- raster(local_file)

raster::plot(proj2hortho, col=brewer.pal(9, "Blues"), main='us_nga_egm2008_1.tif')

## Uncomment if you want to use the correction.csv file 
corrections<-read.csv(paste0(dir_3,"corrections.csv"))

# iterate through surface occurrence maps 
for (i in all_surface_occ_prob_maps[c(1:length(all_surface_occ_prob_maps))]){

  # i<-all_surface_occ_prob_maps[1]

  # get water body ID
  id0 <- strsplit(i, "_")[[1]]
  id <-  strsplit(id0[length(id0)] , ".tif")[[1]][1]
  idnum <- str2num(id)

  # print(which(all_surface_occ_prob_maps==i))
  print(paste0("Processing ID ", id))
  # read according icesat data from csv file
  csv_to_read <- all_icesat_data_files[(which(grepl(paste0("icesat_data_ID_",id,".csv"), all_icesat_data_files)))]
  # csv_to_read <- all_icesat_data_files[(which(grepl(id, all_icesat_data_files)))]
  t1_filtered=data.frame()
  t1_filtered_mod=data.frame()
  remove(t)
  # remove(delta_ortho)

  surface_occ_prob <- raster(paste0(dir_surf_occ,i))
  values(surface_occ_prob)[values(surface_occ_prob) == 0] = NA
  raster::plot(surface_occ_prob, col=brewer.pal(9, "Blues"), main=i)
  
  ## Uncomment if you want to use the corrections.csv file 
  
  # MaxElev<-corrections[which(corrections$ID==idnum),] %>% filter(MaxElev > 0)
  # MaxElev<-min(MaxElev$MaxElev,10e9)
  # MinElev <-corrections[which(corrections$ID==idnum),] %>% filter(MinElev > 0)
  # MinElev<-max(MinElev$MinElev,0)
  # wo_Date  <-corrections[which(corrections$ID==idnum),]$wo_Date 
  # wo_Date <- paste0(sub(" ","",wo_Date),"")
  
  # 6. ATL 13------------------------------------------------------------------
  for (j in csv_to_read){
    
    # if no csv file is available for this surf occurence map
    if (length(csv_to_read)==0){print('No Icesat-data  ATL13 available')
      message$status[message$f_map==i] <- ('No Icesat-data ATL13 available')
      # next
      }
      
    # check whether the csv file is empty
    try(t <- read.csv(paste0(dir_2, 'Bathymetries/icesat_data_atl13/', csv_to_read)))
    if (!exists('t', inherits=FALSE)){print('No Icesat-data ATL13 available')
      message$status[message$f_map==i] <- ('No Icesat-data ATL13 available')
      next}
    
    if (isempty(t)){print('No Icesat-data available')
      message$status[message$f_map==i] <- ('No Icesat-data ATL13 available')
      next}
  
    # remove duplicates:
    t <- unique( t[ , ] )
    coords<-cbind(t$mean_lon, t$mean_lat)  
    # retrieve percentile for each elevation value from the contour map
    t <- t %>% mutate(percentile=raster::extract(surface_occ_prob, coords,'simple') ) #alternatively use 'bilinear'
    
    # retrieve transformation to orthometric heights for each elevation value
    t <- t %>% mutate(dh_ortho=raster::extract(proj2hortho, coords,'simple') )
    # 1) remove points when percentile is not defined
    t_filtered <- t %>% filter((!is.na(percentile)))
    
    # SR: no information can be extracted at maximum and minimum percentile (i.e., likely there is always or never water)
    raster_withvalues <- setMinMax(surface_occ_prob)
    max_occ_prob <- maxValue(raster_withvalues)
    min_occ_prob <- minValue(raster_withvalues)
    
    t_filtered <- t_filtered %>%
      filter(percentile>0.025) %>%
      filter(percentile>min_occ_prob) %>%
      filter(percentile<0.99) %>%
      filter(percentile<max_occ_prob) 
  ## Uncomment if you want to use the corrections.csv file 
    # t_filtered<- t_filtered %>% filter(ht_ortho<MaxElev) %>%
    #   filter(ht_ortho>MinElev)  %>%
    #   filter(date!=wo_Date)
  
    if (dim(t_filtered)[1]<3){
      print('Too little data for reliable fit.')
      message$status[message$f_map==i] <- ('Too little data for reliable fit.')
      next
    }
    t1_filtered<-t_filtered
    
    t_filtered<-t_filtered %>% mutate(h_te_best_fit=t_filtered$ht_water_surf - t_filtered$dh_ortho )   
    t_filtered<-t_filtered %>% mutate(delta_ortho=t_filtered$ht_ortho  - t_filtered$h_te_best_fit )  
    # delta_ortho: mean error in the transformation to orthometric heights based on the ATL13 product
    delta_ortho<-mean(t_filtered$delta_ortho)
    print(paste('delta ortho: ',delta_ortho))
    t1_filtered$trackID<-abs(t1_filtered$gtx)
    
    t1_filtered<-t1_filtered[,c("ht_ortho","mean_lat","mean_lon","percentile","date","gtx","trackID")] 
    t1_filtered_mod=t1_filtered[,c("ht_ortho","mean_lat","mean_lon","percentile","date","gtx","trackID")]
  }

  t2_filtered_debiased=data.frame()
  t_level=data.frame()
  t_date_prc=data.frame()
  
  # 7. ATL 08 ------------------------------------------------------------------
  csv_to_read2 <- all_icesat_data_files_atl08[(which(grepl(paste0("icesat_data_20m_ID_",id,".csv"), all_icesat_data_files_atl08)))]
  remove(t2)
  # if no ATL08 file is available for this surf occurence map
  if (length(csv_to_read2)==0){print('No Icesat-data ATL08 available')
    if (length(csv_to_read)==0){print('No Icesat-data available')
      message$status[message$f_map==i] <- ('No Icesat-data ATL08 available')
      next
    } else {message$status[message$f_map==i] <- ('No Icesat-data ATL08 available')}
      
  } else {
    try(t2 <- read.csv(paste0(dir_2,'Bathymetries/icesat_data_atl08/', csv_to_read2)))
  }

    # check whether the csv file is empty
  # if (exists('t2', inherits=FALSE)){#print('Icesat-data ATL08 available')
      # next
    
  
  # remove duplicates:
    t2 <- unique( t2[ , ] )
      
    ## 7.1 Filtering ICESat-2 DATA -----------------------------------------------------------
    
    #for the DEM generation consider only the points with a horizontal distance >50m (otherwise use ATL13)
    t2_new=t2 # %>% filter(atl8$d>50)
    
    # coordinate vector for raster::extract
    coords<-cbind(t2_new$mean_lon, t2_new$mean_lat)
    # retrieve percentile for each elevation value from the contour map
    t2_new <- t2_new %>% mutate(percentile=raster::extract(surface_occ_prob, coords,'simple') )#alternatively use 'bilinear'
    # retrieve transformation to orthometric heights for each elevation value
    t2_new <- t2_new %>% mutate(dh_ortho=raster::extract(proj2hortho, coords,'simple') )    
    # 1) remove points when percentile is not defined
    t2_filtered <- t2_new %>% filter((!is.na(percentile)))

    # 3) if percentile lower than 0.025 or higher than 0.99 -> NA
    # if higher than maximum percentile or lower than minimum percentile in the frequency map -> NA
    # likely that these areas of the map are not very well defined (due to few cloudy scenes and bad classifications)
    # 
    raster_withvalues <- setMinMax(surface_occ_prob)
    max_occ_prob <- maxValue(raster_withvalues)
    min_occ_prob <- minValue(raster_withvalues)
    
    t2_filtered <- t2_filtered %>%
      filter(percentile>0.025) %>% #0.05
      filter(percentile>min_occ_prob)  %>%
      filter(percentile<0.99) %>%
      filter(percentile<max_occ_prob)
    
    t2_filtered <- t2_filtered %>% mutate(h_te_best_fit=t2_filtered$h_te_best_fit - t2_filtered$dh_ortho )    

    if (exists('delta_ortho', inherits=FALSE)){
      delta_ortho=first(c(delta_ortho,0),na.rm=TRUE) 
      t2_filtered <- t2_filtered %>% mutate(h_te_best_fit=t2_filtered$h_te_best_fit + delta_ortho ) 
      remove(delta_ortho)
    }
    
    ## Uncomment if you want to use the corrections.csv file 
        # t2_filtered <- t2_filtered %>%
        #   filter(h_te_best_fit<MaxElev) %>%
        #   filter(h_te_best_fit>MinElev)  %>%
        #   filter(date!=wo_Date)   
    
    t2_filtered$trackID<-abs(t2_filtered$gtx)
    
    #merge ATL08 and ATL13
    t2_filtered<-t2_filtered[,c("h_te_best_fit","mean_lat","mean_lon","percentile","date","gtx","trackID")]
    names(t2_filtered)=c("ht_ortho","mean_lat","mean_lon","percentile","date","gtx","trackID")
    t2_filtered <- t2_filtered %>% filter((!is.na(ht_ortho )))
    
    # 7.2 PROCESS ATL08 -----------------------------------------------------------
    t_filtered<-t2_filtered 
    datetmp=unique(t_filtered$date)
    if (dim(t2_filtered)[1]>0){
      for (u1 in 1:length(datetmp)){
        # print(paste("u1:",u1))
        ttmp=t_filtered %>% filter(date ==datetmp[u1])
        tracktmp=unique(ttmp$trackID)

          print(paste("date:", datetmp[u1]))
          # print(paste("date:", datetmp[u1], 'track:',u))
          # ttmp=t_filtered%>% filter(trackID ==tracktmp[u])%>% filter(date ==datetmp[u1])

          if (dim(t1_filtered)[1]>0){
            
            # ttmp2<-t1_filtered %>% filter(trackID ==tracktmp[u]) %>% filter(date ==datetmp[u1])
            ttmp2<-t1_filtered %>% filter(date ==datetmp[u1])
            if (dim(ttmp2)[1]>0){
              

            # lets consider here also the ATL13 data from the same track
              ttmp=rbind(ttmp,ttmp2)
                t1_filtered_mod=t1_filtered_mod[-c(which(t1_filtered_mod$date ==datetmp[u1])),]
              # }
            } 
          } 
          
          t2_filtered_debiased=rbind(t2_filtered_debiased,ttmp)
          
          #iterate over percentiles (0.01 steps, unique values)
          otmp=data.frame()
          # prctmp=sort(unique(round(ttmp$percentile*2,1)/2))
          prctmp=sort(unique(round(ttmp$percentile,2)))
          for (v in 1:length(prctmp)){
            # calculate median of every column
            # otmp=rbind(otmp,apply(ttmp[c(which(round(ttmp$percentile*2,1)/2==prctmp[v])),!(names(ttmp)=="date")], 2, median))
            otmp=rbind(otmp,apply(ttmp[c(which(round(ttmp$percentile,2)==prctmp[v])),!(names(ttmp)=="date")], 2, median))
          }
          names(otmp)=names(ttmp[,!(names(ttmp)=="date")])
          otmp$date<-datetmp[u1]

          # continue only if there are at least 2 observations
          if(dim(otmp)[1]<=2){
            ck=999
          } else {
            #FIT A MONOTONOUSLY DECREASING FUNCTION AND CHECK R2
            remove(check)
            check<-lm(otmp$ht_ortho~prctmp)
            ck=check$coefficients[2]
          }
          if (ck>=0){
            # but points could still contain important information about possible minimum and maximum lake level
            if (dim(ttmp)[1]>4 & dim(otmp)[1]>2 & (max(otmp$ht_ortho)-min(otmp$ht_ortho)<1) ){
              t_level_tmp=otmp[1,]
              t_level_tmp$ht_ortho=median(otmp$ht_ortho)
              t_level=rbind(t_level,t_level_tmp)
            }
            # plot(otmp[,"percentile"],otmp[,"ht_ortho"], type = "l")
            # title(paste0('u1:',u1,', u: ',u, 'Trend not decreasing'))
            # print(paste0('u1:',u1,', u: ',u,', elevation not decreasing or too little data'))
            # if elevation does not decrease with increasing occurrence percentile
            next}
          # FIND IF THERE IS A CHANGE POINT --> DETECT WATER EDGE
          # need to add a condition - too many nice profiles with dry lake bed get kicked out!
          # --> check the elevation range beyond the change point, e.g. the 50% quantile, or max-min
          # --> move one point forward and check the elevation range again. If the difference is > 0.1 keep it as new change point
          # if the new change point includes an outlier it may still be filtered out later

          # Data must have atleast 4 observations to fit a changepoint model.
          landtmp=otmp
          if(dim(otmp)[1]>3){

            m1.cusum=cpt.meanvar(otmp$ht_ortho,pen.value=1,penalty='Manual',test.stat='Normal')
            # plot(otmp[,"percentile"],otmp[,"ht_ortho"], type = "l")

          if (length(cpts(m1.cusum))>0){
            chgpnt=cpts(m1.cusum)
            otmp_water=otmp %>% filter(otmp$percentile > otmp[c(chgpnt),"percentile"]+0.1)
            # points(otmp[c(chgpnt),"percentile"],otmp[c(chgpnt),"ht_ortho"],'col'='red')
            otmp_water_max=max(otmp_water$ht_ortho)
            if (is.finite(otmp_water_max)){
              otmp_landwater=otmp %>% filter(otmp$percentile >= otmp[c(chgpnt),"percentile"])
              otmp_land_max=max(otmp_landwater$ht_ortho)
              while ((otmp_land_max-otmp_water_max)>dh_min){
                chgpnt=chgpnt+1
                if (chgpnt==(dim(otmp)[1])){
                  otmp_water_max=otmp_land_max
                } else {
                  otmp_water=otmp %>% filter(otmp$percentile > otmp[c(chgpnt),"percentile"]+0.1)
                  otmp_water_max=max(otmp_water$ht_ortho)
                  otmp_landwater=otmp %>% filter(otmp$percentile >= otmp[c(chgpnt),"percentile"])
                  otmp_land_max=max(otmp_landwater$ht_ortho)
                }
              }}

            # points(otmp[c(chgpnt),"percentile"],otmp[c(chgpnt),"ht_ortho"],'col'='blue','pch'=4)
            ## title(paste0(datetmp[u1],', Track: ',tracktmp[u]))
            # title(paste0('u1:',u1,', u: ',u))
            # remove values beyond the change point

            laketmp=otmp %>% filter(otmp$percentile > otmp[c(chgpnt),"percentile"])
            # estimate lake level and remove all values +- 5cm
            landtmp=otmp %>% filter(otmp$percentile <= otmp[c(chgpnt),"percentile"])
            if (dim(laketmp)[1]>0){
              landtmp=landtmp%>% filter(landtmp$ht_ortho > (median(laketmp$ht_ortho)+dh_min))
              landtmp = rbind(landtmp,otmp[c(chgpnt),])
            } else {
              landtmp=landtmp%>% filter(landtmp$ht_ortho > (landtmp[c(dim(landtmp)[1]),"ht_ortho"]+dh_min))
              # otmp = rbind(landtmp,otmp[c(chgpnt),])
            }
            # the changepoint itself should be included in any case
            # otmp = rbind(landtmp,otmp[c(chgpnt),])
          }}
          
          if (min(landtmp$percentile)>median(otmp$percentile)){
            #means that land is where the higher percentiles are and water where the lower percentiles are, which is not possible
            #print('track removed')
            next
          }
          otmp = landtmp
          
          otmp <- unique( otmp[ , ] )
          # remove outliers
          # otmp_ts <- rep(NA,length(prctmp))
          # for (v in 1:length(prctmp)){
          #   # calculate median of every column
          #   # otmp=rbind(otmp,apply(ttmp[c(which(round(ttmp$percentile*2,1)/2==prctmp[v])),!(names(ttmp)=="date")], 2, median))
          #   if (dim(otmp[c(which(otmp$prc==prctmp[v])),])[1]>0){
          #     otmp_ts[v]=otmp[c(which(otmp$prc==prctmp[v])),]$ht_ortho 
          #   }
          # } 
          # outlier removal based on time-series analysis
          if (dim(otmp)[1]>10){
            y <- hampel(otmp$ht_ortho,4)  # increasing 'k' decreases sensitivity
            if (length(y$ind)>0){
              otmp=otmp[-c(y$ind),]
              print(paste('number of outliers removed with hampel: ',length(y$ind)))
            }
          }
          # outlier removal based on distribution
          if (dim(otmp)[1]>=5){
            y <- remove_outliers(otmp$ht_ortho)
            yind<-which(is.na(y))
            if (length(yind)>0){
              otmp=otmp[-c(yind),]
              print(paste('number of outliers removed with remove_outliers: ',length(yind)))
            }
          }
          # points(otmp$percentile,otmp$ht_ortho,'col'='green','pch'=5)
          
          if (max(otmp$percentile)-min(otmp$percentile)<=0.05){
            # if the track only covers a very small percentile range
            # print(paste0('u1:',u1,', u: ',u,', this track covers a too small percentile range'))
            next}
          if (min(otmp$percentile)>0.3){
            # if the track only covers a small percentile range here something got wrong
            if (max(otmp$percentile)-min(otmp$percentile)<0.1){
              # if the track only covers a very small percentile range
              # print(paste0('u1:',u1,', u: ',u,', this track covers a small percentile range'))
              next}
            }
          mp_fit<-NULL
          try(mp_fit <- fit_monpol(otmp))
          # the function may return NA if no fit was possible

          if (dim(otmp)[1]==0){
            # print('No more data')
            # iterate to the next surface occurence map
            next}
          if (is.null(mp_fit)){
            # print('Monotone Polynomial Regression failed.')
            # iterate to the next surface occurence map
            next}
          
          if (max(otmp$ht_ortho)-min(otmp$ht_ortho)<2*dh_min){
            t_level_tmp=otmp[1,]
            t_level_tmp$ht_ortho=median(otmp$ht_ortho)
            t_level=rbind(t_level,t_level_tmp)
            next} 
          
          # continue only if a linear trend is decreasing
          remove(check)
          if(dim(otmp)[1]<=2){
            # not enough data
            next
          }
          if(dim(otmp)[1]<=1){
            ck=999
          } else {if(dim(otmp)[1]<=5){
            check<-lm(otmp$ht_ortho ~ otmp$percentile)
            ck=check$coefficients[2]
	          ck=ck/(max(otmp$percentile)-min(otmp$percentile))/5
          } else {
            check<-lmrob(otmp$ht_ortho ~ otmp$percentile)
            ck=check$coefficients[2]
            ck=ck/(max(otmp$percentile)-min(otmp$percentile))/5
          }}
          
          if(is.na(check$coefficients[2])){
            # print('data not decreasing')
            # iterate to the next surface occurence map
            next}   
          # slope at least 0.1m per 0.2 percentile
          if (round(ck,1)>=-0.1){
            # print('data not decreasing')
            # iterate to the next surface occurence map
            next}
          t_date_prc=rbind(t_date_prc,otmp)

          # if the fit was possible
          # 1) generate data based on fit
          # mp_fit_df <- NULL
          # mp_fit_df <- fit_monpol_data(mp_fit)
          # occ_vs_elev_plot <- plot_occ_vs_elev(t, otmp, mp_fit_df, id)
          # plot(occ_vs_elev_plot)
          # title(paste0('u1:',u1,', u: ',u))
        }
      # }
    }
  
  # }
  # 8. PROCESS REMAINING ATL13 -----------------------------------------------------------
  if (dim(t1_filtered_mod)[1]>0){
    t_filtered<-t1_filtered_mod
    datetmp=unique(t_filtered$date)
  
    for (u1 in 1:length(datetmp)){
      ttmp=t_filtered %>% filter(date ==datetmp[u1])
      tracktmp=unique(ttmp$trackID)
      # print(paste0('number of tracks',length(tracktmp)))
      # small lakes: merge all tracks together
      # if average number of points per track is less than 10
      # if (dim(ttmp)[1]/length(tracktmp)<10){
      #   for (u in 1:length(tracktmp)){
      #     t_filtered[c(which(t_filtered$trackID==tracktmp[u])),]$trackID=tracktmp[1]
      #   }
      #   tracktmp=tracktmp[1]
      # }
      # for (u in 1:length(tracktmp)){
        # ttmp=t_filtered%>% filter(trackID ==tracktmp[u])%>% filter(date ==datetmp[u1])
        # print(paste0('u1:',u1,', u: ',u,', ,max elevation:',max(ttmp$ht_ortho)))
        
        #iterate over percentiles (0.01 steps, unique values)
        otmp=data.frame()
        # prctmp=sort(unique(round(ttmp$percentile*2,1)/2))
        prctmp=sort(unique(round(ttmp$percentile,2)))
        for (v in 1:length(prctmp)){
          # calculate median of every column
          # otmp=rbind(otmp,apply(ttmp[c(which(round(ttmp$percentile*2,1)/2==prctmp[v])),!(names(ttmp)=="date")], 2, median))
          otmp=rbind(otmp,apply(ttmp[c(which(round(ttmp$percentile,2)==prctmp[v])),!(names(ttmp)=="date")], 2, median))
        } 
        names(otmp)=names(ttmp[,!(names(ttmp)=="date")])
        otmp$date<-datetmp[u1]
        #FIT A MONOTONOUSLY DECREASING FUNCTION AND CHECK R2
        # check<-lm(prctmp~otmp$ht_ortho)
        # continue only if there are at least 2 observations 
        if(dim(otmp)[1]<=2){
          ck=999
        } else {
          #FIT A MONOTONOUSLY DECREASING FUNCTION AND CHECK R2
          remove(check)
	        check<-lm(otmp$ht_ortho~prctmp)
          ck=check$coefficients[2]
        }
        if (ck>=0){
          if (dim(ttmp)[1]>4 & dim(otmp)[1]>2 & (max(otmp$ht_ortho)-min(otmp$ht_ortho)<1) ){
            t_level_tmp=otmp[1,]
            t_level_tmp$ht_ortho=median(otmp$ht_ortho)
            t_level=rbind(t_level,t_level_tmp)
            # plot(otmp[,"percentile"],otmp[,"ht_ortho"], type = "l")
            # title(paste0('u1:',u1,', u: ',u, 'Trend not decreasing'))
          }
          # print(paste0('u1:',u1,', u: ',u,', elevation not decreasing or too little data'))
          # if elevation does not decrease with increasing occurrence percentile
          next}
        # FIND IF THERE IS A CHANGE POINT --> DETECT WATER EDGE
        # need to add a condition - too many nice profiles with dry lake bed get kicked out!
        # --> check the elevation range beyond the change point, e.g. the 50% quantile, or max-min
        # --> move one point forward and check the elevation range again. If the difference is > 0.1 keep it as new change point
        # if the new change point includes an outlier it may still be filtered out later
        # Data must have atleast 4 observations to fit a changepoint model.
        landtmp=otmp
        if(dim(otmp)[1]>3){
          m1.cusum=cpt.meanvar(otmp$ht_ortho,pen.value=1,penalty='Manual',test.stat='Normal')
          # plot(otmp[,"percentile"],otmp[,"ht_ortho"], type = "l")
        
        if (length(cpts(m1.cusum))>0){
          chgpnt=cpts(m1.cusum)
          otmp_water=otmp %>% filter(otmp$percentile > otmp[c(chgpnt),"percentile"]+0.1)
          # points(otmp[c(chgpnt),"percentile"],otmp[c(chgpnt),"ht_ortho"],'col'='red')
          otmp_water_max=max(otmp_water$ht_ortho)
          if (is.finite(otmp_water_max)){
          otmp_landwater=otmp %>% filter(otmp$percentile >= otmp[c(chgpnt),"percentile"])
          otmp_land_max=max(otmp_landwater$ht_ortho)
          while ((otmp_land_max-otmp_water_max)>dh_min){
            chgpnt=chgpnt+1
            if (chgpnt==(dim(otmp)[1])){
              otmp_water_max=otmp_land_max
            } else {
              otmp_water=otmp %>% filter(otmp$percentile > otmp[c(chgpnt),"percentile"]+0.1)
              otmp_water_max=max(otmp_water$ht_ortho)
              otmp_landwater=otmp %>% filter(otmp$percentile >= otmp[c(chgpnt),"percentile"])
              otmp_land_max=max(otmp_landwater$ht_ortho)            
            }
          }}
          
          # points(otmp[c(chgpnt),"percentile"],otmp[c(chgpnt),"ht_ortho"],'col'='blue','pch'=4)
          ## title(paste0(datetmp[u1],', Track: ',tracktmp[u]))
          # title(paste0('u1:',u1,', u: ',u))
          # remove values beyond the change point
          
          laketmp=otmp %>% filter(otmp$percentile > otmp[c(chgpnt),"percentile"])
          # estimate lake level and remove all values +- 5cm
          landtmp=otmp %>% filter(otmp$percentile <= otmp[c(chgpnt),"percentile"])
          if (dim(laketmp)[1]>0){
            landtmp=landtmp%>% filter(landtmp$ht_ortho > (median(laketmp$ht_ortho)+dh_min))
            landtmp = rbind(landtmp,otmp[c(chgpnt),])
          } else {
            landtmp=landtmp%>% filter(landtmp$ht_ortho > (landtmp[c(dim(landtmp)[1]),"ht_ortho"]+dh_min))
            # otmp = rbind(landtmp,otmp[c(chgpnt),])
          }
          # the changepoint itself should be included in any case
        }
          #points(landtmp$percentile,landtmp$ht_ortho,'col'='green','pch'=5)  
        if (min(landtmp$percentile)>median(otmp$percentile)|| min(landtmp$percentile)>(min(otmp$percentile)+(max(otmp$percentile)-min(otmp$percentile))/2)){
          #means that land is where the higher percentiles are and water where the lower percentiles are, which is not possible
          #print('track removed')
          next
        }
        otmp = landtmp
        
        if (max(otmp$percentile)-min(otmp$percentile)<=0.05){
          # if the track only covers a very small percentile range
          # print(paste0('u1:',u1,', u: ',u,', this track covers a too small percentile range'))
          next}
        if (min(otmp$percentile)>0.3){
            # if the track only covers a small percentile range here something got wrong
            if (max(otmp$percentile)-min(otmp$percentile)<0.1){
              # if the track only covers a very small percentile range
              # print(paste0('u1:',u1,', u: ',u,', this track covers a small percentile range'))
            next}
          }               
        
        mp_fit<-NULL
        try(mp_fit <- fit_monpol(otmp))
        # the function may return NA if no fit was possible
        
        if (dim(otmp)[1]==0){
          # print('No more data')
          # iterate to the next surface occurence map
          next}
        if (is.null(mp_fit)){
          # print('Monotone Polynomial Regression failed.')
          # iterate to the next surface occurence map
          next}
        
        if (max(otmp$ht_ortho)-min(otmp$ht_ortho)<2*dh_min){
          t_level_tmp=otmp[1,]
          t_level_tmp$ht_ortho=median(otmp$ht_ortho)
          t_level=rbind(t_level,t_level_tmp)
          next} 
        
        # check<-lm(otmp$percentile~otmp$ht_ortho)
        remove(check)
        # continue only if a linear trend is decreasing
        if(dim(otmp)[1]<=1){
          ck=999
        } else {if(dim(otmp)[1]<=5){
          check<-lm(otmp$ht_ortho ~ otmp$percentile)
          ck=check$coefficients[2]
	        ck=ck/(max(otmp$percentile)-min(otmp$percentile))/5
        } else {
          check<-lmrob(otmp$ht_ortho ~ otmp$percentile)
          ck=check$coefficients[2]
          ck=ck/(max(otmp$percentile)-min(otmp$percentile))/5
        }}
        
        if(is.na(check$coefficients[2])){
          # print('data not decreasing')
          # iterate to the next surface occurence map
          next}   
        # slope at least 0.1m per 0.2 percentile
        if (round(ck,1)>=-0.1){
          # print('data not decreasing')
          # iterate to the next surface occurence map
          next}
        
        # print(paste0('u1:',u1,', u: ',u))
        # print(ck)
        # print(ck/(max(otmp$percentile)-min(otmp$percentile))/5)
        # print(max(otmp$percentile)-min(otmp$percentile))
        otmp<-otmp[,c("ht_ortho","mean_lat","mean_lon","percentile","date","trackID","gtx")]
        t_date_prc=rbind(t_date_prc,otmp)
        
        # if the fit was possible
        # 1) generate data based on fit
        # mp_fit_df <- NULL
        # mp_fit_df <- fit_monpol_data(mp_fit)
        # occ_vs_elev_plot <- plot_occ_vs_elev(t, otmp, mp_fit_df, id)
        # plot(occ_vs_elev_plot)
        # title(paste0('u1:',u1,', u: ',u))
      }}
    # }  
  } else {print('No more ATL13 tracks left')}
  t_date_prc <- unique( t_date_prc[ , ] )
  
  if (dim(t_date_prc)[1]<3){
    print('Too little data for reliable fit.')
    message$status[message$f_map==i] <- ('Too little data for reliable fit.')
    next
  }
  # 9. Weiter: ---------------------------------------------------------
  # plot(t_date_prc[,"percentile"],t_date_prc[,"ht_ortho"], type = "p")
  otmp=data.frame()
  prctmp=sort(unique(round(t_date_prc$percentile*2,1)/2))
  # prctmp=sort(unique(round(t_date_prc$percentile,2)))
  #tie it to the extremes (lowest percentile)
  v=1
  otmp=rbind(otmp,apply(t_date_prc[c(which(round(t_date_prc$percentile*2,1)/2==prctmp[v])),!(names(t_date_prc)=="date")], 2, median))
  # otmp=rbind(otmp,apply(t_date_prc[c(which(round(t_date_prc$percentile,2)==prctmp[v])),!(names(t_date_prc)=="date")], 2, median))
  names(otmp)=names(t_date_prc[,!(names(t_date_prc)=="date")])
  # max of elevations
  # otmp[c(v),]$ht_ortho=max(t_date_prc[c(which(round(t_date_prc$percentile*2,1)/2==prctmp[v])),]$ht_ortho)
  otmp[c(v),]$ht_ortho=apply(t_date_prc[c(which(round(t_date_prc$percentile*2,1)/2==prctmp[v])),!(names(t_date_prc)=="date")], 2, quantile, probs=c(0.9), na.rm=TRUE)[1]
  otmp[c(v),]$percentile=prctmp[v]
  
  for (v in 2:length(prctmp)){
    # calculate median of every column
    otmp=rbind(otmp,apply(t_date_prc[c(which(round(t_date_prc$percentile*2,1)/2==prctmp[v])),!(names(t_date_prc)=="date")], 2, median))
    # otmp=rbind(otmp,apply(t_date_prc[c(which(round(t_date_prc$percentile,2)==prctmp[v])),!(names(t_date_prc)=="date")], 2, median))
    names(otmp)=names(t_date_prc[,!(names(t_date_prc)=="date")])
    # 50% quantile of elevations per 0.05 percentile step
    otmp[c(v),]$ht_ortho=apply(t_date_prc[c(which(round(t_date_prc$percentile*2,1)/2==prctmp[v])),!(names(t_date_prc)=="date")], 2, quantile, probs=c(0.5), na.rm=TRUE)[1]
    # otmp[c(v),]$ht_ortho=apply(t_date_prc[c(which(round(t_date_prc$percentile,2)==prctmp[v])),!(names(t_date_prc)=="date")], 2, quantile, probs=c(0.5), na.rm=TRUE)[1]
    otmp[c(v),]$percentile=prctmp[v]
  }
  otmp$percentile[c(which(otmp$percentile==1))]=0.975
  if (otmp[1,]$percentile==0){
    otmp[1,]$percentile=0.01
  }
  otmp$date<-datetmp[1]
  if(dim(t_level)[1]>0){
    if (min(t_level$ht_ortho)<min(otmp$ht_ortho)){
      print('minimum lake level data point added')
      t_level_tmp=t_level[c(which(t_level$ht_ortho==min(t_level$ht_ortho))),]
      if (dim(otmp[c(which(otmp$percentile==round(t_level_tmp$percentile*2,1)/2)),])[1]==1){
      # if (dim(otmp[c(which(otmp$percentile==round(t_level_tmp$percentile,2))),])[1]==1){
        otmp[c(which(otmp$percentile==round(t_level_tmp$percentile*2,1)/2)),]=t_level_tmp
        # otmp[c(which(otmp$percentile==round(t_level_tmp$percentile,2))),]=t_level_tmp
      } else {
        otmp=rbind(otmp,t_level_tmp)
      }
      t_date_prc=rbind(t_date_prc,t_level_tmp)
    }
  }
  #be careful with adding maximum points - it could be that everything is flooded. Only accept the lowest percentiles
  if (min(t_level$percentile)<0.05){
    t_level_tmp=t_level[c(which(t_level$percentile==min(t_level$percentile))),]
    if (t_level_tmp$ht_ortho>max(otmp$ht_ortho)){
      print('Maximum lake level data point added')
      if (dim(otmp[c(which(otmp$percentile==round(t_level_tmp$percentile*2,1)/2)),])[1]==1){
        otmp[c(which(otmp$percentile==round(t_level_tmp$percentile*2,1)/2)),]=t_level_tmp
      } else {otmp=rbind(otmp,t_level_tmp)}
    }
  }
  
  # plot(otmp[,"percentile"],otmp[,"ht_ortho"], type = "p")

  # # REPLACE VALUES AT EXTREMES BY EXTREMES
  ozmp=otmp
  # ozmp[c(which(ozmp$percentile==min(ozmp$percentile))),]$ht_ortho=max(ozmp$ht_ortho)
  min_level=min(ozmp$ht_ortho)
  max_level=max(ozmp$ht_ortho)
  ozmp[c(which(ozmp$percentile==max(ozmp$percentile))),]$ht_ortho=min(ozmp$ht_ortho)
  
  # CONSIDER ALSO the maximum of the filtered levels, 
  t_date_prc_max=max(t_date_prc[c(which(t_date_prc$percentile==min(t_date_prc$percentile))),]$ht_ortho)
  ozmp_max=max(ozmp[c(which(ozmp$percentile==min(ozmp$percentile))),]$ht_ortho)
  t_date_prc_min=min(t_date_prc[c(which(t_date_prc$percentile==max(t_date_prc$percentile))),]$ht_ortho)
  ozmp_min=min(ozmp[c(which(ozmp$percentile==max(ozmp$percentile))),]$ht_ortho)
  if (t_date_prc_max>ozmp_max){
    #make sure it is not the other extreme - that would be an outlier
    if (min_level!=ozmp[c(which(ozmp$percentile==min(ozmp$percentile))),]$ht_ortho){
      ozmp[c(which(ozmp$percentile==min(ozmp$percentile))),]$ht_ortho=t_date_prc_max
      ozmp[c(which(ozmp$percentile==min(ozmp$percentile))),]$percentile=min(t_date_prc$percentile)
    }
  }
  if (t_date_prc_min<ozmp_min){
    #make sure it is not the other extreme - that would be an outlier
    if (max_level!=ozmp[c(which(ozmp$percentile==max(ozmp$percentile))),]$ht_ortho){    
      ozmp[c(which(ozmp$percentile==max(ozmp$percentile))),]$ht_ortho=t_date_prc_min
      ozmp[c(which(ozmp$percentile==max(ozmp$percentile))),]$percentile=max(t_date_prc$percentile)
    }
  }
  
  otmp=ozmp
  if (dim(otmp)[1]<=5){
    print('Too little data for reliable fit.')
    message$status[message$f_map==i] <- ('Too little data for reliable fit.')
    next
  }
  
  t_filtered=otmp
  # t_filtered<- t_filtered %>% filter(!is.na(ht_ortho))
  # 10. Fitting: with monotonous constraint  ----------------------------------------------------------
  ## 10.1 generate data based on monotone polynomial fit ----------------------------------------------------------
  mp_fit<-NULL
  try(mp_fit <- fit_monpol(t_filtered))
  mp_fit_df <- NULL
  mp_fit_df <- fit_monpol_data(mp_fit)
  
  ## 10.2 COEFFICIENT OF DETERMINATION FIT ----------------------------------------------------------
  x <- t_filtered$percentile
  y <- t_filtered$ht_ortho
  preds<-my_predict_monpol(x,mp_fit$X)
  actual <- y
  rss <- sum((preds - actual) ^ 2)
  tss <- sum((actual - mean(actual)) ^ 2)
  rsq_pol <- 1 - rss/tss
  rsq <- rsq_pol
  rmse <- RMSE(preds - actual)
  
  ## 10.3 Check if logarithmic fit is better!----------------------------------------------------------
  d <- data.frame(x,y)  ## need to use data in a data.frame for predict()
  dexp <- data.frame(1-x,y)  ## need to use data in a data.frame for predict()
  
  remove(logEstimate)
  # logEstimate<-lmrob(y~log(x),data=d)
  logEstimate <- lm(y~log(x),data=d)
  # expEstimate<-nls(y~b*(1-x)^z,start = list(b = y[1], z = 0.1),data=dexp)
  
  xvec <- seq(0,1,0.001)
  logpred <- predict(logEstimate,newdata=data.frame(x=xvec),interval = "confidence")
  # exppred <- predict(expEstimate,newdata=data.frame(x=(1-xvec)))
  mp_fit_log<-data.frame(xvec,logpred)
  # mp_fit_exp<-data.frame((1-xvec),exppred)
  colnames(mp_fit_log)=c("percentile","Elevation","p0.025","p0.975")
  # colnames(mp_fit_exp)=c("percentile","Elevation")
  
  ## 10.4 COEFFICIENT OF DETERMINATION LOG FIT ----------------------------------------------------------
  preds <- predict(logEstimate,newdata=data.frame(x=x))
  actual <- y
  rss <- sum((preds - actual) ^ 2)
  tss <- sum((actual - mean(actual)) ^ 2)
  rsq_log <- 1 - rss/tss
  rmse_log <- RMSE(preds - actual)
  
  min_elev_data = min(t_filtered$ht_ortho)
  max_elev_data = max(t_filtered$ht_ortho)
  
  ## 10.5 CUBIC SPLINES INTERPOLATION ----------------------------------------------------------
  # plot(t_filtered[,"percentile"],t_filtered[,"ht_ortho"], type = "p")
  # # lines(spline(t_filtered[,"percentile"],t_filtered[,"ht_ortho"],xout =xvec),col=2)
  # # lines(spline(t_filtered[,"percentile"],t_filtered[,"ht_ortho"],n =101, method="hyman"),col=2)
  data = data.frame(t_filtered[,"ht_ortho"])
  names(data)<-'data'
  x1<-t_filtered[,"percentile"]
  k=size(x1,2)-1
  con_there = withTimeout(scam(y ~ s(x1, k = k, bs = "mpd"), data = data), timeout = 5, onTimeout = "silent") 
  if (isempty(con_there)){
    k=size(x1,2)-2
    con_there = withTimeout(scam(y ~ s(x1, k = k, bs = "mpd"), data = data), timeout = 5, onTimeout = "silent") 
  }
  if (!isempty(con_there)){
    con <- scam(y ~ s(x1, k = k, bs = "mpd"), data = data)
    # lines(t_filtered[,"percentile"],con$fitted.values,col=3)
    newd <- data.frame(x1=seq(0,1,0.001))
    fe <- predict(con,newd)
    # lines(newd$x1,fe,col=2)
    mp_fit_splines<-data.frame(xvec,fe)
    names(mp_fit_splines)<-c("percentile", "Elevation")
    
    # COEFFICIENT OF DETERMINATION splines
    preds <- con$fitted.values
    actual <- y
    rss <- sum((preds - actual) ^ 2)
    tss <- sum((actual - mean(actual)) ^ 2)
    rsq_splines <- 1 - rss/tss
    rmse_splines <- RMSE(preds - actual)
    
    max_elev = max(mp_fit_splines[c(which(mp_fit_splines$percentile>=min(t_filtered$percentile))),]$Elevation)
    min_elev = min(mp_fit_splines[c(which(mp_fit_splines$percentile<=max(t_filtered$percentile))),]$Elevation)
    elevfit_splines= 1-abs((max_elev_data-min_elev_data)-(max_elev-min_elev))/(max_elev_data-min_elev_data)
    gof_splines=(rsq_splines+elevfit_splines)/2
  } else {
    print('spline fitting did not converge')
    gof_splines=-99
  }
  
  f <- function(x){
    my_predict_monpol(x, mp_fit$X)}
  
  ## 10.6 CHOOSE THE FIT WITH THE HIGHER R2 ----------------------------------------------------------
  # -->not only the R2 should be the criterion but also how well the elevation range is reflected
  
  
  max_elev = max(mp_fit_df[c(which(mp_fit_df$percentile>=min(t_filtered$percentile))),]$Elevation)
  min_elev = min(mp_fit_df[c(which(mp_fit_df$percentile<=max(t_filtered$percentile))),]$Elevation)
  elevfit= 1-abs((max_elev_data-min_elev_data)-(max_elev-min_elev))/(max_elev_data-min_elev_data)
  gof=(rsq+elevfit)/2
  
  max_elev = max(mp_fit_log[c(which(mp_fit_log$percentile>=min(t_filtered$percentile))),]$Elevation)
  min_elev = min(mp_fit_log[c(which(mp_fit_log$percentile<=max(t_filtered$percentile))),]$Elevation)
  elevfit_log= 1-abs((max_elev_data-min_elev_data)-(max_elev-min_elev))/(max_elev_data-min_elev_data)
  gof_log=(rsq_log+elevfit_log)/2
  fittype="Monotone Polynomial fit"
  if (gof_log>gof){
    mp_fit_df=mp_fit_log
    rsq=rsq_log
    rmse=rmse_log
    gof=gof_log
    fittype="Log fit"
    
    f <- function(x){
      predict(logEstimate,newdata=data.frame(x=x))}
  }
  mp_fit_df0=mp_fit_df
  fittype0=fittype
  f0=f
  
  #USE THE SPLINES!
  if (gof_splines>gof){
    mp_fit_df=mp_fit_splines
    rsq=rsq_splines
    rmse=rmse_splines
    fittype="Monotonic Spline fit"
    f <- function(x){
      as.vector(predict(con,newdata=data.frame(x1=x)))}
  }

  x2 <- t_date_prc$percentile
  y2 <- t_date_prc$ht_ortho
  preds<-f(x2)
  actual <- y2
  rss <- sum((preds - actual) ^ 2)
  tss <- sum((actual - mean(actual)) ^ 2)
  rsq_pol <- 1 - rss/tss
  rsq2 <- rsq_pol
  
  # 11 FIND AND REMOVE SILLS! ----------------------------------------------------------
  t_filtered_all <- unique( rbind(t1_filtered,t2_filtered_debiased)[ , ] )
  t_filtered_all4plot=t_filtered_all %>%
    filter(ht_ortho>min(min(t_date_prc$ht_ortho),min(t_filtered$ht_ortho))-0.1)%>%
    filter(ht_ortho<max(max(t_date_prc$ht_ortho),max(t_filtered$ht_ortho))+0.1)
  
  mp_fit_df_filtered <- mp_fit_df %>%
    filter(percentile>=min(t_filtered$percentile)) %>%
    filter(percentile<=(max(t_filtered$percentile)+0.1))
  mp_fit_df_filtered$sill=99
  
  if (dim(mp_fit_df_filtered)[1]>200){
    for (mm in 1:(dim(mp_fit_df_filtered)[1]-200)){#
      mp_fit_df_filtered[mm,]$sill=mp_fit_df_filtered[mm,]$Elevation-mp_fit_df_filtered[mm+200,]$Elevation
    }
    if (min(mp_fit_df_filtered$sill)<0.15){
      t_filtered = t_filtered %>% filter(t_filtered$percentile<mp_fit_df_filtered[c(which(mp_fit_df_filtered$sill<0.15)),]$percentile[1])
    }
  }
  # if spline fitting leads to crazy sills try with the other fit
  if (dim(t_filtered)[1]>0 && (max(t_filtered$percentile)- min(t_filtered$percentile))/(max(t_filtered_all$percentile)-0.025)<0.45 && fittype=="Monotonic Spline fit"){
    mp_fit_df=mp_fit_df0
    fittype=fittype0
    f=f0
    t_filtered = otmp
    mp_fit_df_filtered <- mp_fit_df %>%
      filter(percentile>=min(t_filtered$percentile)) %>%
      filter(percentile<=(max(t_filtered$percentile)+0.1))
    mp_fit_df_filtered$sill=99
    if (dim(mp_fit_df_filtered)[1]>200){
      for (mm in 1:(dim(mp_fit_df_filtered)[1]-200)){#
        mp_fit_df_filtered[mm,]$sill=mp_fit_df_filtered[mm,]$Elevation-mp_fit_df_filtered[mm+200,]$Elevation
      }
      if (min(mp_fit_df_filtered$sill)<0.15){
        t_filtered = t_filtered %>% filter(t_filtered$percentile<mp_fit_df_filtered[c(which(mp_fit_df_filtered$sill<0.15)),]$percentile[1])
      }
    }
  }
  
  
  if (dim(t_filtered)[1]<=5){
    print('Too little data with relevant slope.')
    message$status[message$f_map==i] <- ('Too little data with relevant slope.')
    next
  }
  # 12. XY ----------------------------------------------------------
  max_elev_perc_fit = round(max(mp_fit_df[c(which(mp_fit_df$percentile>=min(t_filtered$percentile))),]$Elevation),2)
  min_elev_perc_fit = round(min(mp_fit_df[c(which(mp_fit_df$percentile<=max(t_filtered$percentile))),]$Elevation),2)
  min_elev_data_filtered = min(t_date_prc$ht_ortho)
  max_elev_data_filtered = max(t_date_prc$ht_ortho)
  range_perc_fit<-  max_elev_perc_fit - min_elev_perc_fit
  
  # range of values: ignore extremes if they are single outliers and increase delta_min/delta_max
  # available values exceed the values used for fitting
  if (round(min(t_date_prc$ht_ortho),2)<round(min(t_filtered$ht_ortho),2)){
    # the minimum value occurs only once in the whole dataset
    if (dim(t_filtered_all[c(which(t_filtered_all$ht_ortho<=min(t_date_prc$ht_ortho))),])[1]<=1){
      # the minimum value increases delta_min
      delta_min0<- abs((min_elev_data_filtered - min_elev_perc_fit)/range_perc_fit)
      tmp=t_date_prc[c(which(t_date_prc$ht_ortho>min(t_date_prc$ht_ortho))),]
      delta_min1<- abs((min(tmp$ht_ortho) - min_elev_perc_fit)/range_perc_fit)
      if (delta_min1<delta_min0){
        t_date_prc[c(which(t_date_prc$ht_ortho==min(t_date_prc$ht_ortho))),]$ht_ortho=min(min(tmp$ht_ortho),min(t_filtered$ht_ortho))
      } else {print(paste0('min value not removed! ',delta_min0))}
    }
  }
  if (round(max(t_date_prc$ht_ortho),2)>round(max(t_filtered$ht_ortho),2)){
    if (dim(t_filtered_all[c(which(t_filtered_all$ht_ortho>=max(t_date_prc$ht_ortho))),])[1]<=1){
      delta_max0<- abs((max_elev_data_filtered - max_elev_perc_fit)/range_perc_fit)
      tmp=t_date_prc[c(which(t_date_prc$ht_ortho<max(t_date_prc$ht_ortho))),]
      delta_max1<- abs((max(tmp$ht_ortho) - max_elev_perc_fit)/range_perc_fit)
      if (delta_max0>delta_max1){
        t_date_prc[c(which(t_date_prc$ht_ortho==max(t_date_prc$ht_ortho))),]$ht_ortho=max(max(tmp$ht_ortho),max(t_filtered$ht_ortho))
      } else {print(paste0('max value not removed! ',delta_max0))}
    }
  }
  # 13. Plot ----------------------------------------------------------
  occ_vs_elev_plot <- plot_occ_vs_elev(t_date_prc, t_filtered, t_filtered_all4plot, mp_fit_df, id,fittype)
  tmpocc=surface_occ_prob
  plot(occ_vs_elev_plot)
  ggsave(filename = paste0('elev_fit_to_', substring(i, 1, nchar(i)-3), 'png'),
         plot = occ_vs_elev_plot,
         path = paste0(dir_3, 'Bathymetries/fits/', str_to_store),
         width = 24, height = 12, dpi = 500, units = 'cm', bg='transparent')

  # t_date_prc[order(t_date_prc$percentile,decreasing = TRUE),]
  
  # 14. Generate metadata of the fit ----------------------------------------------------------
  data_for_geeupload = tibble('id_no'= paste0('DEMfrom_', substr(i, 1, nchar(i)-4)), 'lake_id' = id,
                              'min_elev_data_all' = round(min(t_filtered_all[c(which(t_filtered_all$percentile<=max(t_filtered$percentile)+0.025)),]$ht_ortho),2), 
                              'max_elev_data_all' = round(max(t_filtered_all[c(which(t_filtered_all$percentile>=min(t_filtered$percentile)-0.025)),]$ht_ortho),2),
                              'min_elev_data_filtered' = min(min(t_date_prc$ht_ortho),min(t_level[c(which(t_level$percentile>=max(t_filtered$percentile))),]$ht_ortho)), 
                              'max_elev_data_filtered' = max(max(t_date_prc$ht_ortho),max(t_level$ht_ortho)),
                               'max_elev_perc_fit' = round(max(mp_fit_df[c(which(mp_fit_df$percentile>=min(t_filtered$percentile))),]$Elevation),2),
                              'min_elev_perc_fit' = round(min(mp_fit_df[c(which(mp_fit_df$percentile<=max(t_filtered$percentile))),]$Elevation),2),
                              'min_occ' = min(t_filtered$percentile), 'max_occ'= max(t_filtered$percentile),
                              'occ_range_covered' = round((max(t_filtered$percentile)- min(t_filtered$percentile))/(max(t_filtered_all$percentile)-0.025),2),#cellStats(surface_occ_prob, max)
                              'data_pts' = nrow(distinct(t_filtered_all)),'elevation_intervals' = nrow(distinct(t_filtered)),'rsq'=round(rsq,2),
                              'rmse'=round(rmse/(max(mp_fit_df[c(which(mp_fit_df$percentile>=min(t_filtered$percentile))),]$Elevation)-
                                                   min(mp_fit_df[c(which(mp_fit_df$percentile<=max(t_filtered$percentile))),]$Elevation)),2),
                              'rsq_all'=round(rsq2,2),
                              'sill'=round(min(mp_fit_df_filtered$sill),2),
                              'max_date'=as.Date(max(as.numeric(as.Date(t_filtered_all$date)))),
                              'min_date'=as.Date(min(as.numeric(as.Date(t_filtered_all$date)))),'Tier'=1)


  # 15. Filtering based on metadata ----------------------------------------------------------
  # the fit should be based on at least 10 datapoints which cover 50 % of the surface occurrence range
  if ((data_for_geeupload$occ_range_covered<0.5)){
  # if (data_for_geeupload$data_pts < 10){
    print('The covered elevation range is not representative.')
    message$status[message$f_map==i] <- ('The covered elevation range is not representative.')
    next}

  if ((data_for_geeupload$data_pts < 10)){
    # if (data_for_geeupload$data_pts < 10){
    print('Less than 10 ATL13 data points.')
    message$status[message$f_map==i] <- ('Less than 10 ATL13 data points.')
    next}  
    
  # if the maximum elevation difference is more than 66m, abort
  if (round(max(t_filtered$ht_ortho),2)-round(min(t_filtered$ht_ortho),2)>66){
    paste0('Elevation Difference unrealistic (',round(max(t_filtered$ht_ortho),2)-round(min(t_filtered$ht_ortho),2),').')
    message$status[message$f_map==i] <- (paste0('Elevation Difference unrealistic (',round(max(t_filtered$ht_ortho),2)-round(min(t_filtered$ht_ortho),2),').'))
    next}  
  
  if (round(rsq,2) < 0.7){
    print('r2 very low.')
    message$status[message$f_map==i] <- ('r2 very low.')
    next}  

  # print((paste0('rmse: ', round(data_for_geeupload$rmse,3))))
  if (data_for_geeupload$rmse >0.15){
    print((paste0('rmse very low: ', round(data_for_geeupload$rmse,3))))
    message$status[message$f_map==i] <- (paste0('rmse very low: ', round(data_for_geeupload$rmse,3)))
    next}
  
  min_fit<-data_for_geeupload$min_elev_perc_fit
  if (min(t_date_prc$ht_ortho)==data_for_geeupload$min_elev_data_filtered){
    if (t_date_prc[c(which(t_date_prc$ht_ortho==data_for_geeupload$min_elev_data_filtered)),]$percentile[1]>max(t_filtered$percentile)){
      tmp<-round(t_date_prc[c(which(t_date_prc$ht_ortho==data_for_geeupload$min_elev_data_filtered)),]$percentile[1],2)
      # USE THE EXTRAPOLATED VALUE FROM mp_fit_df FOR A FAIRER COMPARISON
      min_fit<-mp_fit_df[which(round(mp_fit_df$percentile,3)==tmp),]$Elevation
    }
  }
  
  max_fit<-data_for_geeupload$max_elev_perc_fit
  if (max(t_date_prc$ht_ortho)==data_for_geeupload$max_elev_data_filtered){
    if (t_date_prc[c(which(t_date_prc$ht_ortho==data_for_geeupload$max_elev_data_filtered)),]$percentile[1]<min(t_filtered$percentile)){
      tmp<-round(t_date_prc[c(which(t_date_prc$ht_ortho==data_for_geeupload$max_elev_data_filtered)),]$percentile[1],2)
      # USE THE EXTRAPOLATED VALUE FROM mp_fit_df FOR A FAIRER COMPARISON
      max_fit<-mp_fit_df[which(round(mp_fit_df$percentile,3)==tmp),]$Elevation
    }
  }
  
  range_data_all<-	data_for_geeupload$max_elev_data_filtered - data_for_geeupload$min_elev_data_filtered
  range_perc_fit<-  max_fit - min_fit
  delta_min<- abs((data_for_geeupload$min_elev_data_filtered - min_fit)/range_perc_fit)
  delta_max<- abs((data_for_geeupload$max_elev_data_filtered - max_fit)/range_perc_fit)
  if (range_perc_fit/range_data_all <0.8){
    #attention, the range of the data may be dominated by outliers
    print('Fitted elevation range does not reflect data.')
    message$status[message$f_map==i] <- ('Fitted elevation range does not reflect data.')
    next}
  # if (delta_min > 0.25){
  if (delta_min > 0.2){
    print('Fitted minimum elevation does not reflect data.')
    message$status[message$f_map==i] <- ('Fitted minimum elevation does not reflect data.')
    next}
  # if (delta_max > 0.25){
  if (delta_max > 0.2){
    print('Fitted maximum elevation does not reflect data.')
    message$status[message$f_map==i] <- ('Fitted maximum elevation does not reflect data.')
    next}
  
  # # if the maximum elevation difference is less than 1 m, abort
  if ((max(mp_fit_df[-c(1),]$Elevation)-min(mp_fit_df[-c(1),]$Elevation))<0.5){
    print('Elevation Difference too small.')
    message$status[message$f_map==i] <- ('Elevation Difference too small.')
    next}
  
  if (data_for_geeupload$rsq_all <=0.5){
    print('rsq of filtered points extremely low')
    message$status[message$f_map==i] <- ('rsq of filtered points very low.')
    next}

  # 16. predict elevation from percentile map: ----------------------------------------------------------
  # tmpocc[tmpocc < 0.025] <- NA
  # tmpocc[tmpocc > 0.9] <- NA
  # f <- function(x){
  #   spline(t_filtered[,"percentile"],t_filtered[,"ht_ortho"],xout =x)$y}
  # 
  
  try(dem_icesat <- raster::calc(tmpocc, fun = f))
  if (!exists('dem_icesat')) {
    print('Monotone polynomial model could not be applied to the percentile map.')
    message$status[message$f_map==i] <- ('Monotone polynomial model could not be applied to the percentile map.')
    next
  }
  # library(rgdal)
  # crop_extent <- readOGR("D:/Dropbox (hydrosolutions)/Sustainable Water Management in Agriculture (Caritas, WI, SDC)/sahelian/DEMs/DEMs_Somalia/GIS/tmp/Abijiata.shp")
  # dem_icesat<-crop(dem_icesat, crop_extent)
  
  plot(dem_icesat, main='DEM derived from Icesat-2 data') 

  message$status[message$f_map==i]<-'DEM generated'

  # store DEM and fitting plot only if passed the filtering
  writeRaster(dem_icesat, paste0(dir_3, 'Bathymetries/DEMs/', str_to_store, '/DEMfrom_', substr(i, 1, nchar(i)-4),'.tif'),'overwrite'=TRUE)
  occ_vs_elev_plot <- plot_occ_vs_elev2(t_date_prc, t_filtered, t_filtered_all4plot, mp_fit_df, id,fittype)
  ggsave(filename = paste0('elev_fit_to_', substring(i, 1, nchar(i)-4), '_confirmed.png'),
       plot = occ_vs_elev_plot,
       path = paste0(dir_3, 'Bathymetries/fits/', str_to_store),
       width = 24, height = 12, dpi = 500, units = 'cm', bg='transparent')
  print('DEM generated')

  # 
  df_for_geeupload <- df_for_geeupload %>% add_row(data_for_geeupload)
  # rm(mp_fit)
  # rm(mp_fit_df)
  # rm(dem_icesat)
  # removeTmpFiles(h=1)

}

df_for_geeupload<- df_for_geeupload %>% filter(!is.na(id_no))
# store csv with dem metadata for geeupload
write_csv(df_for_geeupload, paste0(dir_3, '/Bathymetries/DEMs/', str_to_store, '/metadata_v2.csv'))

# store csv with messages
write_csv(message, paste0(dir_3, '/Bathymetries/DEMs/', str_to_store, '/summary_v2.csv'))


