# Negated `%in%`: `x %notin% table` is TRUE for every element of `x` that has
# no match in `table`.
`%notin%` <- function(x, table) !(x %in% table)

# Plot elevation (`ht_ortho`) against lake surface occurrence percentile.
#
# Arguments:
#   t            default data of the plot; its `percentile` / `ht_ortho`
#                columns are the x / y aesthetics, and its `date` column
#                colours the cross markers when filtered data exist.
#   t_filtered   points left after filtering; drawn as open black circles.
#                When non-empty, its percentile range is marked with two
#                vertical lines; when empty, a red note is drawn instead.
#   t2_filtered  points drawn as small black dots underneath everything else.
#   mp_fit       data frame with `percentile` and `Elevation`; it is drawn as
#                a blue line unless every `Elevation` value is 0.
#   id, fittype  combined into the title as "<fittype>: <id>".
# Returns the ggplot object.
plot_occ_vs_elev <- function(t, t_filtered, t2_filtered, mp_fit, id, fittype) {

  p <- ggplot(data = t, aes(x = percentile, y = ht_ortho)) +
    geom_point(data = t2_filtered, color = "black", shape = ".") +
    xlab("Lake Surface Occurence Percentile") +
    ylab("Elevation [m a.s.l.]") +
    ggtitle(paste0(fittype, ": ", id))

  # Either highlight the data that survived filtering, or say that none did.
  if (nrow(t_filtered) > 0) {
    highlight <- list(
      geom_point(aes(color = factor(date)), shape = 3, size = 1, stroke = 1.5),
      geom_vline(xintercept = min(t_filtered$percentile)),
      geom_vline(xintercept = max(t_filtered$percentile))
    )
  } else {
    highlight <- list(
      annotate(
        geom = "text", color = "red",
        label = "No ICESat data left after filtering",
        x = 0.1, y = min(t$ht_ortho)
      )
    )
  }

  # Layers in a list are added in order, so the drawing order is unchanged.
  p <- p +
    highlight +
    geom_point(
      data = t_filtered, color = "black", shape = 21, fill = NA,
      size = 1.5, stroke = 1.5
    )

  # An all-zero Elevation column marks an unsuccessful monotone polynomial fit.
  if (any(mp_fit$Elevation != 0)) {
    p <- p +
      geom_line(
        data = mp_fit, mapping = aes(x = percentile, y = Elevation),
        color = "blue", size = 1
      )
  }

  p
}

# Reduced variant of the occurrence-vs-elevation plot: background dots,
# filtered points and (if available) the fitted curve, without per-date
# markers, range lines or the "no data" note.
#
# Arguments:
#   t            default data of the plot (`percentile`, `ht_ortho`).
#   t_filtered   points drawn as open black circles.
#   t2_filtered  points drawn as small black dots.
#   mp_fit       data frame with `percentile` and `Elevation`; it is drawn as
#                a blue line unless every `Elevation` value is 0.
#   id, fittype  combined into the title as "<fittype>: <id>".
# Returns the ggplot object.
plot_occ_vs_elev2 <- function(t, t_filtered, t2_filtered, mp_fit, id, fittype) {

  p <- ggplot(data = t, aes(x = percentile, y = ht_ortho)) +
    geom_point(data = t2_filtered, color = "black", shape = ".") +
    xlab("Lake Surface Occurence Percentile") +
    ylab("Elevation [m a.s.l.]") +
    ggtitle(paste0(fittype, ": ", id)) +
    geom_point(
      data = t_filtered, color = "black", shape = 21, fill = NA,
      size = 1.5, stroke = 1.5
    )

  # An all-zero Elevation column marks an unsuccessful monotone polynomial fit.
  if (any(mp_fit$Elevation != 0)) {
    p <- p +
      geom_line(
        data = mp_fit, mapping = aes(x = percentile, y = Elevation),
        color = "blue", size = 1
      )
  }

  p
}

# Plot elevation against lake surface occurrence percentile, using the
# `ht_water_surf` column as the elevation.
#
# Arguments:
#   t            default data of the plot; its `percentile` / `ht_water_surf`
#                columns are the x / y aesthetics, and its `date` column
#                colours the cross markers when filtered data exist.
#   t_filtered   points left after filtering; drawn as open black circles.
#                When non-empty, its percentile range is marked with two
#                vertical lines; when empty, a red note is drawn instead.
#   t2_filtered  points drawn as small black dots underneath everything else.
#   mp_fit       data frame with `percentile` and `Elevation`; it is drawn as
#                a blue line unless every `Elevation` value is 0.
#   id, fittype  combined into the title as "<fittype>: <id>".
# Returns the ggplot object.
plot_occ_vs_elev_ht_surf <- function(t, t_filtered, t2_filtered, mp_fit, id, fittype) {

  p <- ggplot(data = t, aes(x = percentile, y = ht_water_surf)) +
    geom_point(data = t2_filtered, color = "black", shape = ".") +
    xlab("Lake Surface Occurence Percentile") +
    ylab("Elevation [m a.s.l.]") +
    ggtitle(paste0(fittype, ": ", id))

  # Either highlight the data that survived filtering, or say that none did.
  if (nrow(t_filtered) > 0) {
    highlight <- list(
      geom_point(aes(color = factor(date)), shape = 3, size = 1, stroke = 1.5),
      geom_vline(xintercept = min(t_filtered$percentile)),
      geom_vline(xintercept = max(t_filtered$percentile))
    )
  } else {
    highlight <- list(
      annotate(
        geom = "text", color = "red",
        label = "No ICESat data left after filtering",
        x = 0.1, y = min(t$ht_water_surf)
      )
    )
  }

  # Layers in a list are added in order, so the drawing order is unchanged.
  p <- p +
    highlight +
    geom_point(
      data = t_filtered, color = "black", shape = 21, fill = NA,
      size = 1.5, stroke = 1.5
    )

  # An all-zero Elevation column marks an unsuccessful monotone polynomial fit.
  if (any(mp_fit$Elevation != 0)) {
    p <- p +
      geom_line(
        data = mp_fit, mapping = aes(x = percentile, y = Elevation),
        color = "blue", size = 1
      )
  }

  p
}
# Plot surface occurrence vs elevation with both fitted curves overlaid.
#
# Arguments:
#   t           all points; drawn as small black dots.
#   t_filtered  default data of the plot (`percentile`, `ht_ortho`, `date`);
#               drawn as crosses coloured by date.
#   rq_fit      data frame with `percentile` and `Elevation`; blue line.
#   mp_fit      data frame with `percentile` and `Elevation`; black line.
# Returns the ggplot object.
plot_occ_vs_elev_compare2_fits <- function(t, t_filtered, rq_fit, mp_fit) {

  curve_aes <- aes(x = percentile, y = Elevation)

  ggplot(data = t_filtered, aes(x = percentile, y = ht_ortho)) +
    geom_point(aes(color = factor(date)), shape = 3, size = 2) +
    geom_line(data = rq_fit, mapping = curve_aes, color = "blue", size = 1) +
    geom_line(data = mp_fit, mapping = curve_aes, color = "black", size = 1) +
    geom_point(data = t, color = "black", size = 0.5) +
    xlab("Lake Surface Occurence Percentile") +
    ylab("Elevation [m a.s.l.]") +
    ggtitle("All Groundtracks and filtered")
}

#---------------------------------------------------
# Quantile regression (tau = 0.2) of `ht_ortho` on a third-order polynomial in
# `percentile`.
#
# Arguments:
#   t_filtered  data frame passed to `rq()` as `data`.
# Returns the object produced by `rq()`, or NULL when the call fails (the
#   error message is still printed by `try()`).
fit_rq <- function(t_filtered) {
  attempt <- try(
    rq(
      ht_ortho ~ percentile + I(percentile^2) + I(percentile^3),
      tau = 0.2,
      data = t_filtered
    )
  )

  if (inherits(attempt, "try-error")) {
    return(NULL)
  }
  attempt
}

#--------------------------------------------------
# Evaluate a fitted regression on a dense percentile grid (step 0.001) and
# keep the part of the curve between its first and last decreasing step.
#
# Arguments:
#   fit         fitted model usable as `predict(fit, newdata = ...)` with a
#               `percentile` predictor (e.g. the result of `fit_rq()`).
#   t_filtered  data frame whose `percentile` column defines the grid range.
#               Optional for backward compatibility: when NULL, a variable
#               named `t_filtered` is looked up in the enclosing environment,
#               which is the lookup the one-argument form has always used.
# Returns an unnamed list:
#   [[1]] data frame with `percentile`, `Elevation` and `mono` (change in
#         elevation from the previous grid point), restricted to the open
#         interval (min_def, max_def);
#   [[2]] min_def, the percentile of the first decreasing step;
#   [[3]] max_def, the percentile of the last decreasing step.
#   If the curve never decreases, [[1]] has zero rows and both limits are NA.
fit_rq_data <- function(fit, t_filtered = NULL) {
  if (is.null(t_filtered)) {
    # Same lexical lookup a free variable in this function body would get.
    t_filtered <- get("t_filtered", envir = parent.env(environment()))
  }

  grid <- data.frame(
    percentile = seq(min(t_filtered$percentile), max(t_filtered$percentile), 0.001)
  )
  grid$Elevation <- predict(fit, newdata = grid)

  # The regression is only considered defined where elevation(percentile)
  # is monotonically decreasing.
  grid$mono <- c(0, diff(grid$Elevation))
  decreasing <- which(grid$mono < 0)

  if (length(decreasing) == 0) {
    # No decreasing step at all: report NA limits without min()/max() warnings.
    min_def <- NA_real_
    max_def <- NA_real_
  } else {
    min_def <- grid$percentile[min(decreasing)]
    max_def <- grid$percentile[max(decreasing)]
  }

  # Base subsetting instead of a bare `filter()`, which resolves to
  # stats::filter() whenever dplyr is not attached. which() drops NA
  # comparisons, so NA limits give an empty result.
  keep <- which(grid$percentile > min_def & grid$percentile < max_def)
  rq_fit_df <- grid[keep, , drop = FALSE]
  rownames(rq_fit_df) <- NULL

  # This only trims increasing parts at the borders; an increase somewhere in
  # the middle of the interval is left for the calling code to sort out.
  list(rq_fit_df, min_def, max_def)
}


#---------------------------------------------------
# Constrained least-squares fit of a cubic polynomial, intended to be
# monotonically decreasing (`ht_ortho` as a function of `percentile`).
# Implementation from:
# https://stats.stackexchange.com/questions/220614/linear-regression-polynomial-slope-constraint-in-r
#
# Arguments:
#   t_filtered  data frame with `percentile` and `ht_ortho` columns.
# Returns the object produced by `lsei()`, or NULL when the call fails (the
#   error message is still printed by `try()`).
fit_monpol <- function(t_filtered) {
  x <- t_filtered$percentile
  b <- t_filtered$ht_ortho

  # Design matrix of the cubic: columns 1, x, x^2, x^3.
  A <- cbind(rep(1, length(x)), x, x^2, x^3)

  # One inequality constraint per row, each with right-hand side 0
  # (see the stackexchange answer for the derivation).
  G <- rbind(
    c(0, -1, -2, -3),
    c(0, -1, -2, 0),
    c(0, -1, 0, 0)
  )
  h <- numeric(3)

  attempt <- try(lsei(A = A, B = b, G = G, H = h, type = 2))
  if (inherits(attempt, "try-error")) {
    return(NULL)
  }
  attempt
}

# Constrained least-squares fit of a cubic polynomial, intended to be
# monotonically decreasing, using `ht_water_surf` as the response.
#
# Arguments:
#   t_filtered  data frame with `percentile` and `ht_water_surf` columns.
# Returns the object produced by `lsei()`, or NULL when the call fails (the
#   error message is still printed by `try()`).
fit_monpol_ht_surf <- function(t_filtered) {
  x <- t_filtered$percentile
  b <- t_filtered$ht_water_surf

  # Design matrix of the cubic: columns 1, x, x^2, x^3.
  A <- cbind(rep(1, length(x)), x, x^2, x^3)

  # One inequality constraint per row, each with right-hand side 0.
  G <- rbind(
    c(0, -1, -2, -3),
    c(0, -1, -2, 0),
    c(0, -1, 0, 0)
  )
  h <- numeric(3)

  attempt <- try(lsei(A = A, B = b, G = G, H = h, type = 2))
  if (inherits(attempt, "try-error")) {
    return(NULL)
  }
  attempt
}

# Prediction for the constrained cubic model.
#
# Arguments:
#   x             numeric vector of predictor values.
#   coefficients  the four polynomial coefficients, in the order
#                 intercept, x, x^2, x^3.
# Returns (invisibly) a one-column matrix with one prediction per element
#   of `x`.
my_predict_monpol <- function(x, coefficients) {
  design <- cbind(rep(1, length(x)), x, x^2, x^3)
  invisible(design %*% coefficients)
}

#--------------------------------------------------
# Build a data frame for drawing the constrained polynomial fit.
#
# Arguments:
#   fit_mp  fit object whose `X` element holds the four polynomial
#           coefficients (as consumed by `my_predict_monpol()`).
# Returns a data frame with `percentile` (0 to 1 in steps of 0.001, i.e. the
#   full percentile range regardless of where data exist) and `Elevation`
#   (the predicted values, stored as a one-column matrix).
fit_monpol_data <- function(fit_mp) {
  grid <- seq(0, 1, 0.001)

  mp_fit_df <- data.frame(percentile = grid)
  mp_fit_df$Elevation <- my_predict_monpol(grid, fit_mp$X)

  mp_fit_df
}

# Statistical mode: the most frequent value of `v`. When several values share
# the highest count, the one that appears first in `v` is returned.
getmode <- function(v) {
  candidates <- unique(v)
  counts <- tabulate(match(v, candidates))
  candidates[which.max(counts)]
}

#----------------------------------------------
# compare filtering methods
#----------------------------------------------
# plot surface occurence vs elevation
# plot_occ_vs_elev <- function(t, t_filtered, rq_fit){
#   
#   # plot the filtering data so far and the regression lines
#   surface_occurence_vs_elevation_plot <- ggplot(data=t_withfilter_column, aes(x=percentile, y=ht_ortho))+
#     geom_point(aes(color=factor(date)), shape=3, size=2)+
#     # geom_smooth(aes(color=factor(date)), se=FALSE, size=0.5)+
#     geom_line(data=rq_fit, mapping = aes(x=percentile, Elevation), color='blue', size=1)+
#     geom_point(data=t, color='black', size=0.5)+
#     xlab('Lake Surface Occurence Percentile')+
#     ylab('Elevation [m a.s.l.]')+
#     ggtitle('All Groundtracks and filtered')
#   return(surface_occurence_vs_elevation_plot)
# }

# Replace outliers in a numeric vector with NA.
#
# A value counts as an outlier when it lies more than 1.5 * IQR below the
# 2.5 % quantile or more than 1.5 * IQR above the 97.5 % quantile of `x`.
#
# Arguments:
#   x      numeric vector.
#   na.rm  passed to `quantile()` and `IQR()`.
#   ...    further arguments passed on to `quantile()`.
# Returns `x` with the outliers set to NA; length and order are unchanged.
remove_outliers <- function(x, na.rm = TRUE, ...) {
  bounds <- quantile(x, probs = c(.025, .975), na.rm = na.rm, ...)
  margin <- 1.5 * IQR(x, na.rm = na.rm)

  too_low <- x < (bounds[1] - margin)
  too_high <- x > (bounds[2] + margin)

  cleaned <- x
  cleaned[too_low] <- NA
  cleaned[too_high] <- NA
  cleaned
}

