# Supplement to Shevnina, 2023.
# Authorship: Elena Shevnina: eshevnina@gmail.com
# phone: +358449185446

library(lubridate)
library(moments)

##################### Data #####################################################
########################################################################################################################
### spring and annual floods from the supplement: elena.shevnina@fmi.fi
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/Torniojoki_sp.csv", sep =",", header=T)  ##1
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/OulunjokiSP.txt", sep =",", header=TRUE) ##2
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/KokemaenjokiSP.txt", sep =",", header=TRUE) ##3
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/Vantaanjoki_sp.csv", sep =",", header=TRUE) ##4
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/LieksanjokiSP.txt", sep =",", header=T)  ##5 
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/TanaSP.txt", sep =",", header=T)    ##6
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/JuutuanjokiSP.txt", sep =",", header=T) ##7
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/Ponoy_sp.txt", sep =",", header=T) ##8
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/Pinega_sp.csv", sep =",", header=T)  ##9
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/Pechora_sp.csv", sep =",", header=T)  ##10
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/VimSP.txt", sep =",", header=T)  ##11
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/Sula_sp.csv", sep =",", header=T)  ##12

##########################################################################################################
floods$DFB_date<-as.POSIXlt(floods$DFB, format = "%Y-%m-%d")
floods$DFE_date<-as.POSIXlt(floods$DFE, format = "%Y-%m-%d")
floods$DFmax_date<-as.POSIXlt(floods$DFMax, format = "%Y-%m-%d")
floods$DFB_doy<-as.numeric(strftime(floods$DFB_date, format = "%j"))
floods$DFE_doy<-as.numeric(strftime(floods$DFE_date, format = "%j"))
floods$DFmax_doy<-as.numeric(strftime(floods$DFmax_date, format = "%j"))

#####################################################
floods$FType<-ifelse(floods$DFB_doy <= floods$DFmax_doy, 1, 0)
write.table(floods, file="/home/shevnina/Manuscripts/2023/floods/supplement/TanaSP.txt", sep =",")
######################################################

sel1<-subset(floods, floods$year <= 2000)
sel2<-subset(floods, floods$year > 2000)
sum(sel1$Ftype)/length(sel1$Ftype)
##[1] 0.8125
sum(sel2$Ftype)/length(sel2$Ftype)
#[1] 0.4285714
sum(floods$Ftype)/length(floods$Ftype)
#[1] 0.7176471

#######################################################################################################################
##### FIGURES
##############################################################################################################
## Figure 2 a: Tornio River
sel<-subset(data, data$year == 2005)
timing <- tornio_timing
plot(sel$newTS, sel$wd_day, type="l", ylab="Q, m3s-1", xlab="2005", col="blue")

plot(sel$newTS, sel$wd_day, xlab="2005", ylab="Q, m3s-1", ylim=c(0,3000), axes=FALSE, type="l", col="blue")
axis.Date(1, at=seq(as.Date("2005/01/01"), as.Date("2005/12/31"), by="30 day"), format="%m/%d", cex.lab=1.5)
axis(2,  ylim=c(0,1700), col="black",col.axis="black",las=1)

grid()
box()
abline(h=0, col="black")

for (i in 1:length(timing$DFB)) {
  if (as.numeric(substr(timing$DFB[i], 1,4)) == year) {
    dfb <- as.Date.POSIXct(timing$DFB[i], "%Y-%m-%")
    dfe <- as.Date.POSIXct(timing$DFE[i], "%Y-%m-%")
    dfmax <- as.Date.POSIXct(timing$DFMax[i], "%Y-%m-%")
  } 
}

abline(v=as.Date(dfb),col="red")
abline(v=as.Date(dfe),col="red")sjuu
abline(v=as.Date(dfmax), col="black")
abline(v=as.Date("2005-03-01"), col="grey")
abline(v=as.Date("2005-06-01"), col="grey")
abline(v=as.Date("2005-09-01"), col="grey")
abline(v=as.Date("2005-12-01"), col="grey")

## Figure 2 b: Vantaanjoki River
sel<-subset(data, data$year == 2005) ## data include adily wd, Vantaan River
timing <- vantaa_timing
plot(sel$newTS, sel$wd_day, type="l", ylab="Q, m3s-1", xlab="2005", col="blue")

plot(sel$newTS, sel$wd_day, xlab="2005", ylab="Q, m3s-1", ylim=c(0,3000), axes=FALSE, type="l", col="blue")
axis.Date(1, at=seq(as.Date("2005/01/01"), as.Date("2005/12/31"), by="30 day"), format="%m/%d", cex.lab=1.5)
axis(2,  ylim=c(0,1700), col="black",col.axis="black",las=1)

grid()
box()
abline(h=0, col="black")

for (i in 1:length(timing$DFB)) {
  if (as.numeric(substr(timing$DFB[i], 1,4)) == year) {
    dfb <- as.Date.POSIXct(timing$DFB[i], "%Y-%m-%")
    dfe <- as.Date.POSIXct(timing$DFE[i], "%Y-%m-%")
    dfmax <- as.Date.POSIXct(timing$DFMax[i], "%Y-%m-%")
  } 
}

abline(v=as.Date(dfb),col="red")
abline(v=as.Date(dfe),col="red")
abline(v=as.Date(dfmax), col="black")
abline(v=as.Date("2005-03-01"), col="grey")
abline(v=as.Date("2005-06-01"), col="grey")
abline(v=as.Date("2005-09-01"), col="grey")
abline(v=as.Date("2005-12-01"), col="grey")


### Figure 3 
data <- data.frame(value=c(72,77,85,97,86))  
value2<-c(81,82,86,100,86)                   
value3<-c(43,63,81,98,86)
data<-cbind(data,value2)
data<-cbind(data,value3)
# data
#name value value2 value3
#1   Vantaajoki    72     81     43
#2 Kokemaenjoki    77     82     63
#3  Lieksanjoki    85     86     81
#4 Oulunjoki 97 100 98
#5 Vim 86 86 86
matx<-data.matrix(data)
barplot(t(matx), beside = T, cex.axis=0.7, ylim=c(1,100), axes = TRUE, names.arg = c("Vantaanjoki", "Kokemaenjoki", "Lieksanjoki", "Ouluunjoki", "Vim"),cex.names=0.75, col=c("grey", "green","red"), density=45)


### Figure 4
## (a)
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/KokemaenjokiSP.txt", sep =",", header=TRUE) ##3
ey <- expression(Q[max]~plain(m^3~s^-1))
plot(floods$year,floods$QMax, axes=TRUE, xlab="Year", ylab=ey, type="b", pch=19, col="red")   # mean
abline(v="1993", col="black")
grid()

sub1<-subset(floods, floods$year<=1993)
sub2<-subset(floods, floods$year>=1994)

abline(h=mean(sub1$QMax), col="orange")
abline(h=mean(sub2$QMax), col="green")

max1<-max(sub1$QMax)
min1<-min(sub1$QMax)
abline(h=min1, lty=2, col="orange")
abline(h=max1, lty=2, col="orange")

max2<-max(sub2$QMax)
min2<-min(sub2$QMax)
abline(h=min2, lty=2, col="green")
abline(h=max2, lty=2, col="green")

## (b)
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/LieksanjokiSP.txt", sep =",", header=TRUE) ##
plot(floods$year,floods$FRD, axes=TRUE, xlab="Year", ylab="FRD, mm", type="b", pch=19, col="blue")   # mean
abline(v="1983", col="black")
grid()

sub1<-subset(floods, floods$year<=1982)
sub2<-subset(floods, floods$year>=1983)

abline(h=mean(sub1$FRD), col="orange")
abline(h=mean(sub2$FRD), col="green")

max1<-max(sub1$FRD)
min1<-min(sub1$FRD)
abline(h=min1, lty=2, col="orange")
abline(h=max1, lty=2, col="orange")

max2<-max(sub2$FRD)
min2<-min(sub2$FRD)
abline(h=min2, lty=2, col="green")
abline(h=max2, lty=2, col="green")


## Figure 5
## (a) 
#floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/LieksanjokiSP.txt", sep =",", header=TRUE) ##
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/TanaSP.txt", sep =",", header=T)    ##6
sub1<-subset(floods, floods$year<=1952)
sub2<-subset(floods, floods$year>=1953)
p1<-hist(sub1$FRD, breaks = 7)
p2<-hist(sub2$FRD, breaks = 14)
plot(p1, col=rgb(0,0,1,1/4), main="", ylim=c(0,15), xlim=c(50,350), xlab="FRD, mm")
plot(p2,col=rgb(1,0,0,1/4),add=T)

## (b)
floods<-read.table("/home/shevnina/Manuscripts/2023/floods/supplement/Ponoy_sp.txt", sep =",", header=T) ##8
sub1<-subset(floods, floods$year<=1975)
sub2<-subset(floods, floods$year>=1976)
p1<-hist(sub1$FRD, breaks = 8)
p2<-hist(sub2$FRD, breaks = 8)
plot(p1, col=rgb(0,0,1,1/4), main="", ylim=c(0,10), xlim=c(50,350),xlab="FRD, mm")
plot(p2,col=rgb(1,0,0,1/4),add=T)



####################### Functions ########################################################################
## formulas are given according to Rozhdestvenskiy and Chebotarev, 1974
get_estimators<-function(x)
{
  l<-length(x)
  m1<-mean(x, na.rm=T)  
  sd_m1<-sd(x,na.rm=T)/sqrt(l)
  
  R <- x/m1
  sum1 <- sum((R-1)**2, na.rm=T)
  CV <- sqrt(sum1/(l-1))
  # CV<-sd(x, na.rm=T)/m1  
  sd_cv <- (CV/sqrt(l))*(sqrt(1+CV**2))
  
  sum2 <- sum((x-m1)**3, na.rm=T)
  CS <- sum2/(l*sd(x, na.rm=T)**3)
  sd_cs <- sqrt((6/l)*(1+CV**2))
  #CS<-skewness(x, na.rm=T)
  
  b<-acf(x, 1, na.action = na.pass)
  #r1<-b$acf[2]        ## autocorrelation with lag=1
  case<-data.frame(l, b$acf[2], m1, sd_m1, CV, sd_cv, CS, sd_cs)               
  names(case)<-c("l","r1","m1","sd_m1","CV", "sd_cv", "CS", "sd_cs")
  return(case)
}


### Floating window/point
stat_floating_point30<- function(x, y) {
  ## x- variable
  ## y - year
  n<-30
  lim<-as.integer(length(x)-n)
  eyear<-y[n]                    ## the year which ends the first of two floating periods
  
  i<-1                               ## initialization of the dataframe with the results
  t<-"t.test"
  tmp<-t.test(x[1:n],x[(n+1):lim+n], na.rm=T)
  df<-data.frame(t, n, i, eyear, tmp$statistic, tmp$p.value)
  names(df)<-c("test","n","i","year", "t.value", "p.value")
  
  #Student test
  for (i in 2:(lim-n)) 
  { tmp<-t.test(x[1:(i+n-1)],x[(i+n):(lim+n)], na.rm=T)
  if (tmp$p.value < 0.05) {
    df<-rbind(df, c(t,n, i, y[i+n], tmp$statistic, tmp$p.value)) }
  }
  #### Kolmogorov smirnov test
  t<-"ks.test"
  for (i in 2:(lim-n)) 
  {
    tmp<-ks.test(x[1:(i+n-1)],x[(i+n):(lim+n)], na.rm=T)
    if (tmp$p.value < 0.05) {
      df<-rbind(df, c(t, n, i, y[i+n], tmp$statistic, tmp$p.value)) }
  }
  return(df)
}

stat_floating_point15<- function(x, y) {
  ## x- variable
  ## y - year
  n<-15
  lim<-as.integer(length(x)-n)
  eyear<-y[n]                    ## the year which ends the first of two floating periods
  
  i<-1                               ## initialization of the dataframe with the results
  t<-"t.test"
  tmp<-t.test(x[1:n],x[(n+1):lim+n], na.rm=T)
  df<-data.frame(t, n, i, eyear, tmp$statistic, tmp$p.value)
  names(df)<-c("test","n","i","year", "t.value", "p.value")
  
  #Student test
  for (i in 2:(lim-n)) 
  { tmp<-t.test(x[1:(i+n-1)],x[(i+n):(lim+n)], na.rm=T)
  if (tmp$p.value < 0.05) {
    df<-rbind(df, c(t,n, i, y[i+n], tmp$statistic, tmp$p.value)) }
  }
  
  return(df)
}

#############################################
## Volume of spring flood and its duration; max water discharge in spring flood period
spring_flooding_vol<- function (sel, area, dfb, dfe) 
{
  year<-sel$year[1]
  sel$wv<-sel$wd_day*60*60*24
  subset_wv<-subset(sel$wv, sel$newTS >= dfb & sel$newTS <= dfe)               ### flooding period
  FRD<-(sum(subset_wv)*1000)/area                                               ## Spring flood runoff depth
  YRD<-(sum(sel$wv)*1000)/area                                                  ## Annual runoff depth
  
  sf<-subset(sel$wd_day, sel$newTS>=dfb && sel$newTS<=dfe)                      
  sfw<-max(sf)                                                                  ## maximum daily water discharge in spring flooding period 
  
  df<-data.frame(year, FRD, sfw, YRD)
  names(df)<-c("year", "FRD", "QSFmax", "YRD")
  return(df)
}
## end of function
### recalculation of the FRD and YRD for torniojoki

for (i in 1:length(timing$year)) 
{
  sel<-subset(data, data$year == timing$year[i]) ### for each year in 2008-2020
  tmp<-rbind(tmp,spring_flooding_vol(sel,area,timing$DFB[i],timing$DFE[i]))
}

mean_flooding<-function (x)
{
  #  floods<-read.table(x, sep =",", header=T)
  floods <- x
  floods$DFB_date<-as.POSIXlt(floods$DFB, format = "%Y-%m-%d")
  floods$DFE_date<-as.POSIXlt(floods$DFE, format = "%Y-%m-%d")
  floods$DFMax_date<-as.POSIXlt(floods$DFMax, format = "%Y-%m-%d")
  
  floods$DFB_doy<-as.numeric(strftime(floods$DFB_date, format = "%j"))
  floods$DFE_doy<-as.numeric(strftime(floods$DFE_date, format = "%j"))
  floods$DFMax_doy<-as.numeric(strftime(floods$DFMax_date, format = "%j"))
  
  lfp<-mean(floods$Length, na.rm=T)
  
  dfb<-as.Date(mean(floods$DFB_doy, na.rm=T), origin = "2014-01-01") 
  dfmax<-as.Date(mean(floods$DFMax_doy, na.rm=T), origin = "2014-01-01")
  dfe<-as.Date(mean(floods$DFE_doy, na.rm=T), origin = "2014-01-01")
  
  qmax<-mean(floods$QMax, na.rm=TRUE)
  frd<-mean(floods$FRD, na.rm=TRUE)
  yrd<-mean(floods$YRD, na.rm=TRUE)
  res<-data.frame(dfb, dfmax,dfe, lfp, qmax, frd, yrd, frd/yrd)
  return(res)
}

#####################################################################################################################
## to estimate how many floods happen during the spring flood period in rivers located in finland

get_source_flood <- function (x)
{
  timing <- x
  timing$year <- as.numeric(substr(timing$DFB,1,4))
  s <- sum(as.numeric(timing$Ftype))
  l <- length(timing$Ftype)
  portion <- s/l
  
  subset_by2000 <- subset(timing$Ftype, timing$year <= 2000)
  subset_after2000 <- subset(timing$Ftype)
  portion1 <- sum(as.numeric(subset_by2000))/length(subset_by2000)
  portion2 <- sum(as.numeric(subset_after2000))/length(subset_after2000)
  
  df <-data.frame(portion, portion1, portion2)
  return(df)
}



