# Supplement to Shevnina 2023
# Authorship: Elena Shevnina, eshevnina@gmail.com
# phone: +358449185446

library(moments)
library(lubridate)

################################# DATA #########################################################################
################################################################################################################
######################################################################################################################
### GRDC 
#infile<-"6998400_Q_Day.Cmd.txt"
data<-read.table(infile, skip=36, sep=';', header=T)
data$year<-as.numeric(substr(data$YYYY.MM.DD,1,4))
data$newTS<-as.Date(data$YYYY.MM.DD, "%Y-%m-%d")

data$Value[data$Value == -999] <-NA                                                          ### to replace missing values with NA
miss<-aggregate(Value ~ year, data=data, function(x) {sum(is.na(x))}, na.action = NULL)      ### to count missing values

data$wd_day<-data$Value                                                                      ### to copy the daily water discharge
data$Value[data$Value == 0] <-NA                                                             ### to replace 0 with NA: for calculation of mean year water discharge

data$wv<-data$wd_day*60*60*24                                                                ### to calculate volume of runoff in each day

wd_max<-aggregate(list(max_day = data$Value), list(cut(data$newTS, "1 year")), max, na.rm=TRUE) 
wd_mean<-aggregate(list(mean_day = data$Value), list(cut(data$newTS, "1 year")), mean, na.rm=TRUE) 
vol<-aggregate(list(x = data$wv), list(cut(data$newTS, "1 year")), sum, na.rm=TRUE) 

miss$wd_max<-wd_max$max_day
miss$wd_mean<-wd_mean$mean_day
miss$r_km3=vol$x/1000000000 
miss$r_mm=vol$x*1000/A

rm(vol)
rm(wd_max)
rm(wd_mean)


#infile<-"/home/shevnina/Manuscripts/2022/Floods/data/Finland/6730501_Q_Day.Cmd.txt" # Tana
#infile<-"/home/shevnina/Manuscripts/2022/Floods/data/Finland/6832755_Q_Day.Cmd.txt" # Torniojoki
#infile<-"/home/shevnina/Manuscripts/2022/Floods/data/Finland/6830104_Q_Day.Cmd.txt" # Juutuanjoki
#infile<-"/home/shevnina/Manuscripts/2022/Floods/data/Finland/6854104_Q_Day.Cmd.txt" # Kokemäenjoki or Muroleenkoski
#infile<-"/home/shevnina/Manuscripts/2022/Floods/data/Finland/6854115_Q_Day.Cmd.txt"  # Vantaajoki at Oulunkyla
#infile<-"/home/shevnina/Manuscripts/2022/Floods/data/Finland/6855480_Q_Day.Cmd.txt"   # Lieksanjoki at Ruunaa
#infile<-"/home/shevnina/Manuscripts/2022/Floods/data/Finland/6854590_Q_Day.Cmd.txt" # Oulujoki

### WD for 2018-2022 are provided in separate source
library("readxl")
infile<-"/home/shevnina/Manuscripts/2022/Floods/data/Finland/Dischargedata220922.xlsx" ### SYKE, September 2022
Lieksa_wd<-read_excel(infile, sheet=2, skip=1)
Kokemaen_wd<-read_excel(infile, sheet=3)
Oulun_wd<-read_excel(infile, sheet=4)
Tornion_wd<-read_excel(infile, sheet=5)

#A=14160000000                                                     ### Catchment area, m2   tana
#A=39000000000                                                     ### Torniojoki Karunki
#A=5160000000                                                        ### Juutuanjoki
#A=6100000000                                                        ### Kokemaenjoki  
#A = 2045000000                                                      ### Vantaajoki
#A= 6260000000                                                       ### Lieksanjoki
#A= 2045000000                                                        ### Oulujoki
#####################################################################################################################
# Missing data for the years
### 1941, 1945 and 1991 Tana
### 1930 Kokemaenjoki
### 1985, 1994, 1995 Fjonska


######################################################################################################################
######################################################################################################################
### selection of the dates of flooding begin, maximum and end in particular year 
## x is list with 6 fields
## area is in m2 
## A and B are the empirical coefficients (Shevnina, 2013)

spring_flooding<- function (x, area, A, B) 
{ 
      x1<-sel$wd_day[2:length(sel$wd_day)]                                      ## Q(t+1)
      x2<-sel$wd_day[3:length(sel$wd_day)]                                      ## Q(t+2)  
      x1[length(x1)+1]<-sel$wd_day[length(sel$wd_day)]                          ## to fit the length of the array
      x2[length(x2)+1]<-sel$wd_day[length(sel$wd_day)] 
      x2[length(x2)+1]<-sel$wd_day[length(sel$wd_day)]
      sel$wd_1<-x1                                                              ## 
      sel$wd_2<-x2
      sel$delta<-sel$wd_1-sel$wd_day                                            ##  x[t-1]-x[t]   ### D(t)
      sel$delta_1<-sel$wd_2-sel$wd_1                                            ## x[t-2]-x[t-1]
      
      ## wd max and its date
      max_wd <- max(sel$wd_day, na.rm=TRUE)                                     ## date of flooding max
      z<-sel[sel$wd_day == max_wd, ]
      dfmax<-z$newTS[1] 
      
      doy_fb1<-yday(strftime("2008-04-16",format="%Y-%m-%d"))      
      doy_fe1<-yday(strftime("2008-07-16",format="%Y-%m-%d"))
      
      fl <-0                  ## non spring floods by default
      if (dfmax >= sel$newTS[doy_fb1] && dfmax <= sel$newTS[doy_fe1])
      { fl <- 1 }        ## spring period
      
      Qmezh_mean<-mean(sel$wd_day[1:60])                                        ## the mean water discharge in January-February
      
      ## date of spring flood begin
      x<-sel[sel$delta >= A, ]                                                  ## date of flooding begin (Shevnina, 2013)
      ## XX
      dfb<-x$newTS[1]
     
     ## source of yearly maximum floods      
    if (fl == 1)                                                                ## maximum wd during spring flood
      {  z<-sel[sel$newTS >= as.Date(dfmax), ] 
      } else {
        z<-sel[sel$newTS >= as.Date(dfb), ]}                                    ## maximum wd non in spring
       
    
      ## date of spring flood ends  
      Qmezh_min<-B*Qmezh_mean                                                   ## the mean water discharge in January-February 
      y<-z[z$delta > 0 & z$delta_1 <0 & z$wd_day <= Qmezh_min, ]                ## date of flood ends (Shevnina, 2013)
      dfe<-y$newTS[1]
      len<-as.numeric(as.Date(dfe)-as.Date(dfb))
      
                    
    
      ## volume of spring flood (in depth of runoff, mm), annual runoff depth
      year<-sel$year[1]
      sel$wv<-sel$wd_day*60*60*24
      subset_wv<-subset(sel$wv, sel$newTS >= dfb & sel$newTS <= dfe)            ## spring flooding runoff depth
      FRD<-(sum(subset_wv)*1000)/area                                           ## spring flood runoff depth
      YRD<-(sum(sel$wv)*1000)/area                                              ## Annual runoff depth
      sf<-subset(sel$wd_day, sel$newTS>=dfb & sel$newTS<=dfe)   
      sfw<-max(sf)                                                              ## maximum daily water discharge during a spring flood
      
      df<-data.frame(year,as.character(dfb), as.character(dfe), as.character(dfmax), max_wd, len, FRD, sfw, YRD, as.character(fl))
      names(df)<-c("year", "DFB", "DFE","DFMax", "QMax", "Length","FRD", "QSFmax", "YRD","Ftype")
      return(df)
}

## end of function


## Controlling tools
#################################################################################################################
#sel<-subset(data, data$year == 2012 | data$year == 2013 | data$year == 2014 )

sel<-subset(data, data$year == 2017)
tmp<-spring_flooding(sel,area, 2.5,8)
plot(sel$newTS, sel$wd_day, main= as.character(sel$year[1]), type="l", ylab="Q, m3s-1", xlab="Month")
abline(v=as.Date(tmp$DFB), col="red")
abline(v=as.Date(tmp$DFE), col="red")
abline(v=as.Date(tmp$DFMax), col="black")

#######################################################################
df<-data.frame(year, dfb, dfe, len, dfm, max_wd, FRD, YRD)
names(df)<-c("year", "DFB", "DFE", "Length", "DFMax", "QMax", "FRD", "YRD")

######################################################################################################################
######################################################################################################################
### Floods for Finland
write.table(tana, file = "/home/shevnina/Manuscripts/2022/Floods/data/Finland/Tana_floods.csv", sep =",") ##
write.table(tornio, file = "/home/shevnina/Manuscripts/2022/Floods/data/Finland/Torniojoki_floods.csv", sep =",") ##
write.table(df, file = "/home/shevnina/Manuscripts/2022/Floods/data/Finland/Juutuanjoki_floods.csv", sep =",") ##
write.table(df, file = "/home/shevnina/Manuscripts/2022/Floods/data/Finland/Vantaanjoki_floods.csv", sep =",") ##
write.table(df, file = "/home/shevnina/Manuscripts/2022/Floods/data/Finland/Lieksanjoki_floods.csv", sep =",") ## 
write.table(df, file = "/home/shevnina/Manuscripts/2022/Floods/data/Finland/Kokemaenjoki_floods.csv", sep =",") ##
write.table(df, file = "/home/shevnina/Manuscripts/2022/Floods/data/Finland/Oulujoki_floods.csv", sep =",") ##
##############################################################################################################
##############################################################################################################

## to calculate the volume, spring flood and annual runoff (in depth) 
## sel is the daily water discharges in selected year, area is in m2
spring_flooding_vol<- function (sel, area, dfb, dfe) 
{
  year<-sel$year[1]
  sel$wv<-sel$wd_day*60*60*24
  subset_wv<-subset(sel$wv, sel$newTS >= dfb & sel$newTS <= dfe)               ## Flooding period
  FRD<-(sum(subset_wv)*1000)/area                                               ## Spring flood runoff depth
  YRD<-(sum(sel$wv)*1000)/area                                                  ## Annual runoff depth
  
  sf<-subset(sel$wd_day, sel$newTS>=dfb & sel$newTS<=dfe)                      
  sfw<-max(sf)                                                                  ## Maximum daily water discharge in spring flooding period 
  
  df<-data.frame(year, FRD, sfw, YRD)
  names(df)<-c("year", "FRD", "QSFmax", "YRD")
  return(df)
}
## end of function

###################################################################################################################
#### to calculate the statistic of the flooding period, x is the name of the file where the data is stored
mean_flooding<-function (x)
  {
 #  floods<-read.table(x, sep =",", header=T)
   floods <- x
   floods$DFB_date<-as.POSIXlt(floods$DFB, format = "%Y-%m-%d")
   floods$DFE_date<-as.POSIXlt(floods$DFE, format = "%Y-%m-%d")
   floods$DFMax_date<-as.POSIXlt(floods$DFMax, format = "%Y-%m-%d")

   floods$DFB_doy<-as.numeric(strftime(floods$DFB_date, format = "%j"))
   floods$DFE_doy<-as.numeric(strftime(floods$DFE_date, format = "%j"))
   floods$DFMax_doy<-as.numeric(strftime(floods$DFMax_date, format = "%j"))
   
   lfp<-mean(floods$Length, na.rm=T)
   
   dfb<-as.Date(mean(floods$DFB_doy, na.rm=T), origin = "2014-01-01") 
   dfmax<-as.Date(mean(floods$DFMax_doy, na.rm=T), origin = "2014-01-01")
   dfe<-as.Date(mean(floods$DFE_doy, na.rm=T), origin = "2014-01-01")

   qmax<-mean(floods$QMax, na.rm=TRUE)
   frd<-mean(floods$FRD, na.rm=TRUE)
   yrd<-mean(floods$YRD, na.rm=TRUE)
   res<-data.frame(dfb, dfmax,dfe, lfp, qmax, frd, yrd, frd/yrd)
   return(res)
}

## end of function



#####################################################################
#### Russian stations by 2007 after Shevnina (2013)
infile<-"/home/shevnina/Manuscripts/2022/Floods/data/Russian/Yemca.dat"
infile<-"/home/shevnina/Manuscripts/2022/Floods/data/Russian/Pinega.dat"
infile<-"/home/shevnina/Manuscripts/2022/Floods/data/Russian/Pechora.dat"
infile<-"/home/shevnina/Manuscripts/2022/Floods/data/Russian/Vim.dat"
infile<-"/home/shevnina/Manuscripts/2022/Floods/data/Russian/Sula.dat"
infile<-"/home/shevnina/Manuscripts/2022/Floods/data/Russian/Ponoy.dat"
infile<-"/home/shevnina/Manuscripts/2022/Floods/data/Russian/Solza.dat"

floods<-read.table(infile, sep =" ", header=F)
floods$DFB_date<-as.POSIXlt(floods$V2, format = "%Y-%m-%d")
floods$DFE_date<-as.POSIXlt(floods$V3, format = "%Y-%m-%d")
floods$length<-as.numeric(floods$V4)
floods$year<-as.numeric(substr(floods$V2,1,4))
floods$FRD<-as.numeric(floods$V6)
floods$Qmax<-as.numeric(floods$V5)
floods_sorted<-floods[order(floods$year, decreasing = FALSE),]  ### sorted by the year
floods$DFB_doy<-as.numeric(strftime(floods$DFB_date, format = "%j"))
floods$DFE_doy<-as.numeric(strftime(floods$DFE_date, format = "%j"))
as.Date(mean(floods$DFB_doy, na.rm=T), origin = "2014-01-01")
as.Date(mean(floods$DFE_doy, na.rm=T), origin = "2014-01-01")
mean(floods$V5, na.rm=TRUE)               ## daily maximum water discharge
mean(floods$V6, na.rm=TRUE)               ## spring flooding runoff depth
mean(floods$length, na.rm=TRUE)

#area<-10200000000 ## ponoy

#############################################################################
### water discharges extracted from https://gmvo.skniivh.ru/index.php?id=1
### October 2022, 2008-2020
### xml to csv
### sed -i "s/ ^//g" Pechora.csv
### sed -i "s/,/./g" Pechora.csv 
### sed -i "s/_//g" Pechora.csv 


## Russian sites ONLY!
## Function to read and convert the data XLS to CSV + filtering
get_data <- function (xx_wd) 
{
## jan
year<-xx_wd$jan[1]          ## 2008 
month<-xx_wd$jan[4]
dat<-xx_wd$head[5:35]
wd<-xx_wd$jan[5:35]
date<-paste(year,month,dat,sep="-")
dis<-paste(date, wd, sep=" ")

for (i in 1:12)  ## 2009-2020
   { k<-53*i                  
   year<-xx_wd$jan[(k+1)]  
   month<-xx_wd$jan[(k+4)]
     wd<-xx_wd$jan[(k+5):(k+35)] 
     date<- paste(year,month,dat,sep="-")
     str<-paste(date, wd, sep=" ")
     
       if (( i %% 2) == 0) {
             dis<-append(dis1,str)}  else  { 
                   dis1<-append(dis,str) }
     }
JAN<-dis

## feb
year<-xx_wd$jan[1]          ## 2008 
month<-xx_wd$feb[4]
wd<-xx_wd$feb[5:35]
rm(dis,dis1)
date<- paste(year,month,dat,sep="-")
dis<-paste(date, wd, sep=" ")

for (i in 1:12)  ## 2009-2020
{ k<-53*i    
year<-xx_wd$jan[(k+1)]  
month<-xx_wd$feb[(k+4)]
wd<-xx_wd$feb[(k+5):(k+35)] 
date<- paste(year,month,dat,sep="-")
str<-paste(date, wd, sep=" ")

if (( i %% 2) == 0) {
  dis<-append(dis1,str)}  else  { 
    dis1<-append(dis,str) } 
}
FEB<-dis

## mar
year<-xx_wd$jan[1]          ## 2008 
month<-xx_wd$mar[4]
wd<-xx_wd$mar[5:35]
rm(dis,dis1)
date<- paste(year,month,dat,sep="-")
dis<-paste(date, wd, sep=" ")

for (i in 1:12)  ## 2009-2020
   { k<-53*i                  
   year<-xx_wd$jan[(k+1)]  
   month<-xx_wd$mar[(k+4)]
   wd<-xx_wd$mar[(k+5):(k+35)] 
   date<- paste(year,month,dat,sep="-")
   str<-paste(date, wd, sep=" ")
     
       if (( i %% 2) == 0) {
             dis<-append(dis1,str)}  else  { 
                   dis1<-append(dis,str) }
     }
MAR<-dis

## apr
year<-xx_wd$jan[1]          ## 2008 
month<-xx_wd$apr[4]
wd<-xx_wd$apr[5:35]
rm(dis,dis1)
date<- paste(year,month,dat,sep="-")
dis<-paste(date, wd, sep=" ")
for (i in 1:12)  ## 2009-2020
 { k<-53*i                  
  year<-xx_wd$jan[(k+1)]  
  month<-xx_wd$apr[(k+4)]
  wd<-xx_wd$apr[(k+5):(k+35)] 
  date<- paste(year,month,dat,sep="-")
  str<-paste(date, wd, sep=" ")
 
if (( i %% 2) == 0) {
     dis<-append(dis1,str)}  else  { 
       dis1<-append(dis,str) }
}
APR<-dis

## may
year<-xx_wd$jan[1]          ## 2008 
month<-xx_wd$may[4]
wd<-xx_wd$may[5:35]
rm(dis,dis1)
date<- paste(year,month,dat,sep="-")
dis<-paste(date, wd, sep=" ")

for (i in 1:12)  ## 2009-2020
  { k<-53*i                  
  year<-xx_wd$jan[(k+1)]  
  month<-xx_wd$may[(k+4)]
  wd<-xx_wd$may[(k+5):(k+35)] 
  date<- paste(year,month,dat,sep="-")
  str<-paste(date, wd, sep=" ")
   
if (( i %% 2) == 0) {
     dis<-append(dis1,str)}  else  { 
         dis1<-append(dis,str) }
  }

 MAY<-dis

## jun
year<-xx_wd$jan[1]          ## 2008 
month<-xx_wd$jun[4]
wd<-xx_wd$jun[5:35]
rm(dis,dis1)
date<- paste(year,month,dat,sep="-")
dis<-paste(date, wd, sep=" ")
 for (i in 1:12)  ## 2009-2020
    { k<-53*i                  
     year<-xx_wd$jan[(k+1)]  
     month<-xx_wd$jun[(k+4)]
      wd<-xx_wd$jun[(k+5):(k+35)] 
      date<- paste(year,month,dat,sep="-")
     str<-paste(date, wd, sep=" ")
      
     if (( i %% 2) == 0) {
           dis<-append(dis1,str)}  else  { 
               dis1<-append(dis,str) } 
 }
JUN<-dis

## jul
year<-xx_wd$jan[1]          ## 2008 
month<-xx_wd$jul[4]
wd<-xx_wd$jul[5:35]
rm(dis,dis1)
date<- paste(year,month,dat,sep="-")
dis<-paste(date, wd, sep=" ")

for (i in 1:12)  ## 2009-2020
 { k<-53*i                  
   year<-xx_wd$jan[(k+1)]  
   month<-xx_wd$jul[(k+4)]
   wd<-xx_wd$jul[(k+5):(k+35)] 
   date<- paste(year,month,dat,sep="-")
   str<-paste(date, wd, sep=" ")
  
if (( i %% 2) == 0) {
    dis<-append(dis1,str)}  else  { 
       dis1<-append(dis,str) }
}
JUL<-dis

## aug
year<-xx_wd$jan[1]          ## 2008 
month<-xx_wd$aug[4]
wd<-xx_wd$aug[5:35]
rm(dis,dis1)
date<- paste(year,month,dat,sep="-")
dis<-paste(date, wd, sep=" ")

for (i in 1:12)  ## 2009-2020
{ k<-53*i                  
year<-xx_wd$jan[(k+1)]  
month<-xx_wd$aug[(k+4)]
wd<-xx_wd$aug[(k+5):(k+35)] 
date<- paste(year,month,dat,sep="-")
str<-paste(date, wd, sep=" ")

if (( i %% 2) == 0) {
  dis<-append(dis1,str)}  else  { 
    dis1<-append(dis,str) }
}
AUG<-dis

## sep
year<-xx_wd$jan[1]          ## 2008 
month<-xx_wd$sep[4]
wd<-xx_wd$sep[5:35]
rm(dis,dis1)
date<- paste(year,month,dat,sep="-")
dis<-paste(date, wd, sep=" ")

for (i in 1:12)  ## 2009-2020
{ k<-53*i                  
year<-xx_wd$jan[(k+1)]  
month<-xx_wd$sep[(k+4)]
wd<-xx_wd$sep[(k+5):(k+35)] 
date<- paste(year,month,dat,sep="-")
str<-paste(date, wd, sep=" ")

if (( i %% 2) == 0) {
  dis<-append(dis1,str)}  else  { 
    dis1<-append(dis,str) }
}
SEP<-dis


## oct
year<-xx_wd$jan[1]          ## 2008 
month<-xx_wd$oct[4]
wd<-xx_wd$oct[5:35]
rm(dis,dis1)
date<- paste(year,month,dat,sep="-")
dis<-paste(date, wd, sep=" ")

for (i in 1:12)  ## 2009-2020
{ k<-53*i                  
year<-xx_wd$jan[(k+1)]  
month<-xx_wd$oct[(k+4)]
wd<-xx_wd$oct[(k+5):(k+35)] 
date<- paste(year,month,dat,sep="-")
str<-paste(date, wd, sep=" ")

if (( i %% 2) == 0) {
  dis<-append(dis1,str)}  else  { 
    dis1<-append(dis,str) }
}
OCT<-dis

## nov
year<-xx_wd$jan[1]          ## 2008 
month<-xx_wd$nov[4]
wd<-xx_wd$nov[5:35]
rm(dis,dis1)
date<- paste(year,month,dat,sep="-")
dis<-paste(date, wd, sep=" ")

for (i in 1:12)  ## 2009-2020
{ k<-53*i                  
year<-xx_wd$jan[(k+1)]  
month<-xx_wd$nov[(k+4)]
wd<-xx_wd$nov[(k+5):(k+35)] 
date<- paste(year,month,dat,sep="-")
str<-paste(date, wd, sep=" ")

if (( i %% 2) == 0) {
  dis<-append(dis1,str)}  else  { 
    dis1<-append(dis,str) }
}
NOV<-dis

## dec
year<-xx_wd$jan[1]          ## 2008 
month<-xx_wd$dec[4]
wd<-xx_wd$dec[5:35]
rm(dis,dis1)
date<- paste(year,month,dat,sep="-")
dis<-paste(date, wd, sep=" ")

for (i in 1:12)  ## 2009-2020
{ k<-53*i                  
year<-xx_wd$jan[(k+1)]  
month<-xx_wd$dec[(k+4)]
wd<-xx_wd$dec[(k+5):(k+35)] 
date<- paste(year,month,dat,sep="-")
str<-paste(date, wd, sep=" ")

if (( i %% 2) == 0) {
  dis<-append(dis1,str)}  else  { 
    dis1<-append(dis,str) }
}
DEC<-dis

tmp<-c(JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC)
# return (tmp)


#################################
dt<-c(0)
vl<-c(0)
for (i in 1:length(tmp)) 
{  qq<-unlist(strsplit(tmp[i], " "))
dt[i]<-qq[1]
vl[i]<-qq[2]
}
res<-data.frame(dt,vl)
res$newDate<-as.Date(res$dt)
res$newValue<-as.numeric(res$vl)
return (res)
} 
## end of function


## to common format in intial data for calculation of spring flood characteristic
## Russian sites ONLY!
res2data <- function(x)
{
  x$newTS<-x$newDate
  x$wd_day<-x$newValue
  x$year<-as.numeric(substr(x$newDate,1,4))
  return(x)
}

