Solutions to the provided tasks

Task 1: Find a function that can be used to calculate the standard deviation in R and use the function.

# Task 1: look up the documentation of sd() and compute a standard deviation
?sd
# sd() returns the sample standard deviation (denominator n - 1)
sd(c(3,4,2,54))
## [1] 25.51307

Task 2: Find a function to plot a graph (any graph) in R and use the function.

# Task 2: documentation of plot() and a minimal scatter plot of two vectors
?plot
plot(c(3,5,4),c(4,3,5))

Task 3: Find a function with which you can generate a sequence of numbers, say 2,4,6,8,10, etc., and use this function to calculate a sequence that has 50 elements, whose initial value is 2, and whose step is also 2.

# Task 3: seq() builds arithmetic sequences; here 50 elements starting at 2
# with step 2, i.e. 2, 4, ..., 100
?seq
seq(from=2,by=2,length.out=50)
##  [1]   2   4   6   8  10  12  14  16  18  20  22  24  26  28  30  32  34  36  38
## [20]  40  42  44  46  48  50  52  54  56  58  60  62  64  66  68  70  72  74  76
## [39]  78  80  82  84  86  88  90  92  94  96  98 100

Task 4: Find a function to round numbers and round the number 2.464646 to 2 decimal places.

# Task 4: round() with digits = 2 keeps two decimal places
?round
round(x=2.464646, digits = 2)
## [1] 2.46

Task 5: Suppose you measure rainfall every day of the week (Monday to Sunday) at 2 rainfall stations; the first station measured 10,0,0,20,15,10,5 mm of rainfall and the second station measured 5,0,0,25,10,5,10 mm of rainfall. Store the data in 2 separate objects. Then, using the objects, calculate the rainfall values for all 7 days: Calculate the average precipitation taking into account both stations (station1 and station2), calculate the average precipitation over the whole week (for all days combined), calculate the maximum daily and minimum daily precipitation given the measurements from both stations, calculate the range of the measured values (range), and calculate the median for each precipitation station and the standard deviation for each precipitation station.

# Task 5: weekly rainfall (Monday ... Sunday) at two stations
pos1 <- c(10,0,0,20,15,10,5)
pos2 <- c(5,0,0,25,10,5,10)
# day-by-day average of the two stations (elementwise arithmetic)
(pos1+pos2)/2
## [1]  7.5  0.0  0.0 22.5 12.5  7.5  7.5
# weekly mean at each station
mean(pos1)
## [1] 8.571429
mean(pos2)
## [1] 7.857143
# mean over the whole week, both stations combined
mean((pos1+pos2)/2)
## [1] 8.214286
# summary() reports, among others, the daily min and max over both stations
summary(c(pos1,pos2))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.250   7.500   8.214  10.000  25.000
# range of all measured values
range(c(pos1,pos2))
## [1]  0 25
# median and standard deviation per station
median(pos1)
## [1] 10
sd(pos1)
## [1] 7.480132
median(pos2)
## [1] 5
sd(pos2)
## [1] 8.591247

Task 6: Find a function to generate random numbers (based on a uniform distribution) and generate 10 random numbers between 1 and 35. Store the results in a new object and round to 1 decimal place, calculate the sum of all the generated numbers, and calculate the product of all the generated values, then sort the generated values from smallest to largest.

# Task 6: 10 uniform random numbers in [1, 35], stored in `gen`
# (no seed is set, so the printed values differ between runs)
gen <- runif(n=10,min=1,max=35)
# rounded to 1 decimal place
round(gen,digits = 1)
##  [1]  4.7 16.4 13.0 31.0 19.9  3.4 10.7 10.8 26.4  9.7
# sum of the generated numbers (required by the task; missing in the
# original solution, which only computed the product)
sum(gen)
# product of the generated numbers
prod(gen)
## [1] 61066741552
# values sorted from smallest to largest
sort(gen)
##  [1]  3.365137  4.680054  9.655346 10.702704 10.801369 12.970341 16.426182
##  [8] 19.923106 26.405479 30.993581

Task 7: Calculate the sum of all integers between 1 and 10 000.

# Task 7: 1:10000 expands to all integers from 1 to 10 000; the sum equals
# n*(n+1)/2 = 50 005 000
sum(1:10000)
## [1] 50005000

Task 8: Using the rainfall example from Task 5, check on which days at station 1 the average rainfall was more than 10 mm and less than 20 mm.

# Task 8: which() returns the day indices (Monday = 1) where the condition
# holds; & is the elementwise logical AND
which(pos1>10 & pos1<20)
## [1] 5

Task 9: It was subsequently discovered that the measured rainfall for station 1 on Thursday was incorrect and should have been 30 mm. Check whether this change affects the result of task 8.

# Task 9: correct the Thursday value (4th element) and repeat the check;
# 30 mm lies outside the interval (10, 20), so the result is unchanged
pos1[4] <- 30
which(pos1>10 & pos1<20)
## [1] 5

Task 10: Combine the precipitation data from the two stations given in Task 5 into a matrix. Add a third column to show the average rainfall values at the two stations (by day). Add row labels (names of days of the week) and column labels (station 1, station 2, average) using the colnames and rownames functions.

# Task 10: bind the two stations into a matrix, append the daily mean as a
# third column, then label rows (days) and columns (stations)
pos1 <- c(10,0,0,20,15,10,5)
pos2 <- c(5,0,0,25,10,5,10)
m1 <- cbind(pos1,pos2)
m1 <- cbind(m1,(pos1+pos2)/2)
# NOTE(review): single-letter day labels are duplicated (two "T", two "S");
# matrices permit duplicated rownames, but "Mon".."Sun" would be unambiguous
rownames(m1) <- c("M","T","W","T","F","S","S")
colnames(m1) <- c("Station 1", "Station 2", "Mean")
m1
##   Station 1 Station 2 Mean
## M        10         5  7.5
## T         0         0  0.0
## W         0         0  0.0
## T        20        25 22.5
## F        15        10 12.5
## S        10         5  7.5
## S         5        10  7.5

Task 11: Try to use the apply function on the previously defined matrix (Task 10) to calculate the average (mean) values for station 1 and station 2.

# Task 11: apply() with MARGIN = 2 computes the mean of every column
apply(m1,2,mean)
## Station 1 Station 2      Mean 
##  8.571429  7.857143  8.214286

Task 12: Using the airquality object (you need the data(airquality) function to use the data), check on which days the wind speed was greater than 10 mph (the units in which the wind speed is given), check on which days the air temperature was between 60 and 70 F (the units in which the air temperature is given), and check on which day the maximum and minimum ozone concentrations were measured.

# Task 12: logical checks on the built-in airquality data set
data(airquality)
# row (day) indices where wind speed exceeds 10 mph
which(airquality$Wind>10)
##  [1]   3   4   5   6   8   9  14  15  16  17  18  19  22  24  25  26  28  29  34
## [20]  37  40  41  42  45  46  47  48  50  51  58  59  60  65  67  73  74  75  76
## [39]  78  81  84  88  94 100 103 104 105 107 108 111 112 113 114 115 129 130 131
## [58] 132 134 135 137 138 140 141 142 144 146 147 148 150 151 153
# rows with temperature strictly between 60 and 70 F
which(airquality$Temp>60 & airquality$Temp<70)
##  [1]   1   4   6   7   9  10  12  13  14  16  17  19  20  23  24  28  34  49 140
## [20] 142 144 147 148 153
# row of the maximum / minimum ozone value (which.max/which.min skip NA)
which.max(airquality$Ozone)
## [1] 117
which.min(airquality$Ozone)
## [1] 21

Task 13: Sort the airquality data according to the measured air temperature. Check the functioning of the order and sort functions.

# Task 13: order() returns the permutation of row indices that sorts the
# data, so it can reorder a whole data frame; sort() returns sorted values
head(airquality[order(airquality$Temp),])
##    Ozone Solar.R Wind Temp Month Day
## 5     NA      NA 14.3   56     5   5
## 18     6      78 18.4   57     5  18
## 25    NA      66 16.6   57     5  25
## 27    NA      NA  8.0   57     5  27
## 15    18      65 13.2   58     5  15
## 26    NA     266 14.9   58     5  26
# sorted temperature values themselves
sort(airquality$Temp)
##   [1] 56 57 57 57 58 58 59 59 61 61 61 62 62 63 64 64 65 65 66 66 66 67 67 67 67
##  [26] 68 68 68 68 69 69 69 70 71 71 71 72 72 72 73 73 73 73 73 74 74 74 74 75 75
##  [51] 75 75 76 76 76 76 76 76 76 76 76 77 77 77 77 77 77 77 78 78 78 78 78 78 79
##  [76] 79 79 79 79 79 80 80 80 80 80 81 81 81 81 81 81 81 81 81 81 81 82 82 82 82
## [101] 82 82 82 82 82 83 83 83 83 84 84 84 84 84 85 85 85 85 85 86 86 86 86 86 86
## [126] 86 87 87 87 87 87 88 88 88 89 89 90 90 90 91 91 92 92 92 92 92 93 93 93 94
## [151] 94 96 97
# the index permutation used above
order(airquality$Temp)
##   [1]   5  18  25  27  15  26   8  21   9  23  24   4  20 148  16 144   7  49
##  [19]   6  13  17   1  28  34 140  14  19 142 153  10  12 147 149 137 138 145
##  [37]   2  48 114  22  50  58  73 133   3  11  33  82  56 115 132 151  31  51
##  [55]  53  54  55 110 135 141 152  47  52  60 108 113 136 150  32  57 111 112
##  [73] 131 139  30  37  46 107 109 116  45  59  76 106 130  29  64  74  77  83
##  [91]  92  93  94 117 134 146  38  44  72  78  84  87  95 105 143  61  66  67
## [109]  91  35  62  65  79 129  36  63  81  86  97  85  88  90  96 103 104 118
## [127]  39  41  80  98 128  68  89 119  71  99  40 100 101  75 124  43  69  70
## [145] 102 125  42 126 127 121 123 122 120

Task 14: Using the apply function, calculate the average of all columns in the airquality object.

# Task 14: column means; NA propagates through mean(), so columns with
# missing values come back as NA unless na.rm is set
apply(airquality,2,mean)
##     Ozone   Solar.R      Wind      Temp     Month       Day 
##        NA        NA  9.957516 77.882353  6.993464 15.803922
# na.rm=T drops missing values first (spelling out TRUE is safer than T,
# which is an ordinary, reassignable variable)
apply(airquality,2,mean,na.rm=T)
##      Ozone    Solar.R       Wind       Temp      Month        Day 
##  42.129310 185.931507   9.957516  77.882353   6.993464  15.803922

Task 15: Define a new object in the form of a list where you combine two columns of the airquality dataset and average precipitation objects that you considered in Task 5.

# Task 15: a named list combining two airquality columns with the Task 5
# precipitation vector; list elements may have different lengths
obj <- list(temp=airquality$Temp,veter=airquality$Wind,pad=pos1)
head(obj)
## $temp
##   [1] 67 72 74 62 56 66 65 59 61 69 74 69 66 68 58 64 66 57 68 62 59 73 61 61 57
##  [26] 58 57 67 81 79 76 78 74 67 84 85 79 82 87 90 87 93 92 82 80 79 77 72 65 73
##  [51] 76 77 76 76 76 75 78 73 80 77 83 84 85 81 84 83 83 88 92 92 89 82 73 81 91
##  [76] 80 81 82 84 87 85 74 81 82 86 85 82 86 88 86 83 81 81 81 82 86 85 87 89 90
## [101] 90 92 86 86 82 80 79 77 79 76 78 78 77 72 75 79 81 86 88 97 94 96 94 91 92
## [126] 93 93 87 84 80 78 75 73 81 76 77 71 71 78 67 76 68 82 64 71 81 69 63 70 77
## [151] 75 76 68
## 
## $veter
##   [1]  7.4  8.0 12.6 11.5 14.3 14.9  8.6 13.8 20.1  8.6  6.9  9.7  9.2 10.9 13.2
##  [16] 11.5 12.0 18.4 11.5  9.7  9.7 16.6  9.7 12.0 16.6 14.9  8.0 12.0 14.9  5.7
##  [31]  7.4  8.6  9.7 16.1  9.2  8.6 14.3  9.7  6.9 13.8 11.5 10.9  9.2  8.0 13.8
##  [46] 11.5 14.9 20.7  9.2 11.5 10.3  6.3  1.7  4.6  6.3  8.0  8.0 10.3 11.5 14.9
##  [61]  8.0  4.1  9.2  9.2 10.9  4.6 10.9  5.1  6.3  5.7  7.4  8.6 14.3 14.9 14.9
##  [76] 14.3  6.9 10.3  6.3  5.1 11.5  6.9  9.7 11.5  8.6  8.0  8.6 12.0  7.4  7.4
##  [91]  7.4  9.2  6.9 13.8  7.4  6.9  7.4  4.6  4.0 10.3  8.0  8.6 11.5 11.5 11.5
## [106]  9.7 11.5 10.3  6.3  7.4 10.9 10.3 15.5 14.3 12.6  9.7  3.4  8.0  5.7  9.7
## [121]  2.3  6.3  6.3  6.9  5.1  2.8  4.6  7.4 15.5 10.9 10.3 10.9  9.7 14.9 15.5
## [136]  6.3 10.9 11.5  6.9 13.8 10.3 10.3  8.0 12.6  9.2 10.3 10.3 16.6  6.9 13.2
## [151] 14.3  8.0 11.5
## 
## $pad
## [1] 10  0  0 20 15 10  5

Task 16: Calculate the average values of all 5 variables found in the data from the Veliko Širje water gauging station on the Savinja River (for 2005).

# Task 16: read the semicolon-separated station file (absolute path; adjust
# to your environment) and average columns 2-6; column 1 holds the date
podatki <- read.table(file="C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/Savinja-Veliko SirjeI-2005.txt",header=TRUE,sep=";",dec=".") 
apply(podatki[,2:6],2,mean)
##                             vodostaj.cm. 
##                                229.70685 
##                             pretok.m3.s. 
##                                 42.50639 
##                             temp.vode.C. 
##                                 10.57425 
## transport_suspendiranega_materiala.kg.s. 
##                                 10.68967 
##  vsebnost_suspendiranega_materiala.g.m3. 
##                                 57.06575

Task 17: Import any data you have ever used into the R software environment.

# Task 17: example data import; sep=";" and dec="." match the file format
# (absolute path - replace with the location of your own file)
podatki <- read.table(file="C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/Savinja-Veliko SirjeI-2005.txt",header=TRUE,sep=";",dec=".") 

Task 18: Save any object in .Rdata format and then load it back into R using the load function.

# Task 18: save an object to .Rdata and load it back with load().
# Store the destination once so save() and load() are guaranteed to use the
# same file; the original saved to an absolute path but loaded from a
# relative one, which fails unless the working directory happens to match.
pot <- "C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/Naloge-gradivo/padavine.Rdata"
save(pos1,file=pot)
# object pos1 must exist
load(pot)

Task 19: Find a package to use and calculate L-moments, install and use the package, and calculate the L-moments of the flow data you worked on in Task 16 (L-moments are similar to ordinary statistical moments, mean, variance, skewness, kurtosis).

# Task 19: L-moments of the discharge sample via the lmomco package;
# $lambdas are the L-moments, $ratios the L-moment ratios (L-CV, L-skew, ...)
# install.packages("lmomco")
library(lmomco)
lmoms(podatki$pretok.m3.s.)
## $lambdas
## [1] 42.506386 20.381628 10.237436  6.157273  4.216836
## 
## $ratios
## [1]        NA 0.4794957 0.5022874 0.3020992 0.2068940
## 
## $trim
## [1] 0
## 
## $leftrim
## NULL
## 
## $rightrim
## NULL
## 
## $source
## [1] "lmoms"

Task 20: For the data included in the airGR package, calculate the basic statistics using the apply function and the basic statistics for columns 2, 3, and 4 using the summary function, and identify which hydrological variables are involved and what their units are.

# Task 20: summary() applied column-wise to the airGR example basin data;
# note that apply() coerces the data frame to a matrix first
library(airGR) # activation of a package called airGR  
data(L0123001)
apply(BasinObs[,c(2,3,4)],2,summary)
##                 P          T        E
## Min.     0.000000 -18.700000 0.000000
## 1st Qu.  0.000000   4.100000 0.600000
## Median   0.300000   9.100000 1.400000
## Mean     2.914595   9.147088 1.764099
## 3rd Qu.  3.600000  14.500000 2.900000
## Max.    66.800000  28.400000 5.500000
# 2 column-rainfall (mm), 3 column-Air temperature (C)
# 4 column evapotranspiration (mm) 

Task 21: Calculate the maximum, minimum, and average values of the flows in each period (Jan–Mar, Apr–Jun, etc., i.e. ¼ of the year). Hint: you are looking for a function similar to as.yearmon.

# Task 21: quarterly flow statistics with zoo; as.yearqtr() maps each date
# to its year-quarter, aggregate() then applies max/min/mean per quarter
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
podatki[,1] <- as.POSIXct(strptime(podatki[,1],format="%d.%m.%Y")) 
Qzoo <- zoo(podatki[,3],podatki[,1]) # define the zoo object
# +3600 s shifts each timestamp by one hour - presumably to avoid
# midnight/time-zone boundary effects when converting to quarters; TODO confirm
cet <- as.yearqtr(time(Qzoo)+3600)
aggregate(Qzoo, cet, max) 
## 2005 Q1 2005 Q2 2005 Q3 2005 Q4 
##  79.226 179.064 369.615 382.100
aggregate(Qzoo, cet, min) 
## 2005 Q1 2005 Q2 2005 Q3 2005 Q4 
##   9.309   9.568  15.205  10.400
aggregate(Qzoo, cet, mean) 
##  2005 Q1  2005 Q2  2005 Q3  2005 Q4 
## 21.01474 34.82178 65.38862 48.54896

Task 22: For the data from the Savinja watercourse (water level, discharge, temperature, and suspended solids transport), draw a boxplot and label the graph with title, axis labels, etc. The graph’s y-axis should be drawn in log scale.

# Task 22: boxplots of the Savinja variables with a log-scaled y axis
# (log="y") and labelled axes/titles
podatki[,1] <- as.POSIXct(strptime(podatki[,1],format="%d.%m.%Y")) 
boxplot(podatki$vodostaj.cm.,log="y",ylab="Water level [cm]",main="Savinja") 

# typo fixed: "Discahrge" -> "Discharge"
boxplot(podatki[,3],log="y",ylab="Discharge [m3/s]",main="Savinja")

boxplot(podatki[,5],log="y",ylab="Sediment transport [kg/s]",main="Savinja")

Task 23: From a set of more advanced graphs (the R graph gallery[^33] website gives a good overview), choose one type of graph to draw and fit the selected graph accordingly.

# Task 23: an example from the R graph gallery - a 2-D density plot
# (contour polygons) of discharge against water level
# random case
library(ggplot2)
ggplot(podatki, aes(x=pretok.m3.s., y=vodostaj.cm.) ) +
  # after_stat(level) replaces the `..level..` notation that was
  # deprecated in ggplot2 3.4.0 (the original emitted that warning)
  stat_density_2d(aes(fill = after_stat(level)), geom = "polygon")

Task 24: Write a function that allows you to normalise data.

# Task 24: min-max normalisation - linearly rescale a numeric vector to
# [0, 1]; the smallest value maps to 0 and the largest to 1.
norm <- function(x) {
  najm <- min(x)
  (x - najm) / (max(x) - najm)
}
norm(c(4,3,5,3.3,4.5))
## [1] 0.50 0.00 1.00 0.15 0.75

Task 25: Define a function that takes two arguments (an x vector and a y vector) and plots a scatter plot of these two vectors, where the x and y axes are in log scale.

# Task 25: scatter plot of x against y with both axes on a logarithmic
# scale. The original plotted log(x) vs log(y) on *linear* axes; the task
# asks for log-scaled axes, which plot() provides via log = "xy"
# (all values must be strictly positive).
graf <- function(x, y) {
  plot(x, y, log = "xy")
}
graf(c(1:3), c(2:4))

Task 26: Define and use a function that calculates the mean and variance of your sample.

# Task 26: return the mean and the (sample) variance of a numeric vector
# as a length-2 vector c(mean, variance).
povvar <- function(x) {
  c(mean(x), var(x))
}
povvar(3:50)
## [1]  26.5 196.0

Task 27: Use an if statement to check whether all the daily flow data from the Savinja River for 2005 are available. The result of the if statement should be in descriptive form (character) and should tell you whether there are enough data or whether some data are missing.

# Task 27: two ways to verify completeness of the daily 2005 series
# Based on length
# i) by length: a non-leap year has 365 daily values
if(length(podatki$vodostaj.cm.)==365) {
"All data is available"  
} else {
"Part of data is missing"  
}
## [1] "All data is available"
# using NA function
# ii) by missing values: any(is.na(...)) is TRUE if at least one NA occurs
if(any(is.na(podatki$vodostaj.cm.))) {
"At least one day is missing"  
} else {
"All numerical data is available"  
}
## [1] "All numerical data is available"

Task 28: Using a for loop, randomly generate the vector x and the vector y 5 times (both vectors should contain 10 elements, given a normal distribution, mean 0, and standard deviation 1), and plot both vectors on a scatter plot using the plot function.

# Task 28: five repetitions - draw two independent N(0, 1) samples of
# size 10 and show them on a scatter plot
for (ponovitev in seq_len(5)) {
  x <- rnorm(10)
  y <- rnorm(10)
  plot(x, y)
}

Task 29: Plot a graph of two randomly generated samples. Generate both samples using a normal distribution: once with mean 0 and standard deviation 2, then with mean 3 and standard deviation 5. Label the graph accordingly with axis labels, legend, etc.

# Task 29: two normal samples with different means/sds, plotted against
# each other with axis labels and a title
x <- rnorm(9,mean=0,sd=2)
y <- rnorm(9,mean=3,sd=5)
plot(x,y,xlab="First sample",ylab="Second sample",main="Data generation")

Task 30: Find an appropriate test that can be used to calculate whether or not a trend in the sample is statistically significant. Download the appropriate package and use the test with one randomly generated sample based on a normal distribution with mean 10 and standard deviation 5 (generate 20 numbers). Repeat the procedure for the sequence of numbers from 1:20.

# Task 30: Mann-Kendall trend test (Kendall package); a random normal
# sample shows no significant trend, while 1:20 is perfectly monotone
# (tau = 1, p-value effectively zero)
library(Kendall)
## Warning: package 'Kendall' was built under R version 4.1.3
MannKendall(rnorm(20,mean=10,sd=5))
## tau = -0.147, 2-sided pvalue =0.38103
MannKendall(1:20)
## tau = 1, 2-sided pvalue =< 2.22e-16

Task 31: Find a suitable function or model that describes the relationship between flow and suspended solids such that r2 is greater than 0.3. Use data from the Veliko Širje water gauging station on the Savinja River.

# Task 31: power-law rating curve C = a*Q^b fitted with nonlinear least
# squares; r2 is computed manually as 1 - RSS/TSS (nls has no r2)
mod <- nls(vsebnost_suspendiranega_materiala.g.m3. ~ a*(pretok.m3.s.)^b, data = podatki, start = list(a=10,b=3)) 
RSS.p <- sum(residuals(mod)^2)
TSS <- sum((podatki$vsebnost_suspendiranega_materiala.g.m3. - mean(podatki$vsebnost_suspendiranega_materiala.g.m3.))^2)
r2 <- 1 - (RSS.p/TSS)
r2 # r2 for the model used
## [1] 0.3419598

Task 32: Download daily flow data from the Sava Šentjakob water gauging station for the period 1990–2021 from the website of the Slovenian Environment Agency. Using the lfstat package, analyse the maximum drought events (minimum annual flows) in each year. Perform both frequency analyses of the low flows (Pearson distribution of type 3) and analyses of the duration of these drought events.

# Task 32: low-flow analysis with the lfstat package - annual minima,
# Pearson type 3 frequency analysis, and drought-duration analysis
Sava <- read.delim("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/Naloge-gradivo/hid_3570_1990_2021.txt")
Sava[,1] <- as.POSIXct(strptime(Sava[,1],format="%d.%m.%Y")) 
library(lfstat);library(zoo)
## Warning: package 'lfstat' was built under R version 4.1.3
## Loading required package: xts
## Loading required package: lmom
## 
## Attaching package: 'lmom'
## The following objects are masked from 'package:lmomco':
## 
##     cdfexp, cdfgam, cdfgev, cdfglo, cdfgno, cdfgpa, cdfgum, cdfkap,
##     cdfln3, cdfnor, cdfpe3, cdfwak, cdfwei, quaexp, quagam, quagev,
##     quaglo, quagno, quagpa, quagum, quakap, qualn3, quanor, quape3,
##     quawak, quawei
## Loading required package: lattice
# define zoo object
QzooSava <- zoo(Sava[,3],Sava[,1]) 
# object lfobj, argument hyearstart specifies the start of the hydrological year
nizkiQSava <- createlfobj(ts(QzooSava), startdate = "01/01/1990", hyearstart = 1)
setlfunit("m^3/s") # units
# MAM with n = 1: mean annual minimum series (annual minima)
nletniSava <- MAM(nizkiQSava,n=1,yearly=TRUE)   
# frequency analysis of annual minima, Pearson type 3 distribution
rezSava <- tyears(nizkiQSava, dist = "pe3", event = 100, plot = TRUE)
## Warning in as.xts.lfobj(x): No unit found in attributes, assuming 'm3/s'.
## Use flowunit(x) <- "l/s" to define the flow unit. See help(flowunit).
## Warning: For fitting minima, a Weibull distribution with parameter 'zeta = 0'
## may be best.

# duration analysis: maximum yearly drought duration ("d"), with pooling
maksTrajanjeSava <- tyearsS(nizkiQSava, dist = "pe3", variable = "d", aggr = "max", event = 100, hyearstart = 1, pooling = "IC")
## Warning in as.xts.lfobj(x): No unit found in attributes, assuming 'm3/s'.
## Use flowunit(x) <- "l/s" to define the flow unit. See help(flowunit).
## Warning in evfit(x = ag, distribution = dist, zeta = zeta, check = check, :
## There were 8 years with zero flow extremes. Therefore a mixed distribution with
## p_0 = 0.25 and zeta = '0' was fitted. L-moments and parameters are only valid
## for the censored time series. See ?tyears for details.

Task 33: Download from the website of the Slovenian Environment Agency the annual peak flow data from the Sava Šentjakob water gauging station for the period 1990–2021 (as an alternative, you can use the data from Task 32). Using the packages lmom and lmomco, analyse the peak flows. Perform flood frequency analyses of peak flows (GEV and Gumbel distribution).

# Task 33: flood frequency analysis of annual maxima with GEV and Gumbel
# distributions fitted by L-moments (lmomco)
# the procedure will be shown using the daily data from Task 32
Sava <- read.delim("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/Naloge-gradivo/hid_3570_1990_2021.txt")
Sava[,1] <- as.POSIXct(strptime(Sava[,1],format="%d.%m.%Y")) 
library(zoo)
QzooSava <- zoo(Sava[,3],Sava[,1]) 
# year of each observation (floor of year-month); annual maxima follow
leto1Sava <- as.numeric(floor(as.yearmon(time(QzooSava)+3600))) 
letQkonSava <- aggregate(QzooSava,leto1Sava,max,na.rm=TRUE) 
# detach lfstat/lmom so lmomco's functions are not masked
library(lmomco);detach(package:lfstat); detach(package:lmom)
lmomentiSava <- lmoms(as.numeric(letQkonSava))
# distribution parameters estimated from sample L-moments
gevparSava <- lmom2par(lmomentiSava,type="gev") 
gumparSava <- lmom2par(lmomentiSava,type="gum") 
# return periods of interest and the corresponding non-exceedance probabilities
Pdobe <- c(2,5,10,20,30,50,100,300,500,1000) 
verj <- 1-1/Pdobe 
rezgumSava <- quagum(verj,gumparSava)
rezgevSava <- quagev(verj,gevparSava) 
# Weibull plotting positions for the observed annual maxima
weibullSava <- pp(as.numeric(letQkonSava),a=0) 
Pdobe1Sava <- 1/(1-weibullSava) 
plot(Pdobe,rezgevSava,type="l",log="xy",xlab="Return period [years]",
ylab="Discharge [m3/s]",main="Sava-Sentjakob",ylim=c(500,1500)) 
points(Pdobe1Sava,sort(as.numeric(letQkonSava))) 
lines(Pdobe,rezgumSava,col="red") 

Task 34: Download from the website of the Slovenian Environment Agency the data about annual peaks from the water gauging stations on the Sava River: Radovljica I, Šentjakob, Litija (and Litija I), Hrastnik, and Čatež I for the period 2000–2021. Perform trend analyses for individual water gauging stations (choose any test), as well as a regional Mann-Kendall test.

# Task 34: Mann-Kendall trend test applied to every column of the station
# table (including the year column, whose trivial tau = 1 can be ignored);
# note apply() coerces the data frame to a matrix
SavaPostaje <- read.delim("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/Naloge-gradivo/SavaPostaje.txt")
library(trend) 
apply(SavaPostaje,2,mk.test)
## $Leto
## 
##  Mann-Kendall trend test
## 
## data:  newX[, i]
## z = 6.4855, n = 22, p-value = 8.842e-11
## alternative hypothesis: true S is not equal to 0
## sample estimates:
##        S     varS      tau 
##  231.000 1257.667    1.000 
## 
## 
## $Radovljica
## 
##  Mann-Kendall trend test
## 
## data:  newX[, i]
## z = 0.45117, n = 22, p-value = 0.6519
## alternative hypothesis: true S is not equal to 0
## sample estimates:
##            S         varS          tau 
## 1.700000e+01 1.257667e+03 7.359307e-02 
## 
## 
## $Sentjakob
## 
##  Mann-Kendall trend test
## 
## data:  newX[, i]
## z = 0.22558, n = 22, p-value = 0.8215
## alternative hypothesis: true S is not equal to 0
## sample estimates:
##            S         varS          tau 
## 9.000000e+00 1.257667e+03 3.896104e-02 
## 
## 
## $Litija
## 
##  Mann-Kendall trend test
## 
## data:  newX[, i]
## z = 0.73315, n = 22, p-value = 0.4635
## alternative hypothesis: true S is not equal to 0
## sample estimates:
##            S         varS          tau 
##   27.0000000 1257.6666667    0.1168831 
## 
## 
## $Hrastnik
## 
##  Mann-Kendall trend test
## 
## data:  newX[, i]
## z = 0.3103, n = 22, p-value = 0.7563
## alternative hypothesis: true S is not equal to 0
## sample estimates:
##            S         varS          tau 
## 1.200000e+01 1.256667e+03 5.206086e-02 
## 
## 
## $Catez
## 
##  Mann-Kendall trend test
## 
## data:  newX[, i]
## z = -0.16919, n = 22, p-value = 0.8656
## alternative hypothesis: true S is not equal to 0
## sample estimates:
##             S          varS           tau 
##   -7.00000000 1257.66666667   -0.03030303
# Regional Mann-Kendall test: the five station series are stacked into one
# vector, with `block` identifying the station of each observation and
# `date` repeating the years accordingly
library(rkt) 
rkt(date=rep(SavaPostaje[,1],5),y=c(SavaPostaje[,2],SavaPostaje[,3],SavaPostaje[,4],SavaPostaje[,5],SavaPostaje[,6]),block=c(rep(1,length(SavaPostaje[,1])),rep(2,length(SavaPostaje[,1])),rep(3,length(SavaPostaje[,1])),rep(4,length(SavaPostaje[,1])),rep(5,length(SavaPostaje[,1])))) 
## 
## Standard model
## Tau = 0.05021645
## Score =  58
## var(Score) =  6287.333
## 2-sided p-value =  0.4722299
## Theil-Sen's (MK) or seasonal/regional Kendall (SKT/RKT) slope=  2.57325

Task 35: Based on the Joe copula with parameter value 2, generate a bivariate sample with 40 elements. Based on the generated sample, estimate the parameters of the Frank copula and check the suitability of the Frank copula to describe this sample. Additionally, graphically compare the generated sample based on the Frank copula (N=500) and the Joe copula.

# Task 35: generate a bivariate sample from a Joe copula (theta = 2), test
# the goodness of fit of a Frank copula to it, then compare samples visually
library(copula) 
## Warning: package 'copula' was built under R version 4.1.3
gen <- rCopula(40,joeCopula(2))
# multiplier-bootstrap GOF test (Sn statistic, maximum pseudo-likelihood)
gofCopula(copula=frankCopula(),gen,optim.method = "L-BFGS-B",N=500,estim.method="mpl",simulation="mult",method="Sn") 
## 
##  Multiplier bootstrap-based goodness-of-fit test of Frank copula, dim.
##  d = 2, with 'method'="Sn", 'estim.method'="mpl":
## 
## data:  x
## statistic = 0.037056, parameter = 4.9448, p-value = 0.02096
# fit the Frank copula parameter and draw 500 points from the fitted copula
param <- fitCopula(copula=frankCopula(),data=gen,method="mpl") 
kopul2 <- frankCopula(param@estimate, dim=2) 
plot(rCopula(500,kopul2))
# overlay the original Joe-copula sample in red
points(gen,col="red")

Task 36: For data from one of the ARSO stations, perform seasonality analyses: i) calculate the seasonality coefficient and ii) draw a circular graph showing the timing of the selected events. Repeat the analyses for the annual peak (AM) sample and for the POT sample, selecting on average 3 events above the selected threshold per year (POT3).

# Task 36: seasonality of annual flow peaks - the day of occurrence of each
# annual maximum is mapped onto the unit circle
library(zoo)
Sava <- read.delim("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/Naloge-gradivo/hid_3570_1990_2021.txt")
Sava[,1] <- as.POSIXct(strptime(Sava[,1],format="%d.%m.%Y")) 
QzooSava <- zoo(Sava[,3],Sava[,1]) 
leto1Sava <- as.numeric(floor(as.yearmon(time(QzooSava)+3600))) 
# which.max per year gives the index (day of year) of each annual peak
letQkonSava <- aggregate(QzooSava,leto1Sava,which.max)
dan <- letQkonSava 
# day of year -> angle in radians (365-day year assumed)
kot <- (dan-0.5)*(2*pi/365) 
xpov <- mean(cos(kot));x <- cos(kot)  
ypov <- mean(sin(kot));y <- sin(kot)  
# r in [0, 1] is the seasonality coefficient (1 = all peaks on the same day)
r <- sqrt(xpov*xpov+ypov*ypov)
# Convert the mean direction (xpov, ypov) back into a day of the year; the
# branches implement a quadrant-aware arctangent (equivalent to atan2).
# The three conditions are exhaustive; the original ended with a stray
# `else end` (`end` is a base R *function*, not a keyword), replaced here by
# an explicit, unreachable NA fallback.
if(xpov>=0 && ypov>=0) dan.rez <- (atan(ypov/xpov))*(365/(2*pi)) else
  if(xpov<0) dan.rez <- (atan(ypov/xpov) + pi)*(365/(2*pi)) else
    if (ypov<0 && xpov >= 0) dan.rez <- (atan(ypov/xpov) + 2*pi)*(365/(2*pi)) else
      dan.rez <- NA
dan.rez 
## [1] 316.1657
# Circular plot: one bubble per annual peak (position = timing on the unit
# circle, size = day index), with the mean direction overlaid in red
symbols(x,y,letQkonSava,inches=0.25,bg="lightblue",xlab="",ylab="",
xlim=c(-1.1,1.1),ylim=c(-1.1,1.1),fg="white",
main="Seasonal representation of vQvk");abline(0,0);abline(v=0) 
par(new=TRUE)
symbols(xpov,ypov,mean(letQkonSava),inches=0.25,bg="red",
xlab="",ylab="",xlim=c(-1.1,1.1),ylim=c(-1.1,1.1),fg="white") 
text(xpov,ypov,"Average") 
# quadrant labels marking the approximate season
text(1,0.1,labels="January")
text(0.2,1,labels="April")
text(-1,0.1,labels="July")
text(0.2,-1,labels="October") 

Task 37: From the ARSO website (meteo.si, measurement archive), obtain monthly precipitation data for the period 1961-2020 for the Murska Sobota station and calculate the standardised precipitation indices (SPI1, SPI3, SPI6, and SPI12). Graphically show the calculated indices.

# Task 37: standardised precipitation indices (SPI) at time scales of 1, 3,
# 6 and 12 months from the monthly Murska Sobota series (SPEI package);
# column 2 holds the monthly precipitation totals
murska <- read.delim("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/Naloge-gradivo/MurskaS.txt",header=FALSE)
library(SPEI) 
## Warning: package 'SPEI' was built under R version 4.1.3
## # Package SPEI (1.8.1) loaded [try SPEINews()].
spiM1 <- spi(data=murska[,2], scale=1, distribution = "Gamma")  
## [1] "Calculating the Standardized Precipitation Evapotranspiration Index (SPEI) at a time scale of 1. Using kernel type 'rectangular', with 0 shift. Fitting the data to a Gamma distribution. Using the ub-pwm parameter fitting method. Checking for missing values (`NA`): all the data must be complete. Using the whole time series as reference period. Input type is vector. No time information provided, assuming a monthly time series."
plot(spiM1) 
## Warning: Removed 732 rows containing missing values (`geom_point()`).

spiM3 <- spi(data=murska[,2], scale=3, distribution = "Gamma")  
## [1] "Calculating the Standardized Precipitation Evapotranspiration Index (SPEI) at a time scale of 3. Using kernel type 'rectangular', with 0 shift. Fitting the data to a Gamma distribution. Using the ub-pwm parameter fitting method. Checking for missing values (`NA`): all the data must be complete. Using the whole time series as reference period. Input type is vector. No time information provided, assuming a monthly time series."
plot(spiM3) 
## Warning: Removed 730 rows containing missing values (`geom_point()`).

spiM6 <- spi(data=murska[,2], scale=6, distribution = "Gamma")  
## [1] "Calculating the Standardized Precipitation Evapotranspiration Index (SPEI) at a time scale of 6. Using kernel type 'rectangular', with 0 shift. Fitting the data to a Gamma distribution. Using the ub-pwm parameter fitting method. Checking for missing values (`NA`): all the data must be complete. Using the whole time series as reference period. Input type is vector. No time information provided, assuming a monthly time series."
plot(spiM6) 
## Warning: Removed 727 rows containing missing values (`geom_point()`).

spiM12 <- spi(data=murska[,2], scale=12, distribution = "Gamma")  
## [1] "Calculating the Standardized Precipitation Evapotranspiration Index (SPEI) at a time scale of 12. Using kernel type 'rectangular', with 0 shift. Fitting the data to a Gamma distribution. Using the ub-pwm parameter fitting method. Checking for missing values (`NA`): all the data must be complete. Using the whole time series as reference period. Input type is vector. No time information provided, assuming a monthly time series."
plot(spiM12) 
## Warning: Removed 721 rows containing missing values (`geom_point()`).

Task 38: Produce Huff curves for rainfall events with a duration between 12 and 24 h and determine both the median Huff curves and the confidence intervals (10% and 90% percentile curves).

# Task 38: Huff curves for rainfall events lasting 12-24 h; the 5-minute
# precipitation series and the pre-computed event table (`dogodki`) are loaded
data <- read.table("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/Ljubljana-padavine.txt",header=FALSE, sep=" ") 
dataP <- data.frame(Datum=as.POSIXct(paste0(data[,1]," ",data[,2]),format="%Y-%m-%d %H:%M:%S"),P=((data[,3])))
load("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/Naloge-gradivo/erozivni.Rdata")
# events with a duration strictly between 12 and 24 hours
izbira <- which(dogodki$D_h<24 & dogodki$D_h>12) 
# Normalised time grid (21 points, 0 to 1 in steps of 0.05) and one matrix
# row per selected event holding its normalised cumulative rainfall curve.
zaporedje <- seq(from=0,to=1,by=0.05) 
huffvmes <- matrix(NA,length(izbira),length(zaporedje)) 
for(i in seq_along(izbira)){
# event window: 2 half-hour steps per hour of duration, ending at endIndex
padizb <- dataP[(dogodki$endIndex[izbira[i]]-2*dogodki$D_h[izbira[i]]):dogodki$endIndex[izbira[i]],2] 
vsotapad <- sum(padizb) 
# interpolate the normalised cumulative rainfall onto the common grid;
# the original looped over 0:length(zaporedje), where k = 0 was a silent
# zero-index no-op - seq_along covers exactly the valid columns 1..21
for(k in seq_along(zaporedje)){
huffvmes[i,k] <- approx(seq(0,1,length=length(cumsum(padizb))),cumsum(padizb)/vsotapad,xout=zaporedje[k])$y 
}
}
# pin the endpoints of every curve to 0 and 1 exactly
huffvmes[,1] <- 0 
huffvmes[,21] <- 1 
# 10 % / 50 % (median) / 90 % percentile Huff curves across events
spmeja <- apply(huffvmes,2,quantile,probs=0.1) 
med <- apply(huffvmes,2,quantile,probs=0.5) 
zgmeja <- apply(huffvmes,2,quantile,probs=0.9) 
plot(zaporedje,med,col="red",lty=1,lwd=1,xlab="Normalized time",ylab="Normalized rainfall",main="Duration 12-24 h") 
# dashed lines mark the 10 % and 90 % confidence bounds
lines(zaporedje,spmeja,col="red",lty=2,lwd=2) 
lines(zaporedje,zgmeja,col="red",lty=2,lwd=2) 

Task 39: Download daily precipitation data for 5 stations in eastern Slovenia (for the period 2020–2023) from meteo.si, define a stochastic precipitation model, and generate 20 random realisations of 3 years of precipitation data for each station.

# Task 39: stochastic daily precipitation generator (GWEX) fitted to the
# station columns; 20 realisations of 3 years (2025-2027) are simulated.
# NOTE(review): only three station columns (4:6) are used here although the
# task statement mentions five stations - verify the column selection
padavine <- read.table("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/Naloge-gradivo/padavine.txt",header=TRUE) 
library(GWEX) 
skupajpad <- as.matrix(padavine[,4:6]) 
myObsPrecpad <- GwexObs(variable='Prec',date=as.Date(padavine[,1],format="%d.%m.%Y"),obs=skupajpad) 
# th: threshold (mm) separating dry from wet days; nChainFit: length of the
# simulated chain used during fitting - TODO confirm with ?fitGwexModel
myparPrecpad <- fitGwexModel(myObsPrecpad, listOption=list(th=0.5,nChainFit=1000)) 
## [1] "Fit generator"
rez <- simGwexModel(myparPrecpad,nb.rep=20,d.start = as.Date("01012025","%d%m%Y"),
    d.end = as.Date("31122027","%d%m%Y"))  
## [1] "Generate scenarios"

Task 40: Using data from the Veliko Širje water gauging station (Savinja, year 2005), evaluate the performance of the regression tree algorithm for predicting suspended solids concentrations (based on flow and water temperature data), using part of the data (approx. 80%) for model calibration and the remainder for validation.

# Task 40: regression tree (rpart) predicting suspended-sediment
# concentration from discharge and water temperature; 290 of 365 days
# (~80 %) are sampled for calibration, the remaining 75 for validation
podatki <- read.table(file="C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/Savinja-Veliko SirjeI-2005.txt",header=TRUE,sep=";",dec=".") 
set.seed(1) 
izbirased <- sample.int(365,290) 
kalibsed <- podatki[izbirased,] 
validsed <- podatki[-izbirased,] 
library(rpart)
library(rpart.plot) 
## Warning: package 'rpart.plot' was built under R version 4.1.3
drevokalibsed <- rpart(vsebnost_suspendiranega_materiala.g.m3. ~ pretok.m3.s. + temp.vode.C.,data=kalibsed) 
napovedsed <- predict(drevokalibsed, validsed, type = 'vector')
# Pearson correlation between observed and predicted validation values
cor(validsed$vsebnost_suspendiranega_materiala.g.m3.,napovedsed)
## [1] 0.6130527
# we see that the chosen model predicts the concentration values relatively poorly 
# of suspended solids at the Veliko Širje (Savinja) station

Task 41: Based on the digital elevation model data, determine the slope map using the terrain function in the terra package. Calculate the basic slope statistics.

# Task 41: slope map from a digital elevation model with terra::terrain();
# summary() then gives the basic slope statistics
library(terra)
## Warning: package 'terra' was built under R version 4.1.3
## terra 1.7.23
## 
## Attaching package: 'terra'
## The following object is masked from 'package:zoo':
## 
##     time<-
teren <- rast("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/GIS/SoraSuha.sdat")  
naklon <- terrain(teren,v="slope") # in degrees
summary(naklon)
##      slope      
##  Min.   : 0.02  
##  1st Qu.:11.10  
##  Median :16.39  
##  Mean   :16.64  
##  3rd Qu.:21.74  
##  Max.   :47.13  
##  NA's   :42619

Task 42: Perform calibration of the GR5J model for the period 2005–2011 (using data for the Selška Sora river basin) and validate the model using the calibrated parameters for 2012 and 2013. Show the results of the model validation.

# Task 42: calibrate the GR5J model on 2005-2011 and validate on 2012-2013.
library(airGR) 
data <- read.table("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/airGR/Data-Example.txt",header=T) 
# Oudin potential evapotranspiration from day-of-year and air temperature.
potET <- PE_Oudin(JD = as.POSIXlt(strptime(data[,1], "%d.%m.%Y"))$yday, Temp = data[,5], Lat = 46.2, LatUnit = "deg")
InputsModel <- CreateInputsModel(FUN_MOD = RunModel_GR5J, DatesR = as.POSIXct(strptime(data[,1], "%d.%m.%Y")), Precip = data[,4], PotEvap = potET)
# Last row of the calibration period.
# NOTE(review): data[,1] is a character column, so format() returns it
# unchanged; the comparison works only because the dates are already stored
# as dd.mm.YYYY text - confirm the input format.
konec <- which(format(data[,1], format = "%d.%m.%Y")=="31.12.2011") 
# Calibration run covers 2006-2011; 2005 (rows 1-365) serves as warm-up.
Ind_Run <- seq(366, konec) 
RunOptions <- CreateRunOptions(FUN_MOD = RunModel_GR5J, InputsModel = InputsModel, IndPeriod_Run = Ind_Run, IndPeriod_WarmUp = 1:365) 
# Nash-Sutcliffe efficiency on observed discharge (column 3) as criterion.
InputsCrit <- CreateInputsCrit(FUN_CRIT = ErrorCrit_NSE, InputsModel = InputsModel, RunOptions = RunOptions, Obs = data[366:konec,3]) 
CalibOptions <- CreateCalibOptions(FUN_MOD = RunModel_GR5J, FUN_CALIB = Calibration_Michel) 
# Michel calibration: grid screening followed by steepest-descent search.
OutputsCalib <- Calibration_Michel(InputsModel = InputsModel, RunOptions = RunOptions, InputsCrit = InputsCrit, CalibOptions = CalibOptions, FUN_MOD = RunModel_GR5J) 
## Grid-Screening in progress (0% 20% 40% 60% 80% 100%)
##   Screening completed (243 runs)
##       Param =  445.858,   -1.386,   83.931,    1.105,    0.467
##       Crit. NSE[Q]       = 0.4626
## Steepest-descent local search in progress
##   Calibration completed (62 iterations, 797 runs)
##       Param =  660.254,   -0.664,  117.290,    0.500,    0.430
##       Crit. NSE[Q]       = 0.7406
# Calibrated parameter vector reused for validation.
ParamGR5J <- OutputsCalib$ParamFinalR 
# Validation window: 2012-2013.
konec2 <- which(format(data[,1], format = "%d.%m.%Y")=="31.12.2013") 
Ind_Run1 <- seq((konec+1),konec2) 
# No warm-up given: airGR defaults to the year preceding the run period.
RunOptions1 <- CreateRunOptions(FUN_MOD = RunModel_GR5J, InputsModel = InputsModel, IndPeriod_Run = Ind_Run1) 
## Warning in CreateRunOptions(FUN_MOD = RunModel_GR5J, InputsModel = InputsModel, : model warm up period not defined: default configuration used
##   the year preceding the run period is used
OutputsModel1 <- RunModel_GR5J(InputsModel = InputsModel, RunOptions = RunOptions1, Param = ParamGR5J) 
# Plot validated simulation against observed discharge.
plot(OutputsModel1, Qobs = data[Ind_Run1,3]) 

Task 43: Run the GR4J model for 2006–2007 with data from the Selška Sora river basin and randomly generated model parameters (uncalibrated), using the function set.seed(15) before randomly generating 4 numbers. The ranges of the parameters are: x1: 0–2500 mm; x2: −5 to 5 mm; x3: 0–1000 mm; x4: 0–10 days. Use a uniform distribution. Run the model and compare the results with the measurements.

# Task 43: run GR4J for 2006-2007 with randomly drawn, uncalibrated
# parameters and compare the simulation against observations.
library(airGR)
data <- read.table("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/airGR/Data-Example.txt", header = TRUE)
# Oudin potential evapotranspiration from day-of-year and air temperature.
potET <- PE_Oudin(JD = as.POSIXlt(strptime(data[, 1], "%d.%m.%Y"))$yday, Temp = data[, 5], Lat = 46.2, LatUnit = "deg")
gr4j_inputs <- CreateInputsModel(FUN_MOD = RunModel_GR4J, DatesR = as.POSIXct(strptime(data[, 1], "%d.%m.%Y")), Precip = data[, 4], PotEvap = potET)
# Simulation window: 2006-2007, with 2005 (rows 1-365) as warm-up.
last_row <- which(format(data[, 1], format = "%d.%m.%Y") == "31.12.2007")
run_rows <- seq(366, last_row)
gr4j_run_opts <- CreateRunOptions(FUN_MOD = RunModel_GR4J, InputsModel = gr4j_inputs, IndPeriod_Run = run_rows, IndPeriod_WarmUp = 1:365)
# Draw x1-x4 uniformly from the prescribed ranges (reproducible seed).
set.seed(15)
random_params <- c(runif(1, 0, 2500), runif(1, -5, 5), runif(1, 0, 1000), runif(1, 0, 10))
gr4j_out <- RunModel_GR4J(InputsModel = gr4j_inputs, RunOptions = gr4j_run_opts, Param = random_params)
# Compare simulated and observed discharge graphically.
plot(gr4j_out, Qobs = data[run_rows, 3])

Task 44: Run the TUWmodel for the validation period (2011–2016) and graph the results and calculate the NSE criterion.

# Task 44: run the TUWmodel for the validation period 2011-2016, plot the
# simulated vs observed discharge, and compute the NSE criterion.
library(TUWmodel) 
library(hydroGOF)
## Warning: package 'hydroGOF' was built under R version 4.1.3
library(airGR)  # for PE_Oudin()
data <- read.table("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/airGR/Data-Example.txt", header = TRUE) 
# Fix: potET was previously inherited from Task 42/43 and was undefined when
# this chunk ran on its own; compute the Oudin potential evapotranspiration
# here so the chunk is self-contained.
potET <- PE_Oudin(JD = as.POSIXlt(strptime(data[, 1], "%d.%m.%Y"))$yday, Temp = data[, 5], Lat = 46.2, LatUnit = "deg")
preob1 <- data.frame(DatesR=as.POSIXct(strptime(data[,1], "%d.%m.%Y"),tz="UTC"),P=data[,4], E=potET,Qmm=data[,3],T=data[,5])
# Load the previously calibrated parameter set (object 'calibrate_period1').
load("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/Naloge-gradivo/TUWmodelpar.Rdata")
# Validation window: first day of 2011 through 31.12.2016.
zac <- which(format(data[,1], format = "%d.%m.%Y")=="31.12.2010") + 1
kon <- which(format(data[,1], format = "%d.%m.%Y")=="31.12.2016") 
# Run the model with the best calibrated parameters (lumped, unit area).
sim2 <- TUWmodel(prec=preob1[zac:kon,2], airt=preob1[zac:kon,5], ep=preob1[zac:kon,3], area=1, param=calibrate_period1$optim$bestmem) 
# Simulated (red) vs observed (green) daily discharge.
plot(as.numeric(sim2$q),type="l", col="red",ylab="Discharge [mm]")
lines(preob1[zac:kon,4], col="green") 

# Nash-Sutcliffe efficiency of the validation run.
NSE(as.numeric(sim2$q),preob1[zac:kon,4])  
## [1] 0.5844474

Task 45: Using the TUWien model, calculate the discharge values (for the same period as in Task 44) for the different parameter combinations used during the optimisation (calibration) of the model. Plot the results.

# Task 45: plot discharge simulations for every parameter combination
# visited during the optimisation of the TUWmodel (same period as Task 44).
library(TUWmodel) 
library(airGR)  # for PE_Oudin()
data <- read.table("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/airGR/Data-Example.txt", header = TRUE) 
# Fix: potET was previously inherited from an earlier task and was undefined
# when this chunk ran on its own; compute it here so the chunk is
# self-contained.
potET <- PE_Oudin(JD = as.POSIXlt(strptime(data[, 1], "%d.%m.%Y"))$yday, Temp = data[, 5], Lat = 46.2, LatUnit = "deg")
preob1 <- data.frame(DatesR=as.POSIXct(strptime(data[,1], "%d.%m.%Y"),tz="UTC"),P=data[,4], E=potET,Qmm=data[,3],T=data[,5])
# Load the calibration results (object 'calibrate_period1'); its
# $member$bestmemit matrix holds one parameter set per row.
load("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/Naloge-gradivo/TUWmodelpar.Rdata")
# Validation window: first day of 2011 through 31.12.2016.
zac <- which(format(data[,1], format = "%d.%m.%Y")=="31.12.2010") + 1
kon <- which(format(data[,1], format = "%d.%m.%Y")=="31.12.2016") 
# Best-parameter simulation drawn first as the red reference line.
sim2 <- TUWmodel(prec=preob1[zac:kon,2], airt=preob1[zac:kon,5], ep=preob1[zac:kon,3], area=1, param=calibrate_period1$optim$bestmem) 
plot(as.numeric(sim2$q),type="l", col="red",ylab="Discharge [mm]")
# Hoist the loop-invariant colour ramp out of the loop; seq_len() is safe
# even if the parameter matrix has zero rows.
ramp <- colorRampPalette(c("lightblue", "darkblue"), alpha = TRUE)(60)
for (g in seq_len(nrow(calibrate_period1$member$bestmemit))) {
  sim3 <- TUWmodel(prec=preob1[zac:kon,2], airt=preob1[zac:kon,5], ep=preob1[zac:kon,3], area=1, param=calibrate_period1$member$bestmemit[g,])
  lines(as.numeric(sim3$q), col = ramp[g])
}

Task 46: Based on CMIP6 simulations, graphically display the monthly dynamics in precipitation in Slovenia and its surroundings (lon=c(13,17),lat=c(45,47)) for the period 2061–2080 using the CMCC-ESM2 model.

# Task 46: monthly precipitation for Slovenia and surroundings, 2061-2080,
# from the CMCC-ESM2 CMIP6 model (scenario SSP2-4.5) via the geodata package.
library(geodata, quietly=TRUE) 
# Download the 10-arc-minute precipitation tile covering the bounding box.
cmip6dod <- cmip6_tile(model="CMCC-ESM2", lon=c(13,17),lat=c(45,47), ssp="245", time="2061-2080",   var="prec", res=10, path=tempdir())  
# One map panel per monthly layer.
plot(cmip6dod)
# Average across the monthly layers.
plot(mean(cmip6dod))

Task 47: Download the monthly precipitation data for the Murska Sobota station that you used in Task 37 from the ARSO website and analyse whether there is a correlation between the NAO index and the precipitation for the selected station (for the selected period, depending on the availability of ARSO data).

# Task 47: correlation between the NAO index and monthly precipitation at
# the Murska Sobota station.
library(rsoi, quietly=TRUE) 
# Download the monthly North Atlantic Oscillation index series.
nao <- download_nao()
murska <- read.delim("C:/Users/nbezak/OneDrive - Univerza v Ljubljani/Ucbenik/Naloge-gradivo/MurskaS.txt",header=FALSE)
# NOTE(review): rows 121:852 of the NAO table are assumed to cover the same
# months as the Murska Sobota series (column 3 holding the NAO value) -
# verify the date alignment against both data sources.
plot(murska[,2],as.numeric(unlist(nao[121:852,3])),xlab="Precipitation [mm]",ylab="NAO")

# Weak negative correlation between precipitation and the NAO index.
cor(murska[,2],as.numeric(unlist(nao[121:852,3])))
## [1] -0.1243624
## [1] -0.1243624