Import data

# Read the first sheet of the spread-data workbook bundled with the
# spatiotemporaldynamics package. mustWork = TRUE makes system.file() stop
# with a clear error when the file cannot be found, instead of returning ""
# and letting read_excel() fail on an empty path.
dat <- read_excel(
  path = system.file(
    "extdata", "SpatioTemporalSpreadData.xlsx",
    package = "spatiotemporaldynamics",
    mustWork = TRUE
  ),
  sheet = 1
)

Examine data

# Show the column names, types and first few values of the imported data
str(dat)
## tibble [1,800 × 18] (S3: tbl_df/tbl/data.frame)
##  $ location         : chr [1:1800] "Billa Billa" "Billa Billa" "Billa Billa" "Billa Billa" ...
##  $ assessment_date  : POSIXct[1:1800], format: "2020-07-02" "2020-07-02" ...
##  $ assessment_number: num [1:1800] 1 1 1 1 1 1 1 1 1 1 ...
##  $ plot_number      : num [1:1800] 1 1 1 1 1 1 1 1 1 1 ...
##  $ distance         : num [1:1800] 0 9 9 9 9 9 9 9 9 6 ...
##  $ quadrat          : chr [1:1800] "F" "N9" "NE9" "E9" ...
##  $ direction        : chr [1:1800] "NA" "North" "NorthEast" "East" ...
##  $ infected_plants  : num [1:1800] 0 0 0 0 0 0 0 0 0 0 ...
##  $ total_plants     : num [1:1800] 36 48 27 57 53 41 39 31 36 54 ...
##  $ incidence        : num [1:1800] 0 0 0 0 0 0 0 0 0 0 ...
##  $ min_temp         : num [1:1800] 3.99 3.99 3.99 3.99 3.99 ...
##  $ max_temp         : num [1:1800] 20 20 20 20 20 ...
##  $ avg_temp         : num [1:1800] 12 12 12 12 12 ...
##  $ avg_wind_speed   : num [1:1800] 1.52 1.52 1.52 1.52 1.52 ...
##  $ total_rain       : num [1:1800] 1 1 1 1 1 1 1 1 1 1 ...
##  $ min_rh           : num [1:1800] 35.2 35.2 35.2 35.2 35.2 ...
##  $ max_rh           : num [1:1800] 82.7 82.7 82.7 82.7 82.7 ...
##  $ avg_rh           : num [1:1800] 58.9 58.9 58.9 58.9 58.9 ...

Combine the direction & location columns to get information on whether the rate of disease progress was significantly faster in quadrats located in a particular direction.

# Paste location and direction into a new direc_loc column;
# remove = FALSE keeps the two source columns in the data.
dat <- unite(
  data = dat,
  col = direc_loc,
  c(location, direction),
  remove = FALSE
)

Prepare data to fit GAMs

Assign variables to the correct classes

# Categorical columns that must be factors before model fitting
cols_1 <- c(
  "location",
  "quadrat",
  "direction",
  "direc_loc"
)
dat[cols_1] <- lapply(dat[cols_1], function(column) factor(column))

Re-check class

# List the class vector of every column. lapply() always returns a list,
# whereas sapply() only does so here because assessment_date carries two
# classes; the printed result is the same.
lapply(dat, class)
## $direc_loc
## [1] "factor"
## 
## $location
## [1] "factor"
## 
## $assessment_date
## [1] "POSIXct" "POSIXt" 
## 
## $assessment_number
## [1] "numeric"
## 
## $plot_number
## [1] "numeric"
## 
## $distance
## [1] "numeric"
## 
## $quadrat
## [1] "factor"
## 
## $direction
## [1] "factor"
## 
## $infected_plants
## [1] "numeric"
## 
## $total_plants
## [1] "numeric"
## 
## $incidence
## [1] "numeric"
## 
## $min_temp
## [1] "numeric"
## 
## $max_temp
## [1] "numeric"
## 
## $avg_temp
## [1] "numeric"
## 
## $avg_wind_speed
## [1] "numeric"
## 
## $total_rain
## [1] "numeric"
## 
## $min_rh
## [1] "numeric"
## 
## $max_rh
## [1] "numeric"
## 
## $avg_rh
## [1] "numeric"

Fit univariate GAMs

Use ‘set.seed()’ for reproducibility purposes

Model_1 s(distance)

# Incidence as a smooth function of distance (basis dimension 4), fitted by REML
model_1 <- gam(
  formula = incidence ~ s(distance, k = 4),
  method = "REML",
  data = dat
)

summary(model_1)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ s(distance, k = 4)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  19.3025     0.8242   23.42   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##               edf Ref.df     F p-value    
## s(distance) 2.935  2.997 55.02  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0849   Deviance explained = 8.64%
## -REML = 8953.3  Scale est. = 1222.8    n = 1800
# Smooth of model_1 with partial residuals and a shaded interval,
# shifted vertically by the model intercept
plot(
  model_1,
  residuals = TRUE,
  shade = TRUE,
  seWithMean = TRUE,
  shift = coef(model_1)[1],
  pages = 1
)

Model_2 s(total_rain)

# Incidence as a smooth function of total rainfall (basis dimension 16), fitted by REML
model_2 <- gam(
  formula = incidence ~ s(total_rain, k = 16),
  method = "REML",
  data = dat
)

summary(model_2)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ s(total_rain, k = 16)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  19.3025     0.4788   40.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df   F p-value    
## s(total_rain) 14.35   14.8 272  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.691   Deviance explained = 69.4%
## -REML = 8023.9  Scale est. = 412.71    n = 1800
# Smooth of model_2 with partial residuals and a shaded interval,
# shifted vertically by the model intercept
plot(
  model_2,
  pages = 1,
  residuals = TRUE,
  seWithMean = TRUE,
  shade = TRUE,
  shift = coef(model_2)[1]
)

Model_3 assessment_number

# Incidence with assessment_number as a linear (parametric) term, fitted by REML
model_3 <- gam(
  formula = incidence ~ assessment_number,
  method = "REML",
  data = dat
)

summary(model_3)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ assessment_number
## 
## Parametric coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -20.8905     1.4975  -13.95   <2e-16 ***
## assessment_number   7.9503     0.2618   30.37   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## R-sq.(adj) =  0.339   Deviance explained = 33.9%
## -REML = 8658.6  Scale est. = 883.65    n = 1800
# Plot model_3; all.terms = TRUE so that the parametric assessment_number
# term is drawn. The shift is the intercept only: coef(model_3) also holds
# the assessment_number slope, and every other plot in this analysis shifts
# by the single intercept value.
plot(
  model_3,
  all.terms = TRUE,
  residuals = TRUE,
  pages = 1,
  shade = TRUE,
  seWithMean = TRUE,
  shift = coef(model_3)[1]
)

Model_4 s(avg_wind_speed)

# Incidence as a smooth function of average wind speed (basis dimension 18), fitted by REML
model_4 <- gam(
  formula = incidence ~ s(avg_wind_speed, k = 18),
  method = "REML",
  data = dat
)

summary(model_4)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ s(avg_wind_speed, k = 18)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  19.3025     0.4772   40.45   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                     edf Ref.df     F p-value    
## s(avg_wind_speed) 15.84  16.01 253.9  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.693   Deviance explained = 69.6%
## -REML = 8025.1  Scale est. = 409.95    n = 1800
# Smooth of model_4 with partial residuals and a shaded interval,
# shifted vertically by the model intercept
plot(
  model_4,
  pages = 1,
  residuals = TRUE,
  seWithMean = TRUE,
  shade = TRUE,
  shift = coef(model_4)[1]
)

Model_5 s(avg_rh)

# Incidence as a smooth function of average relative humidity (basis dimension 18), fitted by REML
model_5 <- gam(
  formula = incidence ~ s(avg_rh, k = 18),
  method = "REML",
  data = dat
)

summary(model_5)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ s(avg_rh, k = 18)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  19.3025     0.4767   40.49   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##             edf Ref.df     F p-value    
## s(avg_rh) 16.97     17 239.8  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.694   Deviance explained = 69.7%
## -REML = 8059.5  Scale est. = 409.08    n = 1800
# Smooth of model_5 with partial residuals and a shaded interval,
# shifted vertically by the model intercept
plot(
  model_5,
  pages = 1,
  residuals = TRUE,
  seWithMean = TRUE,
  shade = TRUE,
  shift = coef(model_5)[1]
)

Model_6 s(avg_temp)

# Incidence as a smooth function of average temperature (basis dimension 18), fitted by REML
model_6 <- gam(
  formula = incidence ~ s(avg_temp, k = 18),
  method = "REML",
  data = dat
)

summary(model_6)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ s(avg_temp, k = 18)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  19.3025     0.4773   40.44   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##               edf Ref.df     F p-value    
## s(avg_temp) 12.88  14.32 283.9  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.693   Deviance explained = 69.5%
## -REML = 7993.9  Scale est. = 410.08    n = 1800
# Smooth of model_6 with partial residuals and a shaded interval,
# shifted vertically by the model intercept
plot(
  model_6,
  pages = 1,
  residuals = TRUE,
  seWithMean = TRUE,
  shade = TRUE,
  shift = coef(model_6)[1]
)

Model_6.1 s(avg_temp, by = location)

# As model_6, but by = location gives each location its own temperature smooth
model_6.1 <- gam(
  formula = incidence ~ s(avg_temp, by = location, k = 18),
  method = "REML",
  data = dat
)

summary(model_6.1)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ s(avg_temp, by = location, k = 18)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  16.6065     0.8576   19.36   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                   edf Ref.df     F p-value    
## s(avg_temp):locationBilla Billa 6.230  6.748 336.7  <2e-16 ***
## s(avg_temp):locationTosari      7.725  7.933 227.8  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.694   Deviance explained = 69.6%
## -REML = 7993.1  Scale est. = 409.34    n = 1800
# Per-location temperature smooths of model_6.1 on one page, with partial
# residuals and shaded intervals, shifted by the model intercept
plot(
  model_6.1,
  pages = 1,
  residuals = TRUE,
  seWithMean = TRUE,
  shade = TRUE,
  shift = coef(model_6.1)[1]
)

Model_7 location

# Incidence with location as the only (factor) predictor, fitted by REML
model_7 <- gam(
  formula = incidence ~ location,
  method = "REML",
  data = dat
)

summary(model_7)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ location
## 
## Parametric coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      17.149      1.154   14.86  < 2e-16 ***
## locationTosari    4.845      1.731    2.80  0.00517 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## R-sq.(adj) =  0.00379   Deviance explained = 0.434%
## -REML = 9025.3  Scale est. = 1331.1    n = 1800
# Plot model_7; all.terms = TRUE so that the parametric location term is drawn
plot(
  model_7,
  all.terms = TRUE,
  pages = 1,
  residuals = TRUE,
  seWithMean = TRUE,
  shade = TRUE,
  shift = coef(model_7)[1]
)

Fit multivariate GAMs

Model_8 (All variables)

# Full model: smooths of the weather variables and distance, plus location,
# assessment_number and the combined location_direction factor.
# NOTE(review): direc_loc is built from location and direction, so it already
# encodes location; the printed summary reports "Rank: 88/89" with one
# direc_loc coefficient fixed at 0. Consider whether both terms are needed.
model_8 <- gam(
  formula = incidence ~ s(total_rain, k = 16) +
    s(distance, k = 4) +
    s(avg_wind_speed, k = 18) +
    s(avg_rh, k = 18) +
    s(avg_temp, k = 18) +
    location +
    assessment_number +
    direc_loc,
  method = "REML",
  data = dat
)

summary(model_8)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ s(total_rain, k = 16) + s(distance, k = 4) + s(avg_wind_speed, 
##     k = 18) + s(avg_rh, k = 18) + s(avg_temp, k = 18) + location + 
##     assessment_number + direc_loc
## 
## Parametric coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    21.51093    6.94148   3.099  0.00197 ** 
## locationTosari                 -7.28480    8.90499  -0.818  0.41343    
## assessment_number              -0.07815    1.10438  -0.071  0.94359    
## direc_locBilla Billa_NA        26.79034    6.71232   3.991 6.84e-05 ***
## direc_locBilla Billa_North     -0.03499    2.09472  -0.017  0.98668    
## direc_locBilla Billa_NorthEast -1.87407    2.09472  -0.895  0.37109    
## direc_locBilla Billa_NorthWest -1.70319    2.09472  -0.813  0.41628    
## direc_locBilla Billa_South     -3.55843    2.09472  -1.699  0.08954 .  
## direc_locBilla Billa_SouthEast -4.99762    2.09472  -2.386  0.01715 *  
## direc_locBilla Billa_SouthWest -2.96581    2.09472  -1.416  0.15700    
## direc_locBilla Billa_West      -0.62089    2.09472  -0.296  0.76695    
## direc_locTosari_East           12.05864    2.34197   5.149 2.91e-07 ***
## direc_locTosari_NA             49.71240    6.87381   7.232 7.05e-13 ***
## direc_locTosari_North           3.31796    2.34197   1.417  0.15673    
## direc_locTosari_NorthEast      18.16437    2.34197   7.756 1.47e-14 ***
## direc_locTosari_NorthWest      -3.28803    2.34197  -1.404  0.16051    
## direc_locTosari_South          -4.32432    2.34197  -1.846  0.06499 .  
## direc_locTosari_SouthEast       0.00000    0.00000      NA       NA    
## direc_locTosari_SouthWest      -3.34241    2.34197  -1.427  0.15370    
## direc_locTosari_West           -3.91910    2.34197  -1.673  0.09442 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                     edf Ref.df      F p-value    
## s(total_rain)     2.030  2.138  2.768  0.0314 *  
## s(distance)       1.928  1.995 32.592  <2e-16 ***
## s(avg_wind_speed) 1.003  1.005  2.920  0.0873 .  
## s(avg_rh)         2.255  2.485  1.386  0.1973    
## s(avg_temp)       7.811  8.555 30.928  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 88/89
## R-sq.(adj) =  0.803   Deviance explained = 80.7%
## -REML = 7544.3  Scale est. = 263.27    n = 1800
# All smooth and parametric terms of model_8 on one page,
# shifted vertically by the model intercept
plot(
  model_8,
  pages = 1,
  all.terms = TRUE,
  seWithMean = TRUE,
  shade = TRUE,
  shift = coef(model_8)[1]
)

Model_9 (Fit different smooths for each term by location)

Same as model_8 but different smooths have been considered for each term BY LOCATION

# Every smooth gets by = location, i.e. a separate curve per location;
# the parametric location term supplies the per-location intercept.
model_9 <- gam(
  formula = incidence ~ s(total_rain, k = 16, by = location) +
    s(distance, k = 4, by = location) +
    s(avg_wind_speed, k = 18, by = location) +
    s(avg_temp, k = 18, by = location) +
    s(avg_rh, k = 18, by = location) +
    assessment_number +
    location +
    direc_loc,
  method = "REML",
  data = dat
)

summary(model_9)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ s(total_rain, k = 16, by = location) + s(distance, 
##     k = 4, by = location) + s(avg_wind_speed, k = 18, by = location) + 
##     s(avg_temp, k = 18, by = location) + s(avg_rh, k = 18, by = location) + 
##     assessment_number + location + direc_loc
## 
## Parametric coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     23.87951   19.68772   1.213  0.22533    
## assessment_number                0.01882    2.00906   0.009  0.99253    
## locationTosari                 -29.67809   63.82349  -0.465  0.64199    
## direc_locBilla Billa_NA         38.04027    3.17199  11.993  < 2e-16 ***
## direc_locBilla Billa_North      -0.03499    2.05813  -0.017  0.98644    
## direc_locBilla Billa_NorthEast  -1.87407    2.05813  -0.911  0.36265    
## direc_locBilla Billa_NorthWest  -1.70319    2.05813  -0.828  0.40804    
## direc_locBilla Billa_South      -3.55843    2.05813  -1.729  0.08399 .  
## direc_locBilla Billa_SouthEast  -4.99762    2.05813  -2.428  0.01527 *  
## direc_locBilla Billa_SouthWest  -2.96581    2.05813  -1.441  0.14976    
## direc_locBilla Billa_West       -0.62089    2.05813  -0.302  0.76293    
## direc_locTosari_East            15.97774    2.30106   6.944 5.35e-12 ***
## direc_locTosari_NA              39.81465   13.13429   3.031  0.00247 ** 
## direc_locTosari_North            7.23706    2.30106   3.145  0.00169 ** 
## direc_locTosari_NorthEast       22.08346    2.30106   9.597  < 2e-16 ***
## direc_locTosari_NorthWest        0.63106    2.30106   0.274  0.78392    
## direc_locTosari_South           -0.40523    2.30106  -0.176  0.86023    
## direc_locTosari_SouthEast        3.91910    2.30106   1.703  0.08871 .  
## direc_locTosari_SouthWest        0.57669    2.30106   0.251  0.80214    
## direc_locTosari_West             0.00000    0.00000      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                         edf Ref.df       F  p-value    
## s(total_rain):locationBilla Billa     1.001  1.001   0.033   0.8557    
## s(total_rain):locationTosari          1.002  1.003 231.379  < 2e-16 ***
## s(distance):locationBilla Billa       1.000  1.000   5.369   0.0206 *  
## s(distance):locationTosari            1.965  1.999  46.326  < 2e-16 ***
## s(avg_wind_speed):locationBilla Billa 1.480  1.516   0.486   0.5476    
## s(avg_wind_speed):locationTosari      3.256  3.382   3.633   0.0128 *  
## s(avg_temp):locationBilla Billa       4.426  4.517   5.974 2.64e-05 ***
## s(avg_temp):locationTosari            1.000  1.000  64.132  < 2e-16 ***
## s(avg_rh):locationBilla Billa         1.000  1.000   0.392   0.5317    
## s(avg_rh):locationTosari              1.482  1.543   2.991   0.0315 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 157/158
## R-sq.(adj) =   0.81   Deviance explained = 81.4%
## -REML = 7502.9  Scale est. = 254.15    n = 1800
# All smooth and parametric terms of model_9 on one page,
# shifted vertically by the model intercept
plot(
  model_9,
  pages = 1,
  all.terms = TRUE,
  seWithMean = TRUE,
  shade = TRUE,
  shift = coef(model_9)[1]
)

Comments on model_8 and model_9

Fitting different smooths for each term by location in model_9 improved the adjusted R^2 and percent deviance explained values. Noam Ross encourages this practice; in his own words:

“By specifying the ‘by’ argument to the s() function, we can tell R to calculate a different smooth for each unique category. Usually, when we have smooth-factor interactions, we want to also include a varying intercept, in case the different categories are different in overall means in addition to shape of their smooths. Here, you see adding this varying intercept improves the estimate of the smooth.”

Model_10 (Remove by argument from avg_rh and avg_wind_speed)

Remove the by argument from avg_rh and avg_wind_speed, as these terms are only weakly significant, and only for Tosari.

# As model_9, but wind speed and relative humidity each use a single smooth
# shared by both locations (no by = location).
model_10 <- gam(
  formula = incidence ~ s(total_rain, k = 16, by = location) +
    s(distance, k = 4, by = location) +
    s(avg_wind_speed, k = 18) +
    s(avg_temp, k = 18, by = location) +
    s(avg_rh, k = 18) +
    assessment_number +
    location +
    direc_loc,
  method = "REML",
  data = dat
)

summary(model_10)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ s(total_rain, k = 16, by = location) + s(distance, 
##     k = 4, by = location) + s(avg_wind_speed, k = 18) + s(avg_temp, 
##     k = 18, by = location) + s(avg_rh, k = 18) + assessment_number + 
##     location + direc_loc
## 
## Parametric coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    10.19418    6.91442   1.474  0.14057    
## assessment_number               0.80848    1.08405   0.746  0.45589    
## locationTosari                 -3.95894    6.15431  -0.643  0.52013    
## direc_locBilla Billa_NA        38.04033    3.17106  11.996  < 2e-16 ***
## direc_locBilla Billa_North     -0.03499    2.05761  -0.017  0.98643    
## direc_locBilla Billa_NorthEast -1.87407    2.05761  -0.911  0.36253    
## direc_locBilla Billa_NorthWest -1.70319    2.05761  -0.828  0.40792    
## direc_locBilla Billa_South     -3.55843    2.05761  -1.729  0.08391 .  
## direc_locBilla Billa_SouthEast -4.99762    2.05761  -2.429  0.01525 *  
## direc_locBilla Billa_SouthWest -2.96581    2.05761  -1.441  0.14965    
## direc_locBilla Billa_West      -0.62089    2.05761  -0.302  0.76288    
## direc_locTosari_East           15.40105    2.30048   6.695 2.89e-11 ***
## direc_locTosari_NA             39.23775   13.13408   2.987  0.00285 ** 
## direc_locTosari_North           6.66037    2.30048   2.895  0.00384 ** 
## direc_locTosari_NorthEast      21.50677    2.30048   9.349  < 2e-16 ***
## direc_locTosari_NorthWest       0.05438    2.30048   0.024  0.98114    
## direc_locTosari_South          -0.98191    2.30048  -0.427  0.66956    
## direc_locTosari_SouthEast       3.34241    2.30048   1.453  0.14642    
## direc_locTosari_SouthWest       0.00000    0.00000      NA       NA    
## direc_locTosari_West           -0.57669    2.30048  -0.251  0.80209    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                     edf Ref.df       F p-value    
## s(total_rain):locationBilla Billa 1.000  1.000   0.906  0.3413    
## s(total_rain):locationTosari      3.438  3.773 282.039  <2e-16 ***
## s(distance):locationBilla Billa   1.000  1.000   5.373  0.0206 *  
## s(distance):locationTosari        1.965  1.999  46.348  <2e-16 ***
## s(avg_wind_speed)                 1.001  1.001   0.489  0.4846    
## s(avg_temp):locationBilla Billa   5.603  6.147  26.984  <2e-16 ***
## s(avg_temp):locationTosari        1.002  1.003   0.337  0.5630    
## s(avg_rh)                         1.002  1.003   0.808  0.3692    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 123/124
## R-sq.(adj) =   0.81   Deviance explained = 81.3%
## -REML =   7506  Scale est. = 254.03    n = 1800
# All smooth and parametric terms of model_10 on one page,
# shifted vertically by the model intercept
plot(
  model_10,
  pages = 1,
  all.terms = TRUE,
  seWithMean = TRUE,
  shade = TRUE,
  shift = coef(model_10)[1]
)

Model_11 (Interaction between total_rain and avg_wind_speed)

# Rainfall and wind speed enter jointly as one two-dimensional smooth per
# location; the remaining terms are as in model_10.
model_11 <- gam(
  formula = incidence ~ s(total_rain, avg_wind_speed, k = 18, by = location) +
    s(distance, k = 4, by = location) +
    s(avg_temp, k = 18, by = location) +
    s(avg_rh, k = 18) +
    assessment_number +
    location +
    direc_loc,
  method = "REML",
  data = dat
)

summary(model_11)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ s(total_rain, avg_wind_speed, k = 18, by = location) + 
##     s(distance, k = 4, by = location) + s(avg_temp, k = 18, by = location) + 
##     s(avg_rh, k = 18) + assessment_number + location + direc_loc
## 
## Parametric coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    11.22801    8.44070   1.330  0.18362    
## assessment_number               0.59818    1.24450   0.481  0.63082    
## locationTosari                  0.00000    0.00000      NA       NA    
## direc_locBilla Billa_NA        38.03938    3.17360  11.986  < 2e-16 ***
## direc_locBilla Billa_North     -0.03499    2.05789  -0.017  0.98644    
## direc_locBilla Billa_NorthEast -1.87407    2.05789  -0.911  0.36259    
## direc_locBilla Billa_NorthWest -1.70319    2.05789  -0.828  0.40799    
## direc_locBilla Billa_South     -3.55843    2.05789  -1.729  0.08395 .  
## direc_locBilla Billa_SouthEast -4.99762    2.05789  -2.429  0.01526 *  
## direc_locBilla Billa_SouthWest -2.96581    2.05789  -1.441  0.14971    
## direc_locBilla Billa_West      -0.62089    2.05789  -0.302  0.76291    
## direc_locTosari_East           11.29817    6.66889   1.694  0.09041 .  
## direc_locTosari_NA             35.13498   14.09731   2.492  0.01278 *  
## direc_locTosari_North           2.55749    6.66889   0.383  0.70140    
## direc_locTosari_NorthEast      17.40390    6.66889   2.610  0.00914 ** 
## direc_locTosari_NorthWest      -4.04850    6.66889  -0.607  0.54388    
## direc_locTosari_South          -5.08479    6.66889  -0.762  0.44588    
## direc_locTosari_SouthEast      -0.76047    6.66889  -0.114  0.90923    
## direc_locTosari_SouthWest      -4.10288    6.66889  -0.615  0.53848    
## direc_locTosari_West           -4.67956    6.66889  -0.702  0.48296    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                    edf Ref.df       F p-value
## s(total_rain,avg_wind_speed):locationBilla Billa 2.003  2.004   0.168  0.8454
## s(total_rain,avg_wind_speed):locationTosari      4.693  4.941 202.992  <2e-16
## s(distance):locationBilla Billa                  1.002  1.003   5.349  0.0207
## s(distance):locationTosari                       1.965  1.999  46.337  <2e-16
## s(avg_temp):locationBilla Billa                  5.181  5.512  25.048  <2e-16
## s(avg_temp):locationTosari                       1.001  1.002   0.162  0.6890
## s(avg_rh)                                        1.003  1.005   0.854  0.3566
##                                                     
## s(total_rain,avg_wind_speed):locationBilla Billa    
## s(total_rain,avg_wind_speed):locationTosari      ***
## s(distance):locationBilla Billa                  *  
## s(distance):locationTosari                       ***
## s(avg_temp):locationBilla Billa                  ***
## s(avg_temp):locationTosari                          
## s(avg_rh)                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 110/111
## R-sq.(adj) =   0.81   Deviance explained = 81.4%
## -REML =   7504  Scale est. = 254.09    n = 1800
# Draw every smooth of model_11 on a single page using plotting scheme 1
plot(
  model_11,
  pages = 1,
  scheme = 1
)

Comments on model_11

The effect of avg_wind_speed is not significant on its own in model_10, but it is significant in interaction with total_rain, for Tosari only.

Visualize the interaction between avg_wind_speed and total_rain

# Perspective plot of model_11 predictions over rainfall and wind speed
vis.gam(
  model_11,
  view = c("total_rain", "avg_wind_speed"),
  theta = 135,
  plot.type = "persp"
)

Add confidence interval to the interaction visualisation/predictions

# Same perspective plot, with se = 2 to add standard-error surfaces
vis.gam(
  model_11,
  view = c("total_rain", "avg_wind_speed"),
  theta = 135,
  se = 2,
  plot.type = "persp"
)

With se = 2, the plot shows high and low prediction surfaces drawn 2 standard errors above and below the average prediction.

Model_12 Try tensor interactions instead of smooth interactions

Tensor smooths let us model interactions between variables that operate on different scales. Models with tensor interactions take longer to fit and require more data.

# Same terms as model_11, but the rainfall x wind-speed interaction is a
# tensor product smooth, te(), instead of an isotropic s() smooth. The
# previous code repeated model_11's s() term, so no tensor smooth was fitted.
# In te(), k is the basis dimension of EACH marginal smooth, so the k = 18
# used for s() is not carried over; k = 5 per margin is used instead.
# NOTE(review): k = 5 is a starting value - confirm it with gam.check().
# NOTE(review): the summary printed after this chunk was produced by the
# s() version of the model and must be regenerated.
model_12 <-
  gam(
    incidence ~ te(total_rain, avg_wind_speed, k = 5, by = location) +
      s(distance, k = 4, by = location) +
      s(avg_temp, k = 18, by = location) +
      s(avg_rh, k = 18) +
      assessment_number +
      location +
      direc_loc,
    data = dat,
    method = "REML"
  )

summary(model_12)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ s(total_rain, avg_wind_speed, k = 18, by = location) + 
##     s(distance, k = 4, by = location) + s(avg_temp, k = 18, by = location) + 
##     s(avg_rh, k = 18) + assessment_number + location + direc_loc
## 
## Parametric coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    11.22801    8.44070   1.330  0.18362    
## assessment_number               0.59818    1.24450   0.481  0.63082    
## locationTosari                  0.00000    0.00000      NA       NA    
## direc_locBilla Billa_NA        38.03938    3.17360  11.986  < 2e-16 ***
## direc_locBilla Billa_North     -0.03499    2.05789  -0.017  0.98644    
## direc_locBilla Billa_NorthEast -1.87407    2.05789  -0.911  0.36259    
## direc_locBilla Billa_NorthWest -1.70319    2.05789  -0.828  0.40799    
## direc_locBilla Billa_South     -3.55843    2.05789  -1.729  0.08395 .  
## direc_locBilla Billa_SouthEast -4.99762    2.05789  -2.429  0.01526 *  
## direc_locBilla Billa_SouthWest -2.96581    2.05789  -1.441  0.14971    
## direc_locBilla Billa_West      -0.62089    2.05789  -0.302  0.76291    
## direc_locTosari_East           11.29817    6.66889   1.694  0.09041 .  
## direc_locTosari_NA             35.13498   14.09731   2.492  0.01278 *  
## direc_locTosari_North           2.55749    6.66889   0.383  0.70140    
## direc_locTosari_NorthEast      17.40390    6.66889   2.610  0.00914 ** 
## direc_locTosari_NorthWest      -4.04850    6.66889  -0.607  0.54388    
## direc_locTosari_South          -5.08479    6.66889  -0.762  0.44588    
## direc_locTosari_SouthEast      -0.76047    6.66889  -0.114  0.90923    
## direc_locTosari_SouthWest      -4.10288    6.66889  -0.615  0.53848    
## direc_locTosari_West           -4.67956    6.66889  -0.702  0.48296    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                    edf Ref.df       F p-value
## s(total_rain,avg_wind_speed):locationBilla Billa 2.003  2.004   0.168  0.8454
## s(total_rain,avg_wind_speed):locationTosari      4.693  4.941 202.992  <2e-16
## s(distance):locationBilla Billa                  1.002  1.003   5.349  0.0207
## s(distance):locationTosari                       1.965  1.999  46.337  <2e-16
## s(avg_temp):locationBilla Billa                  5.181  5.512  25.048  <2e-16
## s(avg_temp):locationTosari                       1.001  1.002   0.162  0.6890
## s(avg_rh)                                        1.003  1.005   0.854  0.3566
##                                                     
## s(total_rain,avg_wind_speed):locationBilla Billa    
## s(total_rain,avg_wind_speed):locationTosari      ***
## s(distance):locationBilla Billa                  *  
## s(distance):locationTosari                       ***
## s(avg_temp):locationBilla Billa                  ***
## s(avg_temp):locationTosari                          
## s(avg_rh)                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 110/111
## R-sq.(adj) =   0.81   Deviance explained = 81.4%
## -REML =   7504  Scale est. = 254.09    n = 1800
# Draw every smooth of model_12 on a single page using plotting scheme 1
plot(
  model_12,
  pages = 1,
  scheme = 1
)

Model_13

Same as model_10 but relative humidity removed.

# model_10 without the relative-humidity smooth
model_13 <- gam(
  formula = incidence ~ s(total_rain, k = 16, by = location) +
    s(distance, k = 4, by = location) +
    s(avg_temp, k = 18, by = location) +
    s(avg_wind_speed, k = 18) +
    assessment_number +
    location +
    direc_loc,
  method = "REML",
  data = dat
)

summary(model_13)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ s(total_rain, k = 16, by = location) + s(distance, 
##     k = 4, by = location) + s(avg_temp, k = 18, by = location) + 
##     s(avg_wind_speed, k = 18) + assessment_number + location + 
##     direc_loc
## 
## Parametric coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -41.44525   14.85023  -2.791 0.005313 ** 
## assessment_number                9.13265    1.62278   5.628 2.12e-08 ***
## locationTosari                  16.26187   16.87546   0.964 0.335358    
## direc_locBilla Billa_NA         38.04010    3.17244  11.991  < 2e-16 ***
## direc_locBilla Billa_North      -0.03499    2.05817  -0.017 0.986439    
## direc_locBilla Billa_NorthEast  -1.87407    2.05817  -0.911 0.362658    
## direc_locBilla Billa_NorthWest  -1.70319    2.05817  -0.828 0.408053    
## direc_locBilla Billa_South      -3.55843    2.05817  -1.729 0.083997 .  
## direc_locBilla Billa_SouthEast  -4.99762    2.05817  -2.428 0.015274 *  
## direc_locBilla Billa_SouthWest  -2.96581    2.05817  -1.441 0.149765    
## direc_locBilla Billa_West       -0.62089    2.05817  -0.302 0.762938    
## direc_locTosari_East            16.38296    2.30111   7.120 1.57e-12 ***
## direc_locTosari_NA              40.21989   13.13431   3.062 0.002230 ** 
## direc_locTosari_North            7.64228    2.30111   3.321 0.000915 ***
## direc_locTosari_NorthEast       22.48869    2.30111   9.773  < 2e-16 ***
## direc_locTosari_NorthWest        1.03629    2.30111   0.450 0.652518    
## direc_locTosari_South            0.00000    0.00000      NA       NA    
## direc_locTosari_SouthEast        4.32432    2.30111   1.879 0.060377 .  
## direc_locTosari_SouthWest        0.98191    2.30111   0.427 0.669640    
## direc_locTosari_West             0.40523    2.30111   0.176 0.860235    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                     edf Ref.df       F  p-value    
## s(total_rain):locationBilla Billa 1.003  1.003   0.770   0.3808    
## s(total_rain):locationTosari      1.000  1.000 133.768  < 2e-16 ***
## s(distance):locationBilla Billa   1.000  1.001   5.365   0.0206 *  
## s(distance):locationTosari        1.965  1.999  46.324  < 2e-16 ***
## s(avg_temp):locationBilla Billa   2.792  2.899   2.189   0.1377    
## s(avg_temp):locationTosari        1.009  1.011  58.740  < 2e-16 ***
## s(avg_wind_speed)                 8.664  9.002   5.167 5.07e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 106/107
## R-sq.(adj) =   0.81   Deviance explained = 81.4%
## -REML =   7517  Scale est. = 254.16    n = 1800
# Smooth terms of model_13 on one page with shaded intervals,
# shifted vertically by the model intercept
plot(
  model_13,
  seWithMean = TRUE,
  shade = TRUE,
  shift = coef(model_13)[1],
  pages = 1
)

Model_14

Same as model_13 but considering the interaction between total rain and wind speed.

# Model 14: a bivariate smooth of total_rain and avg_wind_speed, plus smooths of
# distance and avg_temp. Each smooth is fitted per location (by = location);
# k sets the basis dimension. Smoothing parameters are estimated by REML.
# NOTE(review): direc_loc already encodes location, which is presumably why the
# summary reports a rank-deficient fit with one zero coefficient; confirm.
model_14 <- gam(
  formula = incidence ~
    s(total_rain, avg_wind_speed, k = 18, by = location) +
    s(distance, k = 4, by = location) +
    s(avg_temp, k = 18, by = location) +
    assessment_number + location + direc_loc,
  data = dat,
  method = "REML"
)

# Parametric coefficients, smooth-term significance and fit statistics.
summary(model_14)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ s(total_rain, avg_wind_speed, k = 18, by = location) + 
##     s(distance, k = 4, by = location) + s(avg_temp, k = 18, by = location) + 
##     assessment_number + location + direc_loc
## 
## Parametric coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     24.59770   29.71854   0.828  0.40796    
## assessment_number                1.19570    1.04337   1.146  0.25195    
## locationTosari                  16.83229   31.50177   0.534  0.59318    
## direc_locBilla Billa_NA         38.03945    3.17375  11.986  < 2e-16 ***
## direc_locBilla Billa_North      -0.03499    2.05808  -0.017  0.98644    
## direc_locBilla Billa_NorthEast  -1.87407    2.05808  -0.911  0.36264    
## direc_locBilla Billa_NorthWest  -1.70319    2.05808  -0.828  0.40803    
## direc_locBilla Billa_South      -3.55843    2.05808  -1.729  0.08398 .  
## direc_locBilla Billa_SouthEast  -4.99762    2.05808  -2.428  0.01527 *  
## direc_locBilla Billa_SouthWest  -2.96581    2.05808  -1.441  0.14975    
## direc_locBilla Billa_West       -0.62089    2.05808  -0.302  0.76293    
## direc_locTosari_East           -23.83694   13.13360  -1.815  0.06970 .  
## direc_locTosari_NA               0.00000    0.00000      NA       NA    
## direc_locTosari_North          -32.57762   13.13360  -2.480  0.01321 *  
## direc_locTosari_NorthEast      -17.73121   13.13360  -1.350  0.17717    
## direc_locTosari_NorthWest      -39.18361   13.13360  -2.983  0.00289 ** 
## direc_locTosari_South          -40.21990   13.13360  -3.062  0.00223 ** 
## direc_locTosari_SouthEast      -35.89558   13.13360  -2.733  0.00634 ** 
## direc_locTosari_SouthWest      -39.23799   13.13360  -2.988  0.00285 ** 
## direc_locTosari_West           -39.81467   13.13360  -3.032  0.00247 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                    edf Ref.df       F p-value
## s(total_rain,avg_wind_speed):locationBilla Billa 7.855  7.981 203.787  <2e-16
## s(total_rain,avg_wind_speed):locationTosari      4.738  4.953 268.585  <2e-16
## s(distance):locationBilla Billa                  1.002  1.003   5.349  0.0207
## s(distance):locationTosari                       1.965  1.999  46.328  <2e-16
## s(avg_temp):locationBilla Billa                  1.000  1.000   0.102  0.7496
## s(avg_temp):locationTosari                       1.001  1.001   0.585  0.4456
##                                                     
## s(total_rain,avg_wind_speed):locationBilla Billa ***
## s(total_rain,avg_wind_speed):locationTosari      ***
## s(distance):locationBilla Billa                  *  
## s(distance):locationTosari                       ***
## s(avg_temp):locationBilla Billa                     
## s(avg_temp):locationTosari                          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 93/94
## R-sq.(adj) =   0.81   Deviance explained = 81.4%
## -REML = 7512.1  Scale est. = 254.14    n = 1800
# Draw every smooth term of model_14 on one page with shaded confidence bands.
plot(
  model_14,
  shift = coef(model_14)[1], # add the first coefficient (intercept) to each smooth
  seWithMean = TRUE,         # bands also reflect uncertainty in the overall mean
  shade = TRUE,
  pages = 1
)

Model_15

Same as model_13 but without wind speed.

# Model 15: per-location smooths of total_rain, distance and avg_temp, with no
# wind-speed term; k sets the basis dimension of each smooth.
# Smoothing parameters are estimated by REML.
model_15 <- gam(
  formula = incidence ~
    s(total_rain, k = 16, by = location) +
    s(distance, k = 4, by = location) +
    s(avg_temp, k = 18, by = location) +
    assessment_number + location + direc_loc,
  data = dat,
  method = "REML"
)

# Parametric coefficients, smooth-term significance and fit statistics.
summary(model_15)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ s(total_rain, k = 16, by = location) + s(distance, 
##     k = 4, by = location) + s(avg_temp, k = 18, by = location) + 
##     assessment_number + location + direc_loc
## 
## Parametric coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     3.97249    5.21760   0.761  0.44654    
## assessment_number               2.01680    0.71049   2.839  0.00458 ** 
## locationTosari                 -3.62075    3.24067  -1.117  0.26402    
## direc_locBilla Billa_NA        38.04020    3.17171  11.994  < 2e-16 ***
## direc_locBilla Billa_North     -0.03499    2.05786  -0.017  0.98644    
## direc_locBilla Billa_NorthEast -1.87407    2.05786  -0.911  0.36258    
## direc_locBilla Billa_NorthWest -1.70319    2.05786  -0.828  0.40798    
## direc_locBilla Billa_South     -3.55843    2.05786  -1.729  0.08395 .  
## direc_locBilla Billa_SouthEast -4.99762    2.05786  -2.429  0.01526 *  
## direc_locBilla Billa_SouthWest -2.96581    2.05786  -1.441  0.14970    
## direc_locBilla Billa_West      -0.62089    2.05786  -0.302  0.76290    
## direc_locTosari_East           15.97774    2.30075   6.945 5.32e-12 ***
## direc_locTosari_NA             39.81454   13.13416   3.031  0.00247 ** 
## direc_locTosari_North           7.23706    2.30075   3.146  0.00169 ** 
## direc_locTosari_NorthEast      22.08346    2.30075   9.598  < 2e-16 ***
## direc_locTosari_NorthWest       0.63106    2.30075   0.274  0.78390    
## direc_locTosari_South          -0.40523    2.30075  -0.176  0.86021    
## direc_locTosari_SouthEast       3.91910    2.30075   1.703  0.08867 .  
## direc_locTosari_SouthWest       0.57669    2.30075   0.251  0.80211    
## direc_locTosari_West            0.00000    0.00000      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                     edf Ref.df       F p-value    
## s(total_rain):locationBilla Billa 1.350  1.516   0.270  0.6027    
## s(total_rain):locationTosari      3.497  3.841 262.965  <2e-16 ***
## s(distance):locationBilla Billa   1.000  1.001   5.369  0.0206 *  
## s(distance):locationTosari        1.965  1.999  46.338  <2e-16 ***
## s(avg_temp):locationBilla Billa   5.735  6.280 150.640  <2e-16 ***
## s(avg_temp):locationTosari        1.052  1.091   1.241  0.2878    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 89/90
## R-sq.(adj) =   0.81   Deviance explained = 81.3%
## -REML = 7510.4  Scale est. = 254.09    n = 1800
# Draw all terms of model_15 (smooth and parametric) on one page with shaded
# confidence bands.
plot(
  model_15,
  shift = coef(model_15)[1], # add the first coefficient (intercept) to each smooth
  seWithMean = TRUE,         # bands also reflect uncertainty in the overall mean
  shade = TRUE,
  pages = 1,
  all.terms = TRUE           # include the parametric terms as well
)

Model_16

Add the effect of avg_rh to model_15 to see whether a better fit can be achieved.

# Model 16: model_15 plus a single smooth of avg_rh shared across locations
# (no `by` variable). The other smooths remain per location.
# Smoothing parameters are estimated by REML.
model_16 <- gam(
  formula = incidence ~
    s(total_rain, k = 16, by = location) +
    s(distance, k = 4, by = location) +
    s(avg_temp, k = 18, by = location) +
    s(avg_rh, k = 18) +
    assessment_number + location + direc_loc,
  data = dat,
  method = "REML"
)

# Parametric coefficients, smooth-term significance and fit statistics.
summary(model_16)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## incidence ~ s(total_rain, k = 16, by = location) + s(distance, 
##     k = 4, by = location) + s(avg_temp, k = 18, by = location) + 
##     s(avg_rh, k = 18) + assessment_number + location + direc_loc
## 
## Parametric coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     7.45091    5.88619   1.266 0.205741    
## assessment_number               1.01397    1.04820   0.967 0.333508    
## locationTosari                 -1.49542    3.68806  -0.405 0.685177    
## direc_locBilla Billa_NA        38.04023    3.17118  11.996  < 2e-16 ***
## direc_locBilla Billa_North     -0.03499    2.05756  -0.017 0.986435    
## direc_locBilla Billa_NorthEast -1.87407    2.05756  -0.911 0.362514    
## direc_locBilla Billa_NorthWest -1.70319    2.05756  -0.828 0.407913    
## direc_locBilla Billa_South     -3.55843    2.05756  -1.729 0.083905 .  
## direc_locBilla Billa_SouthEast -4.99762    2.05756  -2.429 0.015244 *  
## direc_locBilla Billa_SouthWest -2.96581    2.05756  -1.441 0.149643    
## direc_locBilla Billa_West      -0.62089    2.05756  -0.302 0.762869    
## direc_locTosari_East           16.38296    2.30042   7.122 1.55e-12 ***
## direc_locTosari_NA             40.21964   13.13405   3.062 0.002230 ** 
## direc_locTosari_North           7.64228    2.30042   3.322 0.000912 ***
## direc_locTosari_NorthEast      22.48869    2.30042   9.776  < 2e-16 ***
## direc_locTosari_NorthWest       1.03629    2.30042   0.450 0.652420    
## direc_locTosari_South           0.00000    0.00000      NA       NA    
## direc_locTosari_SouthEast       4.32432    2.30042   1.880 0.060300 .  
## direc_locTosari_SouthWest       0.98191    2.30042   0.427 0.669547    
## direc_locTosari_West            0.40523    2.30042   0.176 0.860194    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                     edf Ref.df       F p-value    
## s(total_rain):locationBilla Billa 1.002  1.003   0.871  0.3502    
## s(total_rain):locationTosari      3.443  3.779 324.700  <2e-16 ***
## s(distance):locationBilla Billa   1.000  1.000   5.371  0.0206 *  
## s(distance):locationTosari        1.965  1.999  46.350  <2e-16 ***
## s(avg_temp):locationBilla Billa   5.702  6.274  29.741  <2e-16 ***
## s(avg_temp):locationTosari        1.013  1.022   0.154  0.7116    
## s(avg_rh)                         1.002  1.003   1.293  0.2556    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 106/107
## R-sq.(adj) =   0.81   Deviance explained = 81.3%
## -REML =   7508  Scale est. = 254.01    n = 1800
# Draw all terms of model_16 (smooth and parametric) on one page with shaded
# confidence bands.
plot(
  model_16,
  shift = coef(model_16)[1], # add the first coefficient (intercept) to each smooth
  seWithMean = TRUE,         # bands also reflect uncertainty in the overall mean
  shade = TRUE,
  pages = 1,
  all.terms = TRUE           # include the parametric terms as well
)

Comments on model_16

The effect of avg_rh is not significant but adding it to the model resulted in a better fit, so avg_rh has been included in the final model.

Compare the models

AIC, BIC

# Gather every candidate model in a named list; the names become the `model`
# column of the comparison table.
models <- list(
  model_1 = model_1, model_2 = model_2, model_3 = model_3,
  model_4 = model_4, model_5 = model_5, model_6 = model_6,
  model_6.1 = model_6.1,
  model_7 = model_7, model_8 = model_8, model_9 = model_9,
  model_10 = model_10, model_11 = model_11, model_12 = model_12,
  model_13 = model_13, model_14 = model_14, model_15 = model_15,
  model_16 = model_16
)

# One glance() row per model, ordered from lowest (best) to highest AIC.
models %>%
  map_df(glance, .id = "model") %>%
  arrange(AIC)
## # A tibble: 17 x 8
##    model        df logLik    AIC    BIC deviance df.residual  nobs
##    <chr>     <dbl>  <dbl>  <dbl>  <dbl>    <dbl>       <dbl> <int>
##  1 model_16  34.1  -7521. 15112. 15307.  448554.       1766.  1800
##  2 model_15  33.6  -7521. 15114. 15310.  448818.       1766.  1800
##  3 model_10  35.0  -7520. 15114. 15316.  448353.       1765.  1800
##  4 model_11  35.8  -7520. 15114. 15318.  448260.       1764.  1800
##  5 model_12  35.8  -7520. 15114. 15318.  448260.       1764.  1800
##  6 model_13  36.4  -7520. 15115. 15322.  448236.       1764.  1800
##  7 model_14  36.6  -7520. 15115. 15324.  448164.       1763.  1800
##  8 model_9   36.6  -7520. 15115. 15324.  448171.       1763.  1800
##  9 model_8   34.0  -7553. 15178. 15377.  464929.       1766.  1800
## 10 model_6.1 15.0  -7960. 15953. 16044.  730695.       1785.  1800
## 11 model_5   18.0  -7958. 15953. 16058.  728998.       1782.  1800
## 12 model_6   13.9  -7962. 15955. 16040.  732449.       1786.  1800
## 13 model_4   16.8  -7960. 15956. 16055.  731007.       1783.  1800
## 14 model_2   15.4  -7967. 15967. 16057.  736538.       1785.  1800
## 15 model_3    2.   -8659. 17323. 17340. 1588802.       1798   1800
## 16 model_1    3.94 -8950. 17910. 17938. 2196139.       1796.  1800
## 17 model_7    2.00 -9027. 18061. 18077. 2393383.       1798   1800

R2

# Adjusted R-squared of every candidate model, highest first.
# Iterating over the `models` list built for the AIC/BIC comparison avoids
# keeping a second hand-written list of models in sync with it.
models %>%
  map_dbl(function(fit) summary(fit)$r.sq) %>% # named numeric vector, one per model
  enframe() %>%                                # -> tibble with columns `name`, `value`
  arrange(desc(value))
## # A tibble: 17 x 2
##    name        value
##    <chr>       <dbl>
##  1 model_16  0.810  
##  2 model_10  0.810  
##  3 model_15  0.810  
##  4 model_11  0.810  
##  5 model_12  0.810  
##  6 model_14  0.810  
##  7 model_9   0.810  
##  8 model_13  0.810  
##  9 model_8   0.803  
## 10 model_5   0.694  
## 11 model_6.1 0.694  
## 12 model_4   0.693  
## 13 model_6   0.693  
## 14 model_2   0.691  
## 15 model_3   0.339  
## 16 model_1   0.0849 
## 17 model_7   0.00379

ANOVA

# Analysis-of-deviance table comparing all candidate models.
# The final argument was previously the truncated name `model_`, which made the
# call fail with "object 'model_' not found"; it is now `model_16`.
# NOTE: re-knit the document to refresh the output shown below this chunk.
anova(
  model_1,
  model_2,
  model_3,
  model_4,
  model_5,
  model_6,
  model_6.1,
  model_7,
  model_8,
  model_9,
  model_10,
  model_11,
  model_12,
  model_13,
  model_14,
  model_15,
  model_16
)
## Error in anova.gam(model_1, model_2, model_3, model_4, model_5, model_6, : object 'model_' not found

Perform checks on the best fitting model_16

gam.check and concurvity

# Run mgcv's diagnostics on the selected model: prints convergence information
# and the basis-dimension (k) check for every smooth term.
gam.check(model_16)

## 
## Method: REML   Optimizer: outer newton
## full convergence after 11 iterations.
## Gradient range [-0.001914579,0.004457845]
## (score 7508.032 & scale 254.0128).
## Hessian positive definite, eigenvalue range [9.299123e-05,886.5037].
## Model rank =  106 / 107 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                      k'   edf k-index p-value    
## s(total_rain):locationBilla Billa 15.00  1.00    0.71  <2e-16 ***
## s(total_rain):locationTosari      15.00  3.44    0.71  <2e-16 ***
## s(distance):locationBilla Billa    3.00  1.00    0.45  <2e-16 ***
## s(distance):locationTosari         3.00  1.97    0.45  <2e-16 ***
## s(avg_temp):locationBilla Billa   17.00  5.70    0.72  <2e-16 ***
## s(avg_temp):locationTosari        17.00  1.01    0.72  <2e-16 ***
## s(avg_rh)                         17.00  1.00    0.71  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Comments on the model check

The low k-index values and significant p-values reported by gam.check() suggest that the basis dimensions for total_rain, distance, avg_temp and avg_rh may be too small, so the model fit could have been better with more basis functions.

# Concurvity of each term against the rest of the model, for the final model.
# This previously inspected model_15, although this section checks model_16
# (the model passed to gam.check() above).
# NOTE: re-knit the document to refresh the output shown below, which was
# produced from model_15 and therefore has no s(avg_rh) column.
concurvity(model_16, full = TRUE)
##          para s(total_rain):locationBilla Billa s(total_rain):locationTosari
## worst       1                          1.146525                 2.839383e+30
## observed    1                          1.000000                 2.922429e+00
## estimate    1                          1.000000                 1.000000e+00
##          s(distance):locationBilla Billa s(distance):locationTosari
## worst                          1.0000000                  1.0000000
## observed                       0.2210998                  0.3684495
## estimate                       0.2280612                  0.2012842
##          s(avg_temp):locationBilla Billa s(avg_temp):locationTosari
## worst                           1.127943               7.104991e+29
## observed                        1.000000               9.783470e-01
## estimate                        1.000000               1.000000e+00

Final thoughts

Model_16 appears to be the best-fitting model, based on its lowest AIC and BIC values and its high adjusted R2 value.