How many months?

# One row per station record: keep the identifying columns and estimate the
# record length in months from its first and last year.
m <- mutate(
  select(stations, station_name, interval, start, end, normals),
  nmonths = 12 * (end - start)
)
m
## # A tibble: 26,316 x 6
##    station_name        interval start   end normals nmonths
##    <chr>               <chr>    <dbl> <dbl> <lgl>     <dbl>
##  1 DAYSLAND            day       1908  1922 FALSE       168
##  2 DAYSLAND            hour        NA    NA FALSE        NA
##  3 DAYSLAND            month     1908  1922 FALSE       168
##  4 EDMONTON CORONATION day       1978  1979 FALSE        12
##  5 EDMONTON CORONATION hour        NA    NA FALSE        NA
##  6 EDMONTON CORONATION month     1978  1979 FALSE        12
##  7 FLEET               day       1987  1990 FALSE        36
##  8 FLEET               hour        NA    NA FALSE        NA
##  9 FLEET               month     1987  1990 FALSE        36
## 10 GOLDEN VALLEY       day       1987  1998 FALSE       132
## # … with 26,306 more rows
# Total record length over all rows, converted from months to station-years
sum(m[["nmonths"]], na.rm = TRUE) / 12
## [1] 402648
# Per-column summary of m (missing values, completeness, distribution)
skim(m)
Data summary
Name m
Number of rows 26316
Number of columns 6
_______________________
Column type frequency:
character 2
logical 1
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
station_name 0 1 3 51 0 7947 0
interval 0 1 3 5 0 3 0

Variable type: logical

skim_variable n_missing complete_rate mean count
normals 0 1 0.08 FAL: 24153, TRU: 2163

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
start 8160 0.69 1967.90 30.00 1840 1956 1970 1989 2020 ▁▁▂▇▅
end 8160 0.69 1990.07 23.66 1878 1974 1993 2007 2021 ▁▁▂▆▇
nmonths 8160 0.69 266.13 283.27 0 60 168 372 2124 ▇▂▁▁▁
# Records that start before 1850, printed as a plain data frame so that all
# columns are displayed
stations %>%
  filter(start < 1850) %>%
  as.data.frame()
##   prov station_name station_id climate_id WMO_id TC_id   lat   lon  elev
## 1   ON      TORONTO       5051    6158350  71266  <NA> 43.67 -79.4 112.5
## 2   ON      TORONTO       5051    6158350  71266  <NA> 43.67 -79.4 112.5
##          tz interval start  end normals
## 1 Etc/GMT+5      day  1840 2017    TRUE
## 2 Etc/GMT+5    month  1840 2006    TRUE
# Number of distinct station names with a record starting before 1900
stations %>%
  filter(start < 1900) %>%
  distinct(station_name) %>%
  nrow()
## [1] 365

Stations with hourly, daily and monthly data

# Search the station list for names matching "Winnipeg". The start/end
# arguments presumably restrict results to records covering the year 2000
# (confirm against ?stations_search).
stations_search("Winnipeg", starts_latest = 2000, ends_earliest = 2000)
## # A tibble: 14 x 14
##    prov  station_name station_id climate_id WMO_id TC_id   lat   lon  elev tz   
##    <chr> <chr>             <dbl> <chr>       <dbl> <chr> <dbl> <dbl> <dbl> <chr>
##  1 MB    WINNIPEG A …      27174 502S001     71849 XWG    49.9 -97.2  239. Etc/…
##  2 MB    WINNIPEG A …      27174 502S001     71849 XWG    49.9 -97.2  239. Etc/…
##  3 MB    WINNIPEG A …      27525 5023223        NA <NA>   49.9 -97.2  239. Etc/…
##  4 MB    WINNIPEG A …      27525 5023223        NA <NA>   49.9 -97.2  239. Etc/…
##  5 MB    WINNIPEG RI…       3698 5023222     71852 YWG    49.9 -97.2  239. Etc/…
##  6 MB    WINNIPEG RI…       3698 5023222     71852 YWG    49.9 -97.2  239. Etc/…
##  7 MB    WINNIPEG RI…       3698 5023222     71852 YWG    49.9 -97.2  239. Etc/…
##  8 MB    WINNIPEG SO…      27230 502M001        NA <NA>   49.8 -97.1  232  Etc/…
##  9 MB    WINNIPEG SO…      27230 502M001        NA <NA>   49.8 -97.1  232  Etc/…
## 10 MB    WINNIPEG TH…      28051 5023262     71579 XWN    49.9 -97.1  230  Etc/…
## 11 MB    WINNIPEG TH…      28051 5023262     71579 XWN    49.9 -97.1  230  Etc/…
## 12 MB    WINNIPEG TH…      28051 5023262     71579 XWN    49.9 -97.1  230  Etc/…
## 13 MB    WINNIPEGOSIS       3854 5043225        NA <NA>   51.6 -99.9  258. Etc/…
## 14 MB    WINNIPEGOSIS       3854 5043225        NA <NA>   51.6 -99.9  258. Etc/…
## # … with 4 more variables: interval <chr>, start <dbl>, end <dbl>,
## #   normals <lgl>
# Station IDs of the two stations in the search result that appear with
# three rows each, presumably one row per interval (hour, day, month)
s <- c(3698, 28051)

How much data for one month of hourly records?

# Download one month (January 2000) of hourly data for both stations. The
# interval is spelled out to match the daily and monthly downloads below
# rather than relying on the function's default.
d <- weather_dl(s, start = "2000-01-01", end = "2000-01-31", interval = "hour")
## As of weathercan v0.3.0 time display is either local time or UTC
## See Details under ?weather_dl for more information.
## This message is shown once per session
# In-memory size of the downloaded hourly data, divided by the number of
# stations requested to give an average size per station for one month
size_hour <- object_size(d) / length(s) # Average Per station
size_hour
## 210 kB

How much data for one month of daily records?

# One month (January 2000) of daily data for the same stations
d <- weather_dl(
  s,
  interval = "day",
  start = "2000-01-01",
  end = "2000-01-31"
)
# Average in-memory size per station for one month of daily data
size_day <- object_size(d) / length(s)
size_day
## 13.1 kB

How much data for one year of monthly records?

# One year (2000) of monthly data for the same stations
d <- weather_dl(
  s,
  interval = "month",
  start = "2000-01-01",
  end = "2000-12-31"
)
# Average in-memory size per station for one year of monthly data
size_month <- object_size(d) / length(s)
size_month
## 6.85 kB

How much data?

# Estimated size of every station record, summed over all rows.
# size_hour and size_day were measured for one month of data, so they scale
# with nmonths directly; size_month was measured for one year, so nmonths is
# converted to years first. Rows with unknown length (NA) are ignored.
total <- sum(
  with(m, case_when(
    interval == "hour" ~ nmonths * size_hour,
    interval == "day" ~ nmonths * size_day,
    interval == "month" ~ nmonths/12 * size_month
  )),
  na.rm = TRUE
)
total
## [1] 122931749776
# Display the byte count in human-readable units
total %>%
  structure(class = "object_size") %>%
  format(units = "auto")
## [1] "114.5 Gb"

Climate normals

# Open the directory listing for the 1981-2010 climate normals
n <- html_session(file.path(getOption("weathercan.urls.normals"), "1981-2010"))

# For each province, follow its link and collect the size tokens found at the
# end of each line of the <pre> listing: the first pattern grabs the token
# plus trailing spaces and newline, the second strips that padding.
# NOTE(review): only plain and "K"-suffixed values are matched; confirm that
# no file is listed with a different suffix.
size <- lapply(
  setNames(nm = as.character(weathercan:::province)),
  function(prov) {
    follow_link(n, prov) %>%
      html_nodes(css = "pre") %>%
      html_text() %>%
      str_extract_all("[0-9K.]{1,5}[ ]*\n", simplify = TRUE) %>%
      str_extract_all("[0-9K.]{1,5}", simplify = TRUE) %>%
      as.vector()
  }
)
## Navigating to AB/
## Navigating to BC/
## Navigating to MB/
## Navigating to NB/
## Navigating to NL/
## Navigating to NT/
## Navigating to NS/
## Navigating to NU/
## Navigating to ON/
## Navigating to PE/
## Navigating to QC/
## Navigating to SK/
## Navigating to YT/
# Total size of the climate-normals files: convert each listed size to a
# number, scale "K"-suffixed values by 1000, and show the sum in
# human-readable units.
tibble(char = unlist(size, use.names = FALSE)) %>%
  mutate(
    multiplier = if_else(str_detect(char, "K"), 1000, 1),
    size = as.numeric(str_remove(char, "K")) * multiplier
  ) %>%
  pull(size) %>%
  sum() %>%
  structure(class = "object_size") %>%
  format(units = "auto")
## [1] "5.7 Mb"