In this notebook we will generate a larger ensemble of fields and run some statistical analyses.

Software version

These calculations were run with fldgen-v1.0.0

Setup

These are the parameters we will be using for the calculations below.

## parameters for the code below.
## NB: these are globals read by every subsequent chunk; changing them (or the
## seed position) changes all downstream results.
ngen <- 20            # number of fields to generate
set.seed(8675309)     # Set RNG seed so results will be reproducible
siglvl <- 0.05        # significance level for statistical tests
method <- 1           # method to use in mkcorrts
datadir <- './data'   # directory holding the training data for the emulator
## Train the field emulator on the data in datadir.  train() is supplied by
## fldgen; latvar/lonvar name the latitude/longitude variables in the input
## files (presumably netCDF -- confirm against the fldgen documentation).
emulator <- train(datadir, latvar='lat_2', lonvar='lon_2')

Generate the fields. We’ll save the basis projections, but we’ll run statistics only on the reconstituted fields (keeping everything else takes a lot of memory).

## NB: You cannot do the first step in parallel because despite appearances, it is not a pure
##     function.  Each call depends on the last via the RNG!  We could fix this by using a RNG
##     with independent streams, if it looks like a problem.
## Generate ngen sets of basis projections (one per ensemble member).
projts <- lapply(1:ngen, function(x) {mkcorrts(emulator, method=method)})
## Reconstitute a full field from one set of basis-projection coordinates.
## reconst_fields is supplied by fldgen; emulator$reof$rotation presumably
## holds the (rotated) EOF basis vectors -- confirm against the package docs.
wrkfun <- function(bcoord) {
  reconst_fields(emulator$reof$rotation, bcoord)
}
## Fork-based cluster: child processes inherit the current workspace, so
## `emulator` and `wrkfun` need not be exported explicitly (not available on
## Windows).
cl <- makeForkCluster()
## Reconstitute all members in parallel and stack them row-wise into one
## matrix; columns correspond to grid cells (see the margin-2 applies below).
fields_m <- parLapply(cl, projts, wrkfun) %>% do.call(rbind, .)

## compute the variance in each grid cell
cellvar <- parApply(cl, fields_m, 2, var)
## Shapiro-Wilk normality p-value for each grid cell (swpval is a helper from
## the analysis code; the results are summarized in a later section).
swrslt <- parApply(cl, fields_m, 2, swpval)

stopCluster(cl)

Run the variance analysis.

## Per-cell variance of the training residuals.  emulator$pscl$r is
## presumably the residual field left over from the pattern-scaling fit --
## confirm against the fldgen documentation.
obscellvar <- apply(emulator$pscl$r, 2, var)
## Ratio of generated to observed variance in each grid cell: an F statistic
## that should be near 1 if the emulator reproduces the training variance.
fstats <- cellvar / obscellvar
df1 <- nrow(fields_m) - 1          # numerator df (generated ensemble size - 1)
df2 <- nrow(emulator$pscl$r) - 1   # denominator df (training sample size - 1)

## Power of the F test against several hypothetical effect sizes (power.f is
## a helper from the analysis code; the names label the effect sizes).
testpwrs <- sapply(c('1%'=0.01, '2.5%'=0.025, '5%'=0.05, '10%'=0.1),
                   function(x) {power.f(df1, df2, x, siglvl)})
falpha <- c(siglvl/2, 1-siglvl/2)    # tail probabilities for the two-tailed test
fq <- qf(falpha, df1, df2)           # corresponding critical F-values
message('fq= ', paste(fq, collapse=', '))
#> fq= 0.893143868317811, 1.12245589410826

## Summary of the per-cell F statistics; under the null hypothesis these
## should cluster around 1.
print(summary(fstats))
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>  0.8818  0.9756  0.9957  0.9954  1.0150  1.1180
fquantiles <- quantile(fstats, probs=c(0.01, 0.05, 0.95, 0.99))
print(fquantiles)
#>        1%        5%       95%       99% 
#> 0.9187308 0.9443490 1.0454607 1.0653691
## Fraction of grid cells whose F statistic falls outside the two-tailed
## critical region; under the null this should be approximately siglvl.
fracfail <- sum(fstats < fq[1] | fstats > fq[2]) / length(fstats)
message('fracfail= ', fracfail)
#> fracfail= 0.000180844907407407
message('siglevel= ', siglvl)
#> siglevel= 0.05
## Report the power of the F test for each hypothetical effect size
## computed above.
for(es in names(testpwrs)) {
  message('testpower(ES=', es, '):  ', testpwrs[[es]])
}
#> testpower(ES=1%):  0.0530470375488386
#> testpower(ES=2.5%):  0.0699616296803249
#> testpower(ES=5%):  0.131136121167795
#> testpower(ES=10%):  0.367470031696858

Compute the Shapiro-Wilk statistics (the tests themselves have already been run above).

## Fraction of grid cells that reject normality at siglvl; if the generated
## fields are truly Gaussian this should be close to siglvl itself.
fracfailsw <- sum(swrslt < siglvl) / length(swrslt)
## Power of the Shapiro-Wilk test at this sample size (pwr.sw is a helper
## from the analysis code).
swpower <- pwr.sw(nrow(fields_m), siglvl)

message('Shapiro-Wilk fracfail= ', fracfailsw)
#> Shapiro-Wilk fracfail= 0.0575810185185185
message('Shapiro-Wilk power= ', swpower)
#> Shapiro-Wilk power= 0.998

Next run the covariance analysis.

## Test whether basis-projection columns i and j of `phi` are significantly
## correlated.  Relies on `phi` (projection matrix) and `siglvl` being
## defined in the enclosing (global) environment.  Returns TRUE when the
## two-sided Pearson test rejects the no-correlation null at siglvl.
iscorr <- function(i, j) {
    ctest <- cor.test(phi[, i], phi[, j],
                      method = 'pearson', alternative = 'two.sided')
    ctest$p.value < siglvl
}

## Covariance analysis: test every pair of basis-projection columns for
## significant Pearson correlation.  Under the null (projections mutually
## uncorrelated) roughly siglvl of the pairs should reject by chance.
phi <- do.call(rbind, projts)
## skip i=1 in this test because it is EOF-0 and isn't expected to follow this rule
neof <- ncol(phi)-1
cl <- makeForkCluster()

## For each column i, count significant correlations with columns j > i.
## BUGFIX: the outer index must run to neof (== ncol(phi)-1), not neof-1;
## stopping at neof-1 silently skipped the final pair
## (ncol(phi)-1, ncol(phi)) even though the denominator below counts it.
ncor <- sum(
  parSapplyLB(cl, seq(2, neof),
            function(i) {
              sum(simplify2array(Map(iscorr, i, seq(i+1, ncol(phi)))))
            })
)
stopCluster(cl)
## fraction correlated, out of all neof*(neof-1)/2 pairs among columns
## 2..ncol(phi)
fcor <- ncor / (neof*(neof-1)/2)
fcor
#> [1] 0.05063047

## print the power of the test for several hypothetical r values.  Note the test is symmetric in r,
## so we only need to consider positive values
rvals <- c(0.01, 0.05, 0.1)
## Use the analysis-wide significance level rather than a hard-coded 0.05, so
## changing siglvl in the setup chunk propagates here as well (the recorded
## output below was produced with siglvl = 0.05, giving identical results).
pwr::pwr.r.test(nrow(phi), rvals, sig.level = siglvl)
#> 
#>      approximate correlation power calculation (arctangh transformation) 
#> 
#>               n = 1900
#>               r = 0.01, 0.05, 0.10
#>       sig.level = 0.05
#>           power = 0.0720033, 0.5871140, 0.9920493
#>     alternative = two.sided