In this notebook we will generate a larger ensemble of fields and run some statistical analyses.
These calculations were run with fldgen v1.0.0.
These are the parameters we will be using for the calculations below.
## parameters for the code below.
ngen <- 20 # number of fields to generate
set.seed(8675309) # Set RNG seed so results will be reproducible
siglvl <- 0.05 # significance level for statistical tests
method <- 1 # method to use in mkcorrts
datadir <- './data' # directory containing the training data read by train()
## Train the emulator on the data in datadir.  latvar/lonvar give the names of
## the latitude and longitude coordinate variables in the input files.
## NOTE(review): set.seed is placed before train(); if train() consumes RNG
## draws, moving these statements would change the generated fields.
emulator <- train(datadir, latvar='lat_2', lonvar='lon_2')
Generate the fields. We’ll save the basis projections, but we’ll just run statistics on the reconstituted fields (saving the full fields for every realization would take a lot of memory).
## NB: You cannot do the first step in parallel because despite appearances, it is not a pure
## function.  Each call depends on the last via the RNG!  We could fix this by using a RNG
## with independent streams, if it looks like a problem.
## (seq_len is the safe way to build the index sequence; 1:ngen misbehaves for ngen == 0.)
projts <- lapply(seq_len(ngen), function(x) {mkcorrts(emulator, method=method)})
## Reconstitute a full field matrix from a matrix of basis-projection
## coordinates.  Unlike mkcorrts, this is a pure function of its argument,
## so it is safe to run in parallel below.
wrkfun <- function(bcoord) {
  reconst_fields(emulator$reof$rotation, bcoord)
}
## Reconstitute the fields in parallel, then stack the per-realization results
## into a single matrix (rows = field snapshots, columns = grid cells).
cl <- makeForkCluster()
fields_m <- do.call(rbind, parLapply(cl, projts, wrkfun))
## Per-grid-cell variance of the generated fields
cellvar <- parApply(cl, fields_m, 2, var)
## Per-grid-cell Shapiro-Wilk p-values (normality test on each cell's values)
swrslt <- parApply(cl, fields_m, 2, swpval)
stopCluster(cl)
Run the variance analysis.
## Per-cell variance of the observed (training) fields, to compare against the
## per-cell variance of the generated fields computed above.
obscellvar <- apply(emulator$pscl$r, 2, var)
## Degrees of freedom: generated fields (numerator), observed fields (denominator)
df1 <- nrow(fields_m) - 1
df2 <- nrow(emulator$pscl$r) - 1
## Variance-ratio (F) statistic for each grid cell
fstats <- cellvar / obscellvar
## Power of the F-test at several hypothetical effect sizes (fractional
## variance differences).  vapply (instead of sapply) guarantees the result
## is a named numeric vector regardless of input.
testpwrs <- vapply(c('1%'=0.01, '2.5%'=0.025, '5%'=0.05, '10%'=0.1),
                   function(x) {power.f(df1, df2, x, siglvl)},
                   numeric(1))
falpha <- c(siglvl/2, 1-siglvl/2) # tail probabilities for a two-tailed test
fq <- qf(falpha, df1, df2)        # corresponding critical F-values
message('fq= ', paste(fq, collapse=', '))
#> fq= 0.893143868317811, 1.12245589410826
## Summarize the distribution of the variance-ratio statistics
print(summary(fstats))
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.8818 0.9756 0.9957 0.9954 1.0150 1.1180
fquantiles <- quantile(fstats, probs=c(0.01, 0.05, 0.95, 0.99))
print(fquantiles)
#> 1% 5% 95% 99%
#> 0.9187308 0.9443490 1.0454607 1.0653691
## Fraction of grid cells where the two-tailed F-test rejects
rejected <- fstats < fq[1] | fstats > fq[2]
fracfail <- mean(rejected)
message('fracfail= ', fracfail)
#> fracfail= 0.000180844907407407
message('siglevel= ', siglvl)
#> siglevel= 0.05
## Report the test power at each hypothetical effect size computed above
for(es in names(testpwrs)) {
  message('testpower(ES=', es, '): ', testpwrs[[es]])
}
#> testpower(ES=1%): 0.0530470375488386
#> testpower(ES=2.5%): 0.0699616296803249
#> testpower(ES=5%): 0.131136121167795
#> testpower(ES=10%): 0.367470031696858
Compute the Shapiro-Wilk statistics (the tests themselves have already been run above).
## Fraction of grid cells where the Shapiro-Wilk test rejects normality,
## and the power of that test at this sample size.
fracfailsw <- mean(swrslt < siglvl)
swpower <- pwr.sw(nrow(fields_m), siglvl)
message('Shapiro-Wilk fracfail= ', fracfailsw)
#> Shapiro-Wilk fracfail= 0.0575810185185185
message('Shapiro-Wilk power= ', swpower)
#> Shapiro-Wilk power= 0.998
Next run the covariance analysis.
## TRUE when basis-projection columns i and j of phi are significantly
## correlated at the siglvl threshold.  NB: phi is resolved lazily from the
## enclosing environment at call time, so defining it below is fine.
iscorr <- function(i,j) {
cor.test(phi[,i], phi[,j], method='pearson', alternative='two.sided')$p.value < siglvl
}
## Stack the per-realization projection matrices into one matrix
## (rows = time steps across all realizations, columns = EOF coordinates).
phi <- do.call(rbind, projts)
## skip i=1 in this test because it is EOF-0 and isn't expected to follow this rule
neof <- ncol(phi)-1
## Fork cluster: workers inherit phi, iscorr, and siglvl without explicit export.
cl <- makeForkCluster()
## Count significantly-correlated pairs (i, j) with 2 <= i < j <= ncol(phi).
## Load-balanced scheduling because inner work shrinks as i grows.
ncor <- sum(
parSapplyLB(cl, seq(2, neof-1),
function(i) {
sum(simplify2array(Map(iscorr, i, seq(i+1, ncol(phi)))))
})
)
stopCluster(cl)
## fraction correlated
## NOTE(review): the loop above tests neof*(neof-1)/2 - 1 pairs (i runs only
## to neof-1 while j runs to ncol(phi) = neof+1), so this denominator
## overcounts by exactly one pair -- negligible for large neof, but confirm
## the intended pair enumeration.
fcor <- ncor / (neof*(neof-1)/2)
fcor
#> [1] 0.05063047
## print the power of the test for several hypothetical r values. Note the test is symmetric in r,
## so we only need to consider positive values
rvals <- c(0.01, 0.05, 0.1)
## Use named arguments, and the siglvl parameter declared at the top of the
## notebook rather than a repeated magic number (siglvl == 0.05, so the
## result is unchanged).
pwr::pwr.r.test(n = nrow(phi), r = rvals, sig.level = siglvl)
#>
#> approximate correlation power calculation (arctangh transformation)
#>
#> n = 1900
#> r = 0.01, 0.05, 0.10
#> sig.level = 0.05
#> power = 0.0720033, 0.5871140, 0.9920493
#> alternative = two.sided