Best linear unbiased prediction (BLUP) is used in plant breeding experiments for the selection of the best lines in multi-environmental trials (MET).

In this section, a comparison will be made between the H2Cal() function of the inti package (Lozano-Isla, 2020) and the code using the asrml package published by Buntaran et al. (2020).

For the comparison we will use the two methods described by Buntaran et al. (2020). (i) The “two-stage” analysis, using two stages, the first to calculate the BLUEs and the second the BLUPs. (ii) The “one stage” analysis using only one model to calculate the BLUPs.

Load data

Two-stage analysis

asreml

library(asreml)
library(data.table)
library(plyr)
library(stringr)
asreml.options(maxit=100) # Set asreml iteration

############################
##### Stage I LSMEANS #####
##### per location   #####

ww <- data.table(fb)

##### Make column Zone_Loc #####

trials     <- nlevels(ww$env)
envs <- levels(ww$env)

##### Make data list for Stage I #####
data_list <- matrix(data=list(), nrow=length(envs), ncol=1, 
                    dimnames=list(envs, c("data_Set")))

##### Make a list of Trials #####
for(i in 1:trials){
  print(i)
  b <- levels(ww$env)
  c <- b[i]
  env <- as.factor(c)
  env <- data.table(env)
  f <- merge(ww,env,by="env")
  assign(paste0("data_", b[i]), f)
  data_list[[i, "data_Set" ]] <- f
  
  rm(b, c, f, env)
}

data_list <- data.table(ldply(data_list[, "data_Set"], data.frame, .id="env"))

stgI_list <- matrix(data=list(), nrow=length(envs), ncol=1, 
                    dimnames=list(envs, c("lsmeans")))

asreml.options(maxit=100) # Set asreml iteration

############################
##### Stage I LSMEANS #####
##### per location   #####

for (i in envs){
  
  edat <- droplevels(subset(ww, env == i))
  
  print(i)
  
  mod.1 <- asreml(fixed     = yield ~ cultivar,
                  random      = ~ rep + rep:alpha,
                  data        = edat,
                  predict     = predict.asreml(classify = "cultivar"))
  
  update.asreml(mod.1)
  print(summary.asreml(mod.1)$varcomp)
  
  blue <- predict(mod.1, classify="cultivar", levels=levels(edat$cultivar), vcov=TRUE,aliased = T) # get the lsmeans
  blue.1 <- data.table(blue$pvals)[, c(1:3)] 
  names(blue.1) <- c("cultivar", "yield_lsm", "se")
  blue.1[ , ':='(var=se^2, smith.w=diag(solve(blue$vcov)))] # calculate the Smith's weight
  
  stgI_list[[i, "lsmeans" ]] <- blue.1 # put all the results of Stage 1 in the list
  
  rm(Edat,mod.1, blue, blue.1)
}

#######################################################
##### Preparing dataset of Stage I for Stage II ######

##### Unlist the results of Stage I and format as data.table #####
stgII_list <- data.table(plyr::ldply(stgI_list[, "lsmeans"], data.frame, .id="env"))

stgII_list$zone<- factor(str_split_fixed(stgII_list$env, "_", 2)[,1]) # Make Zone column by split the record in Zone_Loc column
stgII_list$location <- factor(str_split_fixed(stgII_list$env, "_", 3)[,2])  # Make Location by split the record in Zone_Loc column
stgII_list$year <- factor(str_split_fixed(stgII_list$env, "_", 3)[,3]) # Make Year by split the record in Zone_Loc column

blues.asreml <- stgII_list

############################
##### Stage II BLUPs ######
##### Zone analysis #####

model <- asreml(yield_lsm  ~ zone,
                random    = ~cultivar + zone:location + zone:cultivar + cultivar:zone:location,
                weights   = smith.w,
                family    = asr_gaussian(dispersion=1.0), # fix residual variance to 1
                data      = blues.asreml,
                predict   = predict.asreml(classify = "cultivar")
                )

update.asreml(model)

# print(summary.asreml(model)$varcomp) # print the variance components

blups.asrml <- data.frame((model$predictions$pvals[1:4]))

H2cal

library(inti)
library(purrr)

#> First stage

envs <- levels(fb$env)

model <- 1:length(envs) %>% map(function(x) {
  
  model <- fb %>% filter(env %in% envs[x]) %>% 
    
    H2cal(trait = "yield"
          , gen.name = "cultivar"
          , rep.n = 4
          , ran.model = "1 + (1|rep:alpha) + (1|cultivar)"
          , fix.model = "0 + (1|rep:alpha) + cultivar"
          , emmeans = F
          )
  
  blues <- model$blues %>% mutate(trial = levels(fb$env)[x])
  
  })

blues.h2cal <- bind_rows(model) %>% 
  separate(trial, c("zone", "location", "year")) %>% 
  mutate(across(c(yield, smith.w), as.numeric)) %>% 
  mutate(across(!c(yield, smith.w), as.factor))

#> Second stage

met <- blues.h2cal %>%
  mutate(across(!yield, as.factor)) %>%
  H2cal(trait = "yield"
        , gen.name = "cultivar"
        , rep.n = 4
        , loc.n = 18
        , loc.name = "Location"
        , ran.model = "1 + (1|zone:location) + (1|zone:cultivar) + (1|cultivar)"
        , fix.model = "0 + (1|zone:location) + (1|zone:cultivar) + cultivar"
        , emmeans = T
        # , weights = blues.h2cal$smith.w
        )

blups.h2cal <- met$blups
  • First stage
    • Fixed model with 0 + for avoid intercep and calculate all the BLUEs.
    • emmeans = F to calculate the Smith weitghts in the first stage

BLUEs comparison

blues.comp <- merge(blues.asreml 
                    , blues.h2cal 
                    , by = c("cultivar", "zone", "location"))

rs <- cor(blues.comp$yield, blues.comp$yield_lsm) # 0.9999286
rs
## [1] 0.9999286

blues.comp %>% web_table()

The BLUEs correlation between H2Cal and asrml is (r = 0.9999286)

BLUPs comparison

blups.comp <- merge(blups.asrml, blups.h2cal
                    , by = c("cultivar"))

rs <- cor(blups.comp$yield, blups.comp$predicted.value) # 0.9820397
rs
## [1] 0.9820397

blups.comp %>% web_table()

The BLUPs correlation between H2Cal and asrml is (r = 0.9820397)

Single-stage analysis

asreml

library(asreml)

options("scipen"=100,"digits"= 4 )
asreml.options(maxit=100) # Set asreml iteration

##### Fit a single-stage model #####
## incomplete block and replicate location-specific
## location-specifice residual variance

mod <- asreml(fixed       = yield ~ zone,
              random      = ~ rep:at(location) + rep:alpha:at(location) + zone:location + 
                             cultivar + cultivar:zone:location+ cultivar:zone,
              residual    = ~ dsum(~(units)|location),
              data        = fb,
              predict     = predict.asreml(classify = "cultivar"))

update.asreml(mod)

blups.asreml <- data.frame((mod$predictions$pvals[1:4])) 

H2cal

library(inti)

model <- fb %>% 
  H2cal(trait = "yield"
        , gen.name = "cultivar"
        , loc.name = "location"
        , rep.n = 2
        , loc.n = 18
        , ran.model = "1 + (1|rep:location) + (1|rep:alpha:location) + (1|cultivar:zone:location) + (1|cultivar:zone) + (1|cultivar:location) + (1|cultivar)"
        , fix.model = "0 + (1|rep:location) + (1|rep:alpha:location) + (1|cultivar:zone:location) + (1|cultivar:zone) + (1|cultivar:location) + cultivar"
        , summary = T
        , emmeans = T
        , plot_diag = F
        )
## Linear mixed model fit by REML ['lmerMod']
## Formula: yield ~ 1 + (1 | rep:location) + (1 | rep:alpha:location) + (1 |  
##     cultivar:zone:location) + (1 | cultivar:zone) + (1 | cultivar:location) +  
##     (1 | cultivar)
##    Data: dt.rm
## Weights: weights
## 
## REML criterion at convergence: 12023
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.1177 -0.3295 -0.0109  0.3574  4.0993 
## 
## Random effects:
##  Groups                 Name        Variance      Std.Dev. 
##  cultivar:location      (Intercept)     0.0005286   0.02299
##  cultivar:zone:location (Intercept)  2208.5761240  46.99549
##  rep:alpha:location     (Intercept)   942.4310254  30.69904
##  cultivar:zone          (Intercept)   128.9649012  11.35627
##  rep:location           (Intercept) 42663.3346159 206.55105
##  cultivar               (Intercept)   727.6650414  26.97527
##  Residual                            1397.5707455  37.38410
## Number of obs: 1069, groups:  
## cultivar:location, 539; cultivar:zone:location, 539; rep:alpha:location, 251; cultivar:zone, 90; rep:location, 36; cultivar, 30
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   847.60      34.93   24.27
## optimizer (nloptwrap) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues

blups.h2cal <- model$blups 

BLUPs comparison

blups.comp <- merge(blups.asreml, blups.h2cal, by = c("cultivar"))

rs <- cor(blups.comp$predicted.value, blups.comp$yield) # 0.9202
rs
## [1] 0.9202313

blups.comp %>% web_table()

The BLUPs correlation between H2Cal and asrml is (r = 0.9202313)

References

Buntaran, H., H.-P. Piepho, P. Schmidt, J. Rydén, M. Halling, et al. 2020. Cross-validation of stagewise mixed-model analysis of Swedish variety trials with winter wheat and spring barley. Crop Science 60(5): 2221–2240. doi: 10.1002/csc2.20177.
Lozano-Isla, F. 2020. Inti: Tools and statistical procedures in plant science.