Best linear unbiased prediction (BLUP) is used in plant breeding experiments for the selection of the best lines in multi-environmental trials (MET).
In this section, a comparison will be made between the H2Cal() function of the inti package (Lozano-Isla, 2020) and the code using the asrml package published by Buntaran et al. (2020).
For the comparison we will use the two methods described by Buntaran et al. (2020). (i) The “two-stage” analysis, using two stages, the first to calculate the BLUEs and the second the BLUPs. (ii) The “one stage” analysis using only one model to calculate the BLUPs.
library(asreml)
library(data.table)
library(plyr)
library(stringr)
asreml.options(maxit=100) # Set asreml iteration
############################
##### Stage I LSMEANS #####
##### per location #####
ww <- data.table(fb)
##### Make column Zone_Loc #####
trials <- nlevels(ww$env)
envs <- levels(ww$env)
##### Make data list for Stage I #####
data_list <- matrix(data=list(), nrow=length(envs), ncol=1,
dimnames=list(envs, c("data_Set")))
##### Make a list of Trials #####
for(i in 1:trials){
print(i)
b <- levels(ww$env)
c <- b[i]
env <- as.factor(c)
env <- data.table(env)
f <- merge(ww,env,by="env")
assign(paste0("data_", b[i]), f)
data_list[[i, "data_Set" ]] <- f
rm(b, c, f, env)
}
data_list <- data.table(ldply(data_list[, "data_Set"], data.frame, .id="env"))
stgI_list <- matrix(data=list(), nrow=length(envs), ncol=1,
dimnames=list(envs, c("lsmeans")))
asreml.options(maxit=100) # Set asreml iteration
############################
##### Stage I LSMEANS #####
##### per location #####
for (i in envs){
edat <- droplevels(subset(ww, env == i))
print(i)
mod.1 <- asreml(fixed = yield ~ cultivar,
random = ~ rep + rep:alpha,
data = edat,
predict = predict.asreml(classify = "cultivar"))
update.asreml(mod.1)
print(summary.asreml(mod.1)$varcomp)
blue <- predict(mod.1, classify="cultivar", levels=levels(edat$cultivar), vcov=TRUE,aliased = T) # get the lsmeans
blue.1 <- data.table(blue$pvals)[, c(1:3)]
names(blue.1) <- c("cultivar", "yield_lsm", "se")
blue.1[ , ':='(var=se^2, smith.w=diag(solve(blue$vcov)))] # calculate the Smith's weight
stgI_list[[i, "lsmeans" ]] <- blue.1 # put all the results of Stage 1 in the list
rm(Edat,mod.1, blue, blue.1)
}
#######################################################
##### Preparing dataset of Stage I for Stage II ######
##### Unlist the results of Stage I and format as data.table #####
stgII_list <- data.table(plyr::ldply(stgI_list[, "lsmeans"], data.frame, .id="env"))
stgII_list$zone<- factor(str_split_fixed(stgII_list$env, "_", 2)[,1]) # Make Zone column by split the record in Zone_Loc column
stgII_list$location <- factor(str_split_fixed(stgII_list$env, "_", 3)[,2]) # Make Location by split the record in Zone_Loc column
stgII_list$year <- factor(str_split_fixed(stgII_list$env, "_", 3)[,3]) # Make Year by split the record in Zone_Loc column
blues.asreml <- stgII_list
############################
##### Stage II BLUPs ######
##### Zone analysis #####
model <- asreml(yield_lsm ~ zone,
random = ~cultivar + zone:location + zone:cultivar + cultivar:zone:location,
weights = smith.w,
family = asr_gaussian(dispersion=1.0), # fix residual variance to 1
data = blues.asreml,
predict = predict.asreml(classify = "cultivar")
)
update.asreml(model)
# print(summary.asreml(model)$varcomp) # print the variance components
blups.asrml <- data.frame((model$predictions$pvals[1:4]))
library(inti)
library(purrr)
#> First stage
envs <- levels(fb$env)
model <- 1:length(envs) %>% map(function(x) {
model <- fb %>% filter(env %in% envs[x]) %>%
H2cal(trait = "yield"
, gen.name = "cultivar"
, rep.n = 4
, ran.model = "1 + (1|rep:alpha) + (1|cultivar)"
, fix.model = "0 + (1|rep:alpha) + cultivar"
, emmeans = F
)
blues <- model$blues %>% mutate(trial = levels(fb$env)[x])
})
blues.h2cal <- bind_rows(model) %>%
separate(trial, c("zone", "location", "year")) %>%
mutate(across(c(yield, smith.w), as.numeric)) %>%
mutate(across(!c(yield, smith.w), as.factor))
#> Second stage
met <- blues.h2cal %>%
mutate(across(!yield, as.factor)) %>%
H2cal(trait = "yield"
, gen.name = "cultivar"
, rep.n = 4
, loc.n = 18
, loc.name = "Location"
, ran.model = "1 + (1|zone:location) + (1|zone:cultivar) + (1|cultivar)"
, fix.model = "0 + (1|zone:location) + (1|zone:cultivar) + cultivar"
, emmeans = T
# , weights = blues.h2cal$smith.w
)
blups.h2cal <- met$blups0 + for avoid intercep and calculate all the BLUEs.emmeans = F to calculate the Smith weitghts in the first stage
library(asreml)
options("scipen"=100,"digits"= 4 )
asreml.options(maxit=100) # Set asreml iteration
##### Fit a single-stage model #####
## incomplete block and replicate location-specific
## location-specifice residual variance
mod <- asreml(fixed = yield ~ zone,
random = ~ rep:at(location) + rep:alpha:at(location) + zone:location +
cultivar + cultivar:zone:location+ cultivar:zone,
residual = ~ dsum(~(units)|location),
data = fb,
predict = predict.asreml(classify = "cultivar"))
update.asreml(mod)
blups.asreml <- data.frame((mod$predictions$pvals[1:4]))
library(inti)
model <- fb %>%
H2cal(trait = "yield"
, gen.name = "cultivar"
, loc.name = "location"
, rep.n = 2
, loc.n = 18
, ran.model = "1 + (1|rep:location) + (1|rep:alpha:location) + (1|cultivar:zone:location) + (1|cultivar:zone) + (1|cultivar:location) + (1|cultivar)"
, fix.model = "0 + (1|rep:location) + (1|rep:alpha:location) + (1|cultivar:zone:location) + (1|cultivar:zone) + (1|cultivar:location) + cultivar"
, summary = T
, emmeans = T
, plot_diag = F
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: yield ~ 1 + (1 | rep:location) + (1 | rep:alpha:location) + (1 |
## cultivar:zone:location) + (1 | cultivar:zone) + (1 | cultivar:location) +
## (1 | cultivar)
## Data: dt.rm
## Weights: weights
##
## REML criterion at convergence: 12023
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.1177 -0.3295 -0.0109 0.3574 4.0993
##
## Random effects:
## Groups Name Variance Std.Dev.
## cultivar:location (Intercept) 0.0005286 0.02299
## cultivar:zone:location (Intercept) 2208.5761240 46.99549
## rep:alpha:location (Intercept) 942.4310254 30.69904
## cultivar:zone (Intercept) 128.9649012 11.35627
## rep:location (Intercept) 42663.3346159 206.55105
## cultivar (Intercept) 727.6650414 26.97527
## Residual 1397.5707455 37.38410
## Number of obs: 1069, groups:
## cultivar:location, 539; cultivar:zone:location, 539; rep:alpha:location, 251; cultivar:zone, 90; rep:location, 36; cultivar, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 847.60 34.93 24.27
## optimizer (nloptwrap) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
blups.h2cal <- model$blups