1. Load Required Libraries

library(rrBLUP)  # For rrBLUP's mixed.solve
library(MASS)    # For lm.ridge
library(ridge)   # For linearRidge
library(BGLR)    # For Bayesian ridge regression
library(gridExtra) # For arranging plots
library(ggplot2)

2. Data and Simulation

source("http://zzlab.net/GAPIT/gapit_functions.txt") 
myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T)
myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T)

set.seed(123) 
mySim=GAPIT.Phenotype.Simulation(GD=myGD, GM=myGM,
                                 h2=.7,
                                 NQTN=20, 
                                 QTNDist="normal")

X <- myGD[,-1]
y <- mySim$Y$Sim
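GAPIT.Phenotype.Simulation generates a trait controlled by NQTN = 20 QTNs with normally distributed effects; h2 = 0.7 is the target narrow-sense heritability, i.e. the proportion of phenotypic variance explained by the simulated genetic values,

h² = Vg / (Vg + Ve) = 0.7,

so roughly 30% of the phenotypic variance is residual noise, which limits the prediction accuracy any of the methods below can reach.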

# Data preparation
set.seed(123)
X <- as.matrix(X)  # Ensure matrix
# Clean X: impute NA/Inf with the overall mean and drop zero-variance columns
X[is.na(X)] <- mean(X, na.rm = TRUE)
X[is.infinite(X)] <- mean(X[!is.infinite(X)], na.rm = TRUE)
var_cols <- apply(X, 2, var, na.rm = TRUE)
X <- X[, var_cols > 0]

n <- nrow(X)
train_index <- sample(1:n, size = floor(0.8 * n))
X_train <- X[train_index, ]
X_test <- X[-train_index, ]
y_train <- y[train_index]
y_test <- y[-train_index]

# Verify dimensions
cat("X_train dim:", dim(X_train), "\n")  
## X_train dim: 224 2953
cat("X_test dim:", dim(X_test), "\n")    
## X_test dim: 57 2953
cat("y_train length:", length(y_train), "\n")
## y_train length: 224
cat("y_test length:", length(y_test), "\n")
## y_test length: 57

3. Apply Each Method and Compute Predictions

1) rrBLUP (using mixed.solve)

# Fit model on training data
fit_rrblup <- mixed.solve(y = y_train, Z = X_train)
# Extract marker effects (u) and intercept (beta)
u_rrblup <- as.vector(fit_rrblup$u)
beta <- as.numeric(fit_rrblup$beta)

# Predict test set: intercept + X_test %*% marker effects
pred_rrblup <- beta + X_test %*% u_rrblup
# Calculate accuracy
accuracy_rrblup <- cor(pred_rrblup, y_test)
# Extract lambda for use in other methods
lambda <- fit_rrblup$Ve / fit_rrblup$Vu
cat("Accuracy rrBLUP:", accuracy_rrblup, "\n")
## Accuracy rrBLUP: 0.4151238
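The λ passed on to the other ridge implementations comes from the mixed-model variance components: with marker effects treated as random, u ~ N(0, Vu·I), and residuals e ~ N(0, Ve·I), the BLUP of u from the mixed model equations is the ridge solution

û = (XᵀX + λI)⁻¹ Xᵀ(y − 1β̂),  with λ = Ve / Vu,

so handing the same λ to a ridge solver, with an unpenalized intercept and the markers left on their original scale, should reproduce the rrBLUP marker effects.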

2) lm.ridge (from MASS)

lm.ridge performs ridge regression, centering and scaling the predictors internally, so its $coef values are on the standardized scale and must be back-transformed before prediction. We reuse the λ estimated by rrBLUP (Ve/Vu).

xm <- colMeans(X_train)  # Training-column means, used to center the test data
fit_lm_ridge <- lm.ridge(y_train ~ X_train, lambda = lambda)
# Back-transform coefficients to the original predictor scale ($coef is for scaled X)
beta_lm_ridge <- fit_lm_ridge$coef / fit_lm_ridge$scales
# Predict: mean(y_train) + column-centered X_test %*% coefficients
pred_lm_ridge <- mean(y_train) + sweep(X_test, 2, xm) %*% beta_lm_ridge
accuracy_lm_ridge <- cor(pred_lm_ridge, y_test)
cat("Accuracy lm.ridge:", accuracy_lm_ridge, "\n")
## Accuracy lm.ridge: 0.4697463
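As a cross-check, the same prediction can be obtained through coef(), which back-transforms lm.ridge's internally scaled coefficients to the original predictor scale. The sketch below (our variable names) should agree with pred_lm_ridge up to floating-point error.

# coef() returns the intercept followed by slopes on the original scale
coefs_lm <- coef(fit_lm_ridge)
pred_lm_alt <- coefs_lm[1] + X_test %*% coefs_lm[-1]
all.equal(as.vector(pred_lm_alt), as.vector(pred_lm_ridge))  # expected: TRUE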

3) ridge (using linearRidge)

linearRidge also fits ridge regression with a user-specified λ. Setting scaling = "none" applies the penalty to the raw markers, so the λ from rrBLUP is used on the same scale.

# Convert X_train to data frame for formula interface
X_train_df <- as.data.frame(X_train)
X_test_df <- as.data.frame(X_test)
# Fit linearRidge with the rrBLUP lambda; scaling = "none" skips internal standardization
fit_ridge <- linearRidge(y_train ~ ., data = X_train_df, lambda = lambda, scaling = "none")
# Predict using the predict method
pred_ridge <- predict(fit_ridge, newdata = X_test_df)
accuracy_ridge <- cor(pred_ridge, y_test)
cat("Accuracy linearRidge:", accuracy_ridge, "\n")
## Accuracy linearRidge: 0.4151238
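With scaling = "none" and the rrBLUP λ, linearRidge penalizes the raw markers exactly as mixed.solve does, which is why its accuracy matches rrBLUP's above. A quick sanity check is to correlate the two prediction vectors; the value should be essentially 1.

# linearRidge and rrBLUP solve the same penalized system here,
# so their test-set predictions should coincide up to numerical error
cor(as.vector(pred_ridge), as.vector(pred_rrblup))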

4) BGLR’s Bayesian ridge regression (BRR)

ETA <- list(list(X = X_train, model = "BRR"))
fit_bglr <- BGLR(y = y_train, ETA = ETA, nIter = 1000, burnIn = 200)
# Predict: overall mean + X_test %*% marker effects
pred_bglr <- fit_bglr$mu + X_test %*% fit_bglr$ETA[[1]]$b
accuracy_bglr <- cor(pred_bglr, y_test)
cat("Accuracy BGLR:", accuracy_bglr, "\n")
## Accuracy BGLR: 0.4331929
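BRR places a common normal prior on all marker effects, making it the Bayesian counterpart of the ridge/rrBLUP model, so its posterior-mean variance components can be compared with the REML estimates from mixed.solve. A small check, assuming the standard BGLR output slots varE and ETA[[1]]$varB, is shown below.

# Posterior means (BGLR) vs. REML estimates (rrBLUP) of the variance components;
# they should be of similar magnitude for this data set
cbind(BGLR   = c(Ve = fit_bglr$varE, Vu = fit_bglr$ETA[[1]]$varB),
      rrBLUP = c(Ve = fit_rrblup$Ve, Vu = fit_rrblup$Vu))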

4. Compare Accuracies

# Accuracy Summary
accuracies <- data.frame(
  Method = c("rrBLUP", "lm.ridge", "linearRidge", "BGLR"),
  Accuracy = c(accuracy_rrblup, accuracy_lm_ridge, accuracy_ridge, accuracy_bglr)
)
knitr::kable(accuracies, digits = 4, caption = "Prediction Accuracies")
Table: Prediction Accuracies

Method        Accuracy
-----------  ---------
rrBLUP          0.4151
lm.ridge        0.4697
linearRidge     0.4151
BGLR            0.4332

# Bar Plot of Accuracies
ggplot(accuracies, aes(x = Method, y = Accuracy, fill = Method)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  labs(title = "Prediction Accuracy by Method", y = "Correlation", x = "") +
  scale_fill_manual(values = c("rrBLUP" = "#1b9e77", "lm.ridge" = "#d95f02", 
                               "linearRidge" = "#7570b3", "BGLR" = "#e7298a")) +
  theme(legend.position = "none")

# Scatter Plots of Predicted vs. Observed
# Create data frames for plotting
df_rrblup <- data.frame(Predicted = pred_rrblup, Observed = y_test)
df_lm_ridge <- data.frame(Predicted = pred_lm_ridge, Observed = y_test)
df_ridge <- data.frame(Predicted = pred_ridge, Observed = y_test)
df_bglr <- data.frame(Predicted = pred_bglr, Observed = y_test)

# Define scatter plot function
scatter_plot <- function(df, title) {
  ggplot(df, aes(x = Observed, y = Predicted)) +
    geom_point(color = "blue", alpha = 0.6) +
    geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
    theme_minimal() +
    labs(title = title, x = "Observed Phenotype", y = "Predicted Phenotype") +
    coord_fixed(ratio = 1)
}

# Create plots
p1 <- scatter_plot(df_rrblup, "rrBLUP")
p2 <- scatter_plot(df_lm_ridge, "lm.ridge")
p3 <- scatter_plot(df_ridge, "linearRidge")
p4 <- scatter_plot(df_bglr, "BGLR")

# Arrange plots
grid.arrange(p1, p2, p3, p4, ncol = 2)