Simulation with GAPIT

source("http://zzlab.net/GAPIT/gapit_functions.txt") 
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
## Loading required package: genetics
## Loading required package: combinat
## 
## Attaching package: 'combinat'
## The following object is masked from 'package:utils':
## 
##     combn
## Loading required package: gdata
## Registered S3 method overwritten by 'gdata':
##   method         from  
##   reorder.factor gplots
## 
## Attaching package: 'gdata'
## The following object is masked from 'package:gplots':
## 
##     reorder.factor
## The following object is masked from 'package:stats':
## 
##     nobs
## The following object is masked from 'package:utils':
## 
##     object.size
## The following object is masked from 'package:base':
## 
##     startsWith
## Loading required package: gtools
## Loading required package: MASS
## Loading required package: mvtnorm
## 
## NOTE: THIS PACKAGE IS NOW OBSOLETE.
## 
##   The R-Genetics project has developed an set of enhanced genetics
##   packages to replace 'genetics'. Please visit the project homepage
##   at http://rgenetics.org for informtion.
## 
## 
## Attaching package: 'genetics'
## The following objects are masked from 'package:base':
## 
##     %in%, as.factor, order
## Loading required package: ape
## Loading required package: compiler
## Loading required package: grid
## Loading required package: bigmemory
## Loading required package: EMMREML
## Loading required package: Matrix
## Loading required package: scatterplot3d
## Loading required package: lme4
myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T) 
myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T)

set.seed(99164)
n=nrow(myGD)
testing=sample(n,round(n/5),replace=F) # round(n/5) calculates 20% of n, rounding to the nearest whole number, so that roughly 1/5 of the data is selected for testing.
training=-testing # results in a vector of negative values

set.seed(99164) 
mySim=GAPIT.Phenotype.Simulation(
  GD=myGD, 
  GM=myGM,
  h2=.7,
  NQTN=20, 
  QTNDist="normal"
  )

GWAS and MAS (20 QTNs)

#Gene mapping (GWAS)
myGAPIT3 <- GAPIT(
  Y=mySim$Y[training,],
  GD=myGD,
  GM=myGM, 
  PCA.total=3,
  model="BLINK", 
  QTN.position=mySim$QTN.position,
  memo="GWAS"
)
head(myGAPIT3$GWAS)
##          SNP Chromosome Position      P.value       maf nobs       effect
## 1 PZB00859.1          1   157104 7.030232e-01 0.2422222  225 -0.077526150
## 2 PZA01271.1          1  1947984 9.947272e-01 0.4822222  225  0.001229805
## 3 PZA03613.2          1  2914066 2.111030e-06 0.2911111  225  0.836125040
## 4 PZA03613.1          1  2914171 4.259927e-01 0.2266667  225  0.196909271
## 5 PZA03614.2          1  2915078 6.105079e-01 0.4911111  225  0.128731807
## 6 PZA03614.1          1  2915242 2.019342e-01 0.4777778  225 -0.348620837
head(myGAPIT3$PCA)
##    taxa         PC1        PC2         PC3
## 1 33-16   1.6780775 -4.9373382   1.0029279
## 2 38-11  -1.6021749 -4.7322790  -0.6886187
## 3  4226  -0.8999517 -6.2186090   2.2737655
## 4  4722   2.1334477 -6.2879301   5.6837161
## 5  A188   0.6302372 -4.8947416   0.8189001
## 6 A214N -13.6690754  0.8736302 -21.6324613
index = myGAPIT3$GWAS[,4] < 0.05/length(myGAPIT3$GWAS[,4])
# Ordering QTN by Effect Size
QTN.order = order(abs(mySim$effect), decreasing = T) # contains the sorted indices of the most influential QTNs.
# adjusts the position index to match column indexing in myGD
index = mySim$QTN.position[QTN.order] + 1 
# Combining PCA Data with Selected Markers
myQTN = cbind(myGAPIT3$PCA, myGD[, c(FALSE, index[1:15])])

Simulation with GAPIT

source("http://zzlab.net/GAPIT/gapit_functions.txt") 
myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T) 
myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T)

set.seed(99164)
n=nrow(myGD)
testing=sample(n,round(n/5),replace=F)
training=-testing

set.seed(99164) 
mySim=GAPIT.Phenotype.Simulation(GD=myGD, GM=myGM,
h2=.7,
NQTN=20, 
QTNDist="normal")

gBLUP(20 QTNs)

#gBLUP
myGAPIT5 <- GAPIT(
  Y=mySim$Y[training,],
  GD=myGD, 
  GM=myGM,
  PCA.total=3,
  model="gBLUP", 
  SNP.test=FALSE
  )
order=match(mySim$Y[,1],myGAPIT5$Pred[,1])
myPred=myGAPIT5$Pred[order,]
ru2=cor(myPred[testing,8],mySim$u[testing])^2
plot(myPred[testing,8],mySim$u[testing])
mtext(paste("R square=",ru2,sep=""), side = 3)

Simulation with GAPIT

source("http://zzlab.net/GAPIT/gapit_functions.txt") 
myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T) 
myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T)

taxa=myGD[,1]
X=myGD[,-1]

set.seed(99164) 
mySim=GAPIT.Phenotype.Simulation(GD=myGD, GM=myGM,
h2=.7,
NQTN=100, 
QTNDist="normal")

Ridge Regression vs. gBLUP

#Import rrBLUP
#install.packages("rrBLUP") 
library(rrBLUP)

#prepare data
y <- mySim$Y[,2]
# # Convert genotype data (X) to a numeric matrix for analysis
M=as.matrix(X)

#Ridge Regression
 # Perform Ridge Regression  
 # This solves the mixed model equation: y = M * u + e, where:  
 # - y is the phenotype vector  
 # - M is the genotype matrix  
 # - u is the effect of markers  
 # - e is the residual error  
ans1 <- mixed.solve(y=y, Z=M)

#gBLUP
# Compute the genomic relationship matrix (K)
# K is calculated as the cross-product of M (K = MM'), capturing genetic similarities
K <- tcrossprod(M) 
# Perform GBLUP (Genomic Best Linear Unbiased Prediction)
# This model estimates breeding values using the genomic relationship matrix (K)
ans2 <- mixed.solve(y=y, K=K)

#Compare GEBV
plot(M %*% ans1$u, ans2$u, 
     xlab = "Ridge Regression GEBV", 
     ylab = "G-BLUP GEBV", 
     main = "Comparison of GEBV from Ridge Regression and GBLUP")

rrBLUP vs GAPIT

myGAPIT <- GAPIT(
  Y=mySim$Y,
  GD=myGD,
  GM=myGM,
  model="gBLUP",
  file.output=F)
order.raw=match(taxa, myGAPIT$Pred[,1])
plot(ans2$u, myGAPIT$Pred[order.raw, 8]) 

first=c("c","a","b","d")

second=c("a","d","c","e","f")
match(first,second)
## [1]  3  1 NA  2