Simulation with GAPIT
source("http://zzlab.net/GAPIT/gapit_functions.txt")
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
## Loading required package: genetics
## Loading required package: combinat
##
## Attaching package: 'combinat'
## The following object is masked from 'package:utils':
##
## combn
## Loading required package: gdata
## Registered S3 method overwritten by 'gdata':
## method from
## reorder.factor gplots
##
## Attaching package: 'gdata'
## The following object is masked from 'package:gplots':
##
## reorder.factor
## The following object is masked from 'package:stats':
##
## nobs
## The following object is masked from 'package:utils':
##
## object.size
## The following object is masked from 'package:base':
##
## startsWith
## Loading required package: gtools
## Loading required package: MASS
## Loading required package: mvtnorm
##
## NOTE: THIS PACKAGE IS NOW OBSOLETE.
##
## The R-Genetics project has developed an set of enhanced genetics
## packages to replace 'genetics'. Please visit the project homepage
## at http://rgenetics.org for informtion.
##
##
## Attaching package: 'genetics'
## The following objects are masked from 'package:base':
##
## %in%, as.factor, order
## Loading required package: ape
## Loading required package: compiler
## Loading required package: grid
## Loading required package: bigmemory
## Loading required package: EMMREML
## Loading required package: Matrix
## Loading required package: scatterplot3d
## Loading required package: lme4
myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T)
myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T)
set.seed(99164)
n=nrow(myGD)
testing=sample(n,round(n/5),replace=F) # round(n/5) calculates 20% of n, rounding to the nearest whole number, so that roughly 1/5 of the data is selected for testing.
training=-testing # results in a vector of negative values
set.seed(99164)
mySim=GAPIT.Phenotype.Simulation(
GD=myGD,
GM=myGM,
h2=.7,
NQTN=20,
QTNDist="normal"
)
GWAS and MAS (20 QTNs)
#Gene mapping (GWAS)
myGAPIT3 <- GAPIT(
Y=mySim$Y[training,],
GD=myGD,
GM=myGM,
PCA.total=3,
model="BLINK",
QTN.position=mySim$QTN.position,
memo="GWAS"
)
head(myGAPIT3$GWAS)
## SNP Chromosome Position P.value maf nobs effect
## 1 PZB00859.1 1 157104 7.030232e-01 0.2422222 225 -0.077526150
## 2 PZA01271.1 1 1947984 9.947272e-01 0.4822222 225 0.001229805
## 3 PZA03613.2 1 2914066 2.111030e-06 0.2911111 225 0.836125040
## 4 PZA03613.1 1 2914171 4.259927e-01 0.2266667 225 0.196909271
## 5 PZA03614.2 1 2915078 6.105079e-01 0.4911111 225 0.128731807
## 6 PZA03614.1 1 2915242 2.019342e-01 0.4777778 225 -0.348620837
head(myGAPIT3$PCA)
## taxa PC1 PC2 PC3
## 1 33-16 1.6780775 -4.9373382 1.0029279
## 2 38-11 -1.6021749 -4.7322790 -0.6886187
## 3 4226 -0.8999517 -6.2186090 2.2737655
## 4 4722 2.1334477 -6.2879301 5.6837161
## 5 A188 0.6302372 -4.8947416 0.8189001
## 6 A214N -13.6690754 0.8736302 -21.6324613
index = myGAPIT3$GWAS[,4] < 0.05/length(myGAPIT3$GWAS[,4])
# Ordering QTN by Effect Size
QTN.order = order(abs(mySim$effect), decreasing = T) # contains the sorted indices of the most influential QTNs.
# adjusts the position index to match column indexing in myGD
index = mySim$QTN.position[QTN.order] + 1
# Combining PCA Data with Selected Markers
myQTN = cbind(myGAPIT3$PCA, myGD[, c(FALSE, index[1:15])])
Simulation with GAPIT
source("http://zzlab.net/GAPIT/gapit_functions.txt")
myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T)
myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T)
set.seed(99164)
n=nrow(myGD)
testing=sample(n,round(n/5),replace=F)
training=-testing
set.seed(99164)
mySim=GAPIT.Phenotype.Simulation(GD=myGD, GM=myGM,
h2=.7,
NQTN=20,
QTNDist="normal")
gBLUP(20 QTNs)
#gBLUP
myGAPIT5 <- GAPIT(
Y=mySim$Y[training,],
GD=myGD,
GM=myGM,
PCA.total=3,
model="gBLUP",
SNP.test=FALSE
)
order=match(mySim$Y[,1],myGAPIT5$Pred[,1])
myPred=myGAPIT5$Pred[order,]
ru2=cor(myPred[testing,8],mySim$u[testing])^2
plot(myPred[testing,8],mySim$u[testing])
mtext(paste("R square=",ru2,sep=""), side = 3)

Simulation with GAPIT
source("http://zzlab.net/GAPIT/gapit_functions.txt")
myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T)
myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T)
taxa=myGD[,1]
X=myGD[,-1]
set.seed(99164)
mySim=GAPIT.Phenotype.Simulation(GD=myGD, GM=myGM,
h2=.7,
NQTN=100,
QTNDist="normal")
Ridge Regression vs. gBLUP
#Import rrBLUP
#install.packages("rrBLUP")
library(rrBLUP)
#prepare data
y <- mySim$Y[,2]
# # Convert genotype data (X) to a numeric matrix for analysis
M=as.matrix(X)
#Ridge Regression
# Perform Ridge Regression
# This solves the mixed model equation: y = M * u + e, where:
# - y is the phenotype vector
# - M is the genotype matrix
# - u is the effect of markers
# - e is the residual error
ans1 <- mixed.solve(y=y, Z=M)
#gBLUP
# Compute the genomic relationship matrix (K)
# K is calculated as the cross-product of M (K = MM'), capturing genetic similarities
K <- tcrossprod(M)
# Perform GBLUP (Genomic Best Linear Unbiased Prediction)
# This model estimates breeding values using the genomic relationship matrix (K)
ans2 <- mixed.solve(y=y, K=K)
#Compare GEBV
plot(M %*% ans1$u, ans2$u,
xlab = "Ridge Regression GEBV",
ylab = "G-BLUP GEBV",
main = "Comparison of GEBV from Ridge Regression and GBLUP")

rrBLUP vs GAPIT
myGAPIT <- GAPIT(
Y=mySim$Y,
GD=myGD,
GM=myGM,
model="gBLUP",
file.output=F)
order.raw=match(taxa, myGAPIT$Pred[,1])
plot(ans2$u, myGAPIT$Pred[order.raw, 8])

first=c("c","a","b","d")
second=c("a","d","c","e","f")
match(first,second)
## [1] 3 1 NA 2