VanRaden Kinship Algorithm
# Read genotype data from zzlab and store it as a data frame
myGD = read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt", head=T)
# Remove the first column (IDs) to keep only the genotype matrix
X = myGD[, -1]
# Compute allele frequencies (bi-allelic SNPs, so divide by 2)
p = colMeans(X) / 2
# Center the genotype matrix by subtracting 1 (allele coding {0,1,2} → {-1,0,1})
M = X - 1
# Standardize the genotype matrix to create the Z matrix
# t(M) converts M from an individuals × SNPs matrix to an SNPs × individuals matrix.
# Subtracting 0.5 centers the allele frequency around zero
Z = t(M) - 2 * (p - 0.5)
# Compute the kinship matrix
# crossprod(A, B) is a shortcut for matrix multiplication
# Since Z is an SNPs × Individuals matrix, its transpose (Z^T) becomes Individuals × SNPs.
# The multiplication Z^T Z results in an Individuals × Individuals matrix.
K = crossprod(Z, Z)
# Adjust for allele frequency variance to normalize the kinship matrix
# p is the allele frequency for each SNP.
# 1 - p is the frequency of the alternative allele.
# p * (1 - p) gives the genetic variance at each SNP.
adj = 2 * sum(p * (1 - p))
K = K / adj
# Visualize the kinship matrix using a heatmap
heatmap(K)

heatmap(K, Colv = NA,Rowv = NA)

Zhang Kinship Algorithm
a = c(0,1,2,0,0,1,2,1,0,1,2,2)
# representing genotypes for 3 individuals across 4 SNPs
snps = matrix(a, 3, 4, byrow=T)
snps # Print the SNP matrix
## [,1] [,2] [,3] [,4]
## [1,] 0 1 2 0
## [2,] 0 1 2 1
## [3,] 0 1 2 2
# Compute the (column-wise mean) of each SNP
snpMean = apply(snps, 2, mean)
snpMean # Print the mean values of each SNP
## [1] 0 1 2 1
# Subtract the mean of each SNP from all its values to center the data
snps = t(snps) - snpMean
# Transpose back to retain original structure (individuals as rows, SNPs as columns)
t(snps)
## [,1] [,2] [,3] [,4]
## [1,] 0 0 0 -1
## [2,] 0 0 0 0
## [3,] 0 0 0 1
# Compute the kinship matrix u
K = crossprod(snps, snps)
K # Print the kinship matrix
## [,1] [,2] [,3]
## [1,] 1 0 -1
## [2,] 0 0 0
## [3,] -1 0 1
Rescale between 0 and 2 for inbred
a = c(0,1,2,0,1,1,2,1,0,2,2,1)
# representing genotypes for 3 individuals across 4 SNPs
snps = matrix(a, 3, 4, byrow=T)
snps
## [,1] [,2] [,3] [,4]
## [1,] 0 1 2 0
## [2,] 1 1 2 1
## [3,] 0 2 2 1
# Calculate allele frequencies for each SNP
fa = colSums(snps) / (2 * nrow(snps)) # Sum up allele counts and divide by 2 * number of individuals
fa
## [1] 0.1666667 0.6666667 1.0000000 0.3333333
# Identify SNPs with fixed alleles (frequency = 0 or 1) and remove them
index.non = fa >= 1 | fa <= 0 # Create a logical index for SNPs that are monomorphic (no variation)
snps = snps[, !index.non] # Keep only polymorphic SNPs (those with variation)
snps
## [,1] [,2] [,3]
## [1,] 0 1 0
## [2,] 1 1 1
## [3,] 0 2 1
# Compute heterozygosity matrix (1 if heterozygous, 0 otherwise)
het = 1 - abs(snps - 1) # If the SNP value is 1 (heterozygous), result is 1, otherwise 0
het
## [,1] [,2] [,3]
## [1,] 0 1 0
## [2,] 1 1 1
## [3,] 0 0 1
# Calculate individual heterozygosity by summing heterozygous SNPs per individual
ind.sum = rowSums(het) # Total number of heterozygous sites per individual
ind.sum
## [1] 1 3 1
# Compute individual inbreeding coefficient: (1 - observed heterozygosity)
fi = ind.sum / (2 * ncol(snps)) # Observed heterozygosity per individual
fi
## [1] 0.1666667 0.5000000 0.1666667
inbreeding = 1 - min(fi) # Inbreeding coefficient is 1 minus the lowest heterozygosity
inbreeding
## [1] 0.8333333
# Get the number of SNPs and individuals
nSNP = ncol(snps) # Total number of SNPs after filtering
nInd = nrow(snps) # Total number of individuals
n = nInd # Store the number of individuals in a variable n
# Compute the mean genotype value for each SNP
snpMean = apply(snps, 2, mean) # Get the average genotype value per SNP
# Center the genotype matrix by subtracting the SNP mean (column-wise operation)
snps = t(snps) - snpMean # Transpose to apply column-wise mean subtraction, then transpose back
# Compute the kinship matrix
# K = tcrossprod(snps, snps) # Alternative method
K = crossprod(snps, snps) # Equivalent to snps' %*% snps, computing X'X
# Extracting Diagonal Elements
i = 1:n # Generate a sequence from 1 to n (number of individuals)
j = (i-1) * n # Compute indices for diagonal elements in the flattened matrix
index = i + j # Diagonal indices in vectorized form
d = K[index] # Extract diagonal elements (self-kinship values)
d
## [1] 0.6666667 0.6666667 0.6666667
# Compute Key Values for Rescaling
DL = min(d) # Smallest diagonal value
DU = max(d) # Largest diagonal value
floor = min(K) # Minimum value in the entire kinship matrix
# Normalize K Between 0 and 2
top = 1 + inbreeding # Set the upper limit based on the inbreeding coefficient
K = top * (K - floor) / (DU - floor) # Rescale K so that values range from 0 to 'top'
Dmin = top * (DL - floor) / (DU - floor) # Compute the new minimum diagonal value after rescaling
# Adjust Diagonal if It's Below Expected (1)
if (Dmin < 1) {
print("Adjustment by the minimum diagonal")
# Adjust diagonal values to ensure self-relatedness is at least 1
K[index] = (K[index] - Dmin + 1) / ((top + 1 - Dmin) * 0.5)
# Rescale off-diagonal values to maintain overall kinship relationships
K[-index] = K[-index] * (1 / Dmin)
}
# Find the highest off-diagonal kinship value
Omax = max(K[-index])
# If the highest off-diagonal value exceeds the upper limit, rescale it
if (Omax > top) {
print("Adjustment by the minimum off diagonal")
K[-index] = K[-index] * (top / Omax) # Scale all off-diagonal values down to fit within the range
}
# Return the final adjusted kinship matrix
K
## [,1] [,2] [,3]
## [1,] 1.833333e+00 0.000000e+00 1.017704e-16
## [2,] 0.000000e+00 1.833333e+00 3.053113e-16
## [3,] 1.017704e-16 3.053113e-16 1.833333e+00
VanRaden & Zhang Kinship Algorithm in GAPIT
source("http://zzlab.net/GAPIT/gapit_functions.txt")
myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T)
taxa=myGD[,1]
favorite=c("33-16", "38-11", "B73", "B73HTRHM", "CM37", "CML333", "MO17", "YU796NS")
index=taxa%in%favorite
snps=myGD[,-1]
# K=GAPIT.kinship.loiselle(t(myGD[,-1]), method="additive", use="all")
# K[index,index]
K1=GAPIT.kinship.VanRaden(snps)
## [1] "Calculating kinship with VanRaden method..."
## [1] "substracting P..."
## [1] "Getting X'X..."
## [1] "Adjusting..."
## [1] "Calculating kinship with VanRaden method: done"
K1[index,index]
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1.767580817 0.03129311 -0.16338962 -0.14873793 0.06838917 -0.018256344
## [2,] 0.031293106 1.85921066 -0.07053974 -0.06843069 -0.04885046 -0.071737463
## [3,] -0.163389625 -0.07053974 2.41786109 2.27259390 -0.04180544 -0.202661683
## [4,] -0.148737926 -0.06843069 2.27259390 2.29247170 -0.04910340 -0.204733528
## [5,] 0.068389170 -0.04885046 -0.04180544 -0.04910340 2.03060839 -0.070178930
## [6,] -0.018256344 -0.07173746 -0.20266168 -0.20473353 -0.07017893 1.958703843
## [7,] 0.006196631 -0.04728449 -0.20329402 -0.19073277 0.09746939 0.005597768
## [8,] -0.010303733 -0.03138299 -0.13095056 -0.11943453 0.05379327 -0.061073228
## [,7] [,8]
## [1,] 0.006196631 -0.01030373
## [2,] -0.047284488 -0.03138299
## [3,] -0.203294024 -0.13095056
## [4,] -0.190732767 -0.11943453
## [5,] 0.097469389 0.05379327
## [6,] 0.005597768 -0.06107323
## [7,] 1.911449417 0.06476623
## [8,] 0.064766231 1.84918249
K2=GAPIT.kinship.Zhang(snps)
## [1] "Calculating ZHANG relationship defined by Zhiwu Zhang..."
## [1] "substracting mean..."
## [1] "Getting X'X..."
## [1] "Adjusting..."
## [1] "Adjustment by the minimum diagonal"
## [1] "Calculating kinship with Zhang method: done"
K2[index,index]
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1.5380605 0.2858844 0.1412412 0.1521270 0.3134457 0.2490707 0.2672385
## [2,] 0.2858844 1.6031515 0.2102258 0.2117928 0.2263403 0.2093359 0.2275037
## [3,] 0.1412412 0.2102258 2.0000000 1.9511011 0.2315745 0.1120633 0.1115935
## [4,] 0.1521270 0.2117928 1.9511011 1.9109272 0.2261523 0.1105240 0.1209261
## [5,] 0.3134457 0.2263403 0.2315745 0.2261523 1.7249073 0.2104939 0.3350514
## [6,] 0.2490707 0.2093359 0.1120633 0.1105240 0.2104939 1.6738285 0.2667936
## [7,] 0.2672385 0.2275037 0.1115935 0.1209261 0.3350514 0.2667936 1.6402604
## [8,] 0.2549793 0.2393181 0.1653424 0.1738985 0.3026014 0.2172591 0.3107539
## [,8]
## [1,] 0.2549793
## [2,] 0.2393181
## [3,] 0.1653424
## [4,] 0.1738985
## [5,] 0.3026014
## [6,] 0.2172591
## [7,] 0.3107539
## [8,] 1.5960278
dK1 = diag(K1[index,index])
dK2 = diag(K2[index,index])
# Plot side-by-side
boxplot(list(dK1, dK2), names = c("VanRaden", "Zhang"), boxwex = 0.4, main = "Comparison of K1 and K2")

# Comparison
heatmap.2(K1, cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none")
#quartz()
heatmap.2(K2, cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none")


# Common and differences
n=nrow(myGD)
ind.a=seq(1:(n*n))
i =1:n
j=(i-1)*n
ind.d=i+j
par(mfrow=c(1,3))
plot(K2[ind.a],K1[ind.a],main="All elements",xlab="Zhang",ylab="VanRaden")
lines(K2[ind.d],K1[ind.d],main="All elements",xlab="Zhang",ylab="VanRaden",col="red",type="p")
plot(K2[ind.d],K1[ind.d],main="Diagonals",xlab="Zhang",ylab="VanRaden")
plot(K2[-ind.d],K1[-ind.d],main="Off diag",xlab="Zhang",ylab="VanRaden")

Simulation with GAPIT
source("http://zzlab.net/GAPIT/gapit_functions.txt")
myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T)
myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T)
index1to5=myGM[,2]<6
set.seed(99164)
mySim=GAPIT.Phenotype.Simulation(GD=myGD[,c(TRUE,index1to5)], # 'TRUE' means the first column is kept.
GM=myGM[index1to5,],
h2=.7,
NQTN=40,
effectunit =.95, # gene effects follow Geometry distribution
QTNDist="normal")
MLM in GAPIT
setwd("~/Desktop/CROP_545/Liang_Lab/Lab7/MLM")
myGAPIT=GAPIT(
Y=mySim$Y,
GD=myGD,
GM=myGM,
PCA.total=3,
QTN.position=mySim$QTN.position,
model="MLM"
)
GLM in GAPIT
setwd("~/Desktop/CROP_545/Liang_Lab/Lab7/GLM")
myGAPIT=GAPIT(
Y=mySim$Y,
GD=myGD,
GM=myGM,
PCA.total=3,
QTN.position=mySim$QTN.position,
model="GLM"
)
t Test in GAPIT
setwd("~/Desktop/CROP_545/Liang_Lab/Lab7/tTest")
myGAPIT=GAPIT(
Y=mySim$Y,
GD=myGD,
GM=myGM,
PCA.total=0,
QTN.position=mySim$QTN.position,
model="GLM",
memo="tTest"
)
Multiple models
setwd("~/Desktop/CROP_545/Liang_Lab/Lab7/Multi")
myGAPIT=GAPIT(
Y=mySim$Y,
GD=myGD,
GM=myGM,
PCA.total=3,
QTN.position=mySim$QTN.position,
model=c("GLM","MLM")
)