VanRaden Kinship Algorithm

# Read genotype data from zzlab and store it as a data frame
myGD = read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt", head=T)
# Remove the first column (IDs) to keep only the genotype matrix
X = myGD[, -1]
# Compute allele frequencies (bi-allelic SNPs, so divide by 2)
p = colMeans(X) / 2  
# Center the genotype matrix by subtracting 1 (allele coding {0,1,2} → {-1,0,1})
M = X - 1  
# Standardize the genotype matrix to create the Z matrix
# t(M) converts M from an individuals × SNPs matrix to an SNPs × individuals matrix.
# Subtracting 0.5 centers the allele frequency around zero
Z = t(M) - 2 * (p - 0.5)  
# Compute the kinship matrix 
# crossprod(A, B) is a shortcut for matrix multiplication
# Since Z is an SNPs × Individuals matrix, its transpose (Z^T) becomes Individuals × SNPs.
# The multiplication Z^T Z results in an Individuals × Individuals matrix.
K = crossprod(Z, Z)  
# Adjust for allele frequency variance to normalize the kinship matrix
# p is the allele frequency for each SNP.
# 1 - p is the frequency of the alternative allele.
# p * (1 - p) gives the genetic variance at each SNP.
adj = 2 * sum(p * (1 - p))  
K = K / adj  
# Visualize the kinship matrix using a heatmap
heatmap(K) 

heatmap(K, Colv = NA,Rowv = NA) 

Zhang Kinship Algorithm

a = c(0,1,2,0,0,1,2,1,0,1,2,2)
# representing genotypes for 3 individuals across 4 SNPs
snps = matrix(a, 3, 4, byrow=T)
snps  # Print the SNP matrix
##      [,1] [,2] [,3] [,4]
## [1,]    0    1    2    0
## [2,]    0    1    2    1
## [3,]    0    1    2    2
# Compute the (column-wise mean) of each SNP
snpMean = apply(snps, 2, mean)  
snpMean  # Print the mean values of each SNP
## [1] 0 1 2 1
# Subtract the mean of each SNP from all its values to center the data
snps = t(snps) - snpMean    
# Transpose back to retain original structure (individuals as rows, SNPs as columns)
t(snps)
##      [,1] [,2] [,3] [,4]
## [1,]    0    0    0   -1
## [2,]    0    0    0    0
## [3,]    0    0    0    1
# Compute the kinship matrix u
K = crossprod(snps, snps)  
K  # Print the kinship matrix
##      [,1] [,2] [,3]
## [1,]    1    0   -1
## [2,]    0    0    0
## [3,]   -1    0    1

Rescale between 0 and 2 for inbred

a = c(0,1,2,0,1,1,2,1,0,2,2,1)
# representing genotypes for 3 individuals across 4 SNPs
snps = matrix(a, 3, 4, byrow=T)
snps
##      [,1] [,2] [,3] [,4]
## [1,]    0    1    2    0
## [2,]    1    1    2    1
## [3,]    0    2    2    1
# Calculate allele frequencies for each SNP
fa = colSums(snps) / (2 * nrow(snps))  # Sum up allele counts and divide by 2 * number of individuals
fa
## [1] 0.1666667 0.6666667 1.0000000 0.3333333
# Identify SNPs with fixed alleles (frequency = 0 or 1) and remove them
index.non = fa >= 1 | fa <= 0  # Create a logical index for SNPs that are monomorphic (no variation)
snps = snps[, !index.non]  # Keep only polymorphic SNPs (those with variation)
snps
##      [,1] [,2] [,3]
## [1,]    0    1    0
## [2,]    1    1    1
## [3,]    0    2    1
# Compute heterozygosity matrix (1 if heterozygous, 0 otherwise)
het = 1 - abs(snps - 1)  # If the SNP value is 1 (heterozygous), result is 1, otherwise 0
het
##      [,1] [,2] [,3]
## [1,]    0    1    0
## [2,]    1    1    1
## [3,]    0    0    1
# Calculate individual heterozygosity by summing heterozygous SNPs per individual
ind.sum = rowSums(het)  # Total number of heterozygous sites per individual
ind.sum
## [1] 1 3 1
# Compute individual inbreeding coefficient: (1 - observed heterozygosity)
fi = ind.sum / (2 * ncol(snps))  # Observed heterozygosity per individual
fi
## [1] 0.1666667 0.5000000 0.1666667
inbreeding = 1 - min(fi)  # Inbreeding coefficient is 1 minus the lowest heterozygosity
inbreeding
## [1] 0.8333333
# Get the number of SNPs and individuals
nSNP = ncol(snps)  # Total number of SNPs after filtering
nInd = nrow(snps)  # Total number of individuals
n = nInd  # Store the number of individuals in a variable n

# Compute the mean genotype value for each SNP
snpMean = apply(snps, 2, mean)  # Get the average genotype value per SNP
# Center the genotype matrix by subtracting the SNP mean (column-wise operation)
snps = t(snps) - snpMean  # Transpose to apply column-wise mean subtraction, then transpose back
# Compute the kinship matrix 
# K = tcrossprod(snps, snps)  # Alternative method
K = crossprod(snps, snps)  # Equivalent to snps' %*% snps, computing X'X

# Extracting Diagonal Elements
i = 1:n         # Generate a sequence from 1 to n (number of individuals)
j = (i-1) * n   # Compute indices for diagonal elements in the flattened matrix
index = i + j   # Diagonal indices in vectorized form
d = K[index]    # Extract diagonal elements (self-kinship values)
d
## [1] 0.6666667 0.6666667 0.6666667
# Compute Key Values for Rescaling
DL = min(d)    # Smallest diagonal value 
DU = max(d)    # Largest diagonal value
floor = min(K) # Minimum value in the entire kinship matrix 

# Normalize K Between 0 and 2
top = 1 + inbreeding  # Set the upper limit based on the inbreeding coefficient
K = top * (K - floor) / (DU - floor)  # Rescale K so that values range from 0 to 'top'
Dmin = top * (DL - floor) / (DU - floor)  # Compute the new minimum diagonal value after rescaling

# Adjust Diagonal if It's Below Expected (1)
if (Dmin < 1) {  
  print("Adjustment by the minimum diagonal")  
  # Adjust diagonal values to ensure self-relatedness is at least 1
  K[index] = (K[index] - Dmin + 1) / ((top + 1 - Dmin) * 0.5)  
  # Rescale off-diagonal values to maintain overall kinship relationships
  K[-index] = K[-index] * (1 / Dmin)  
}

# Find the highest off-diagonal kinship value
Omax = max(K[-index])  
# If the highest off-diagonal value exceeds the upper limit, rescale it
if (Omax > top) {  
  print("Adjustment by the minimum off diagonal")  
  K[-index] = K[-index] * (top / Omax)  # Scale all off-diagonal values down to fit within the range
}

# Return the final adjusted kinship matrix
K
##              [,1]         [,2]         [,3]
## [1,] 1.833333e+00 0.000000e+00 1.017704e-16
## [2,] 0.000000e+00 1.833333e+00 3.053113e-16
## [3,] 1.017704e-16 3.053113e-16 1.833333e+00

VanRaden & Zhang Kinship Algorithm in GAPIT

source("http://zzlab.net/GAPIT/gapit_functions.txt")
myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T)
taxa=myGD[,1]
favorite=c("33-16",       "38-11",     "B73",      "B73HTRHM",       "CM37",     "CML333",  "MO17",  "YU796NS") 
index=taxa%in%favorite
snps=myGD[,-1]

# K=GAPIT.kinship.loiselle(t(myGD[,-1]), method="additive", use="all")
# K[index,index]

K1=GAPIT.kinship.VanRaden(snps)
## [1] "Calculating kinship with VanRaden method..."
## [1] "substracting P..."
## [1] "Getting X'X..."
## [1] "Adjusting..."
## [1] "Calculating kinship with VanRaden method: done"
K1[index,index]
##              [,1]        [,2]        [,3]        [,4]        [,5]         [,6]
## [1,]  1.767580817  0.03129311 -0.16338962 -0.14873793  0.06838917 -0.018256344
## [2,]  0.031293106  1.85921066 -0.07053974 -0.06843069 -0.04885046 -0.071737463
## [3,] -0.163389625 -0.07053974  2.41786109  2.27259390 -0.04180544 -0.202661683
## [4,] -0.148737926 -0.06843069  2.27259390  2.29247170 -0.04910340 -0.204733528
## [5,]  0.068389170 -0.04885046 -0.04180544 -0.04910340  2.03060839 -0.070178930
## [6,] -0.018256344 -0.07173746 -0.20266168 -0.20473353 -0.07017893  1.958703843
## [7,]  0.006196631 -0.04728449 -0.20329402 -0.19073277  0.09746939  0.005597768
## [8,] -0.010303733 -0.03138299 -0.13095056 -0.11943453  0.05379327 -0.061073228
##              [,7]        [,8]
## [1,]  0.006196631 -0.01030373
## [2,] -0.047284488 -0.03138299
## [3,] -0.203294024 -0.13095056
## [4,] -0.190732767 -0.11943453
## [5,]  0.097469389  0.05379327
## [6,]  0.005597768 -0.06107323
## [7,]  1.911449417  0.06476623
## [8,]  0.064766231  1.84918249
K2=GAPIT.kinship.Zhang(snps)
## [1] "Calculating ZHANG relationship defined by Zhiwu Zhang..."
## [1] "substracting mean..."
## [1] "Getting X'X..."
## [1] "Adjusting..."
## [1] "Adjustment by the minimum diagonal"
## [1] "Calculating kinship with Zhang method: done"
K2[index,index]
##           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]      [,7]
## [1,] 1.5380605 0.2858844 0.1412412 0.1521270 0.3134457 0.2490707 0.2672385
## [2,] 0.2858844 1.6031515 0.2102258 0.2117928 0.2263403 0.2093359 0.2275037
## [3,] 0.1412412 0.2102258 2.0000000 1.9511011 0.2315745 0.1120633 0.1115935
## [4,] 0.1521270 0.2117928 1.9511011 1.9109272 0.2261523 0.1105240 0.1209261
## [5,] 0.3134457 0.2263403 0.2315745 0.2261523 1.7249073 0.2104939 0.3350514
## [6,] 0.2490707 0.2093359 0.1120633 0.1105240 0.2104939 1.6738285 0.2667936
## [7,] 0.2672385 0.2275037 0.1115935 0.1209261 0.3350514 0.2667936 1.6402604
## [8,] 0.2549793 0.2393181 0.1653424 0.1738985 0.3026014 0.2172591 0.3107539
##           [,8]
## [1,] 0.2549793
## [2,] 0.2393181
## [3,] 0.1653424
## [4,] 0.1738985
## [5,] 0.3026014
## [6,] 0.2172591
## [7,] 0.3107539
## [8,] 1.5960278
dK1 = diag(K1[index,index])
dK2 = diag(K2[index,index])
# Plot side-by-side
boxplot(list(dK1, dK2), names = c("VanRaden", "Zhang"), boxwex = 0.4, main = "Comparison of K1 and K2")

# Comparison
heatmap.2(K1,  cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none")
#quartz()
heatmap.2(K2,  cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none")

# Common and differences
n=nrow(myGD)
ind.a=seq(1:(n*n))
i =1:n
j=(i-1)*n
ind.d=i+j
par(mfrow=c(1,3))
plot(K2[ind.a],K1[ind.a],main="All elements",xlab="Zhang",ylab="VanRaden")
lines(K2[ind.d],K1[ind.d],main="All elements",xlab="Zhang",ylab="VanRaden",col="red",type="p")
plot(K2[ind.d],K1[ind.d],main="Diagonals",xlab="Zhang",ylab="VanRaden")
plot(K2[-ind.d],K1[-ind.d],main="Off diag",xlab="Zhang",ylab="VanRaden")

Simulation with GAPIT

source("http://zzlab.net/GAPIT/gapit_functions.txt") 
myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T) 
myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T)
index1to5=myGM[,2]<6 

set.seed(99164) 
mySim=GAPIT.Phenotype.Simulation(GD=myGD[,c(TRUE,index1to5)], # 'TRUE' means the first column is kept.
GM=myGM[index1to5,],
h2=.7,
NQTN=40, 
effectunit =.95, # gene effects follow Geometry distribution
QTNDist="normal")

MLM in GAPIT

setwd("~/Desktop/CROP_545/Liang_Lab/Lab7/MLM")
myGAPIT=GAPIT(
  Y=mySim$Y, 
  GD=myGD, 
  GM=myGM,
  PCA.total=3, 
  QTN.position=mySim$QTN.position, 
  model="MLM"
  )

GLM in GAPIT

setwd("~/Desktop/CROP_545/Liang_Lab/Lab7/GLM")
myGAPIT=GAPIT(
  Y=mySim$Y,
  GD=myGD,
  GM=myGM,
  PCA.total=3, 
  QTN.position=mySim$QTN.position, 
  model="GLM"
  )

t Test in GAPIT

setwd("~/Desktop/CROP_545/Liang_Lab/Lab7/tTest")
myGAPIT=GAPIT(
  Y=mySim$Y,
  GD=myGD,
  GM=myGM,
  PCA.total=0, 
  QTN.position=mySim$QTN.position, 
  model="GLM",
  memo="tTest"
  )

Multiple models

setwd("~/Desktop/CROP_545/Liang_Lab/Lab7/Multi")
myGAPIT=GAPIT(
  Y=mySim$Y,
  GD=myGD,
  GM=myGM,
  PCA.total=3, 
  QTN.position=mySim$QTN.position, 
  model=c("GLM","MLM")
  )