# Crop 545 lab 9 - lec 12 structure & PCA # code provided by Zhiwu Zhang # modified and commented by Yuanhong Song # In this script we will learn how to solve kinship using the GAPIT function # 1. Zhang's algorithm demonstrated in small matrix try out ---- # To understand how the Identical by Status matrix works, # we first try it on a small dataset (only 4 by 3). (snps=matrix(c(0,1,2,0,0,1,2,1,0,1,2,2), 3,4,byrow=T)) (snpMean= apply(snps,2,mean)) #get mean for each snp (snps=t(snps)-snpMean ) #operation on matrix and vector goes in direction of column (K=crossprod(snps, snps)) # equivelent to t(snps)%*%snps # 2. Zhang's and VanRaden's algorithm in GAPIT ---- library(compiler) #required for cmpfun library(gplots) # might require installation source("http://www.zzlab.net/GAPIT/gapit_functions.txt") myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T) taxa=myGD[,1] # label for the individuals, it will be a column of strings favorite=c("33-16", "38-11", "B73", "B73HTRHM", "CM37", "CML333", "MO17", "YU796NS") # our favourite individuals # The following 2 lines is an example for how %in% works: # %in% is used for find matching values, similar to the which() function. # B73=taxa%in%c("B73") # T/F for "is this row "B73"? " # taxa=taxa[!B73] # Use the method to extract our favourtie. index=taxa%in%favorite # T/F for "is this our favourite individual? ", used later for extracting 'favourite' rows. snps=myGD[,-1] # snps[1:5, 1:5] #snps=snps[!B73,] #K=GAPIT.kinship.loiselle(t(myGD[,-1]), method="additive", use="all") #K[index,index] # Use the GAPIT function to solve the kinship matrix by VanRaden's algorithm K1=GAPIT.kinship.VanRaden(snps) K1[index,index] # Use the GAPIT function to solve the kinship matrix by Zhang's algorithm K2=GAPIT.kinship.ZHANG(snps) K2[index,index] # Heatmap for visualize the kinship by Zhang's method heatmap.2(K1, cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none") #quartz() # For VanRaden's method heatmap.2(K2, cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none") # Compare the two different methods n=nrow(snps) ind.a=seq(1:(n*n)) i =1:n j=(i-1)*n ind.d=i+j par(mfrow=c(1,3)) plot(K2[ind.a],K1[ind.a],main="All elements",xlab="Zhang",ylab="VanRaden") # use indexing to pair up the elements and plot against each other lines(K2[ind.d],K1[ind.d],main="All elements",xlab="Zhang",ylab="VanRaden",col="red") plot(K2[ind.d],K1[ind.d],main="Diagonals",xlab="Zhang",ylab="VanRaden") plot(K2[-ind.d],K1[-ind.d],main="Off diag",xlab="Zhang",ylab="VanRaden")