# Crop 545 lab 9 - lec 12 structure & PCA
# code provided by Zhiwu Zhang
# modified and commented by Yuanhong Song

# In this script we will learn how to solve kinship using the GAPIT function

# 1. Zhang's algorithm demonstrated in small matrix try out ----
# To understand how the Identical by Status matrix works, 
# we first try it on a small dataset (only 4 by 3).
(snps=matrix(c(0,1,2,0,0,1,2,1,0,1,2,2),
            3,4,byrow=T))
(snpMean= apply(snps,2,mean))   #get mean for each snp
(snps=t(snps)-snpMean )   #operation on matrix and vector goes in direction of column
(K=crossprod(snps, snps)) # equivelent to t(snps)%*%snps


# 2. Zhang's and VanRaden's algorithm in GAPIT ----
library(compiler) #required for cmpfun
library(gplots) # might require installation
source("http://www.zzlab.net/GAPIT/gapit_functions.txt")

myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T)
taxa=myGD[,1] # label for the individuals, it will be a column of strings
favorite=c("33-16", "38-11", "B73", "B73HTRHM", "CM37", "CML333", "MO17", "YU796NS") # our favourite individuals

# The following 2 lines is an example for how %in% works:
# %in% is used for find matching values, similar to the which() function.
# B73=taxa%in%c("B73") # T/F for "is this row "B73"? "
# taxa=taxa[!B73]

# Use the method to extract our favourtie. 
index=taxa%in%favorite # T/F for "is this our favourite individual? ", used later for extracting 'favourite' rows.
snps=myGD[,-1]
# snps[1:5, 1:5]
#snps=snps[!B73,]

#K=GAPIT.kinship.loiselle(t(myGD[,-1]), method="additive", use="all")
#K[index,index]

# Use the GAPIT function to solve the kinship matrix by VanRaden's algorithm
K1=GAPIT.kinship.VanRaden(snps)
K1[index,index]

# Use the GAPIT function to solve the kinship matrix by Zhang's algorithm
K2=GAPIT.kinship.ZHANG(snps)
K2[index,index]

# Heatmap for visualize the kinship by Zhang's method
heatmap.2(K1,  cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none")
#quartz()

# For VanRaden's method
heatmap.2(K2,  cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none")


# Compare the two different methods
n=nrow(snps)
ind.a=seq(1:(n*n))
i =1:n
j=(i-1)*n
ind.d=i+j
par(mfrow=c(1,3))
plot(K2[ind.a],K1[ind.a],main="All elements",xlab="Zhang",ylab="VanRaden") # use indexing to pair up the elements and plot against each other
lines(K2[ind.d],K1[ind.d],main="All elements",xlab="Zhang",ylab="VanRaden",col="red")

plot(K2[ind.d],K1[ind.d],main="Diagonals",xlab="Zhang",ylab="VanRaden")
plot(K2[-ind.d],K1[-ind.d],main="Off diag",xlab="Zhang",ylab="VanRaden")