#CROPS545_Lecture23_CV #Zhiwu Zhang #April 3, 2016 rm(list=ls()) #Import GAPIT #source("http://www.bioconductor.org/biocLite.R") #biocLite("multtest") #install.packages("EMMREML") #install.packages("gplots") #install.packages("scatterplot3d") library('MASS') # required for ginv library(multtest) library(gplots) library(compiler) #required for cmpfun library("scatterplot3d") library("EMMREML") source("http://www.zzlab.net/GAPIT/emma.txt") source("http://www.zzlab.net/GAPIT/gapit_functions.txt") #source("~/Dropbox/GAPIT/Functions/emma.txt") source("~/Dropbox/GAPIT/Functions/gapit_functions.txt") #Import demo data myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T) myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T) myCV=read.table(file="http://zzlab.net/GAPIT/data/mdp_env.txt",head=T) #myGD=read.table(file="~/Dropbox/Current/ZZLab/WSUCourse/CROPS545/Demo/mdp_numeric.txt",head=T) #myGM=read.table(file="~/Dropbox/Current/ZZLab/WSUCourse/CROPS545/Demo/mdp_SNP_information.txt",head=T) #myCV=read.table(file="~/Dropbox/Current/ZZLab/WSUCourse/CROPS545/Demo/mdp_env.txt",head=T) #Simultate 10 QTN on the first half chromosomes X=myGD[,-1] index1to5=myGM[,2]<6 X1to5 = X[,index1to5] taxa=myGD[,1] set.seed(99164) GD.candidate=cbind(taxa,X1to5) mySim=GAPIT.Phenotype.Simulation(GD=GD.candidate,GM=myGM[index1to5,],h2=.5,NQTN=20, effectunit =.95,QTNDist="normal",CV=myCV,cveff=c(.01,.01)) #order phenotype by ID index=order(mySim$Y[,1]) myY=mySim$Y[index,] setwd("~/Desktop/temp") #Prediction by PCA and ENV #---------------------------------------------------------------------------------------- myGAPIT <- GAPIT( Y=mySim$Y, GD=myGD, GM=myGM, PCA.total=3, #CV=myCV, #CV.Inheritance=6, group.from=1000, group.to=1000, group.by=10, QTN.position=mySim$QTN.position, #SNP.test=FALSE, memo="CMLM", ) str(myGAPIT) ry2=cor(myGAPIT$Pred[,8],myY[,2])^2 ru2=cor(myGAPIT$Pred[,8],mySim$u)^2 par(mfrow=c(2,1), mar = c(3,4,1,1)) plot(myGAPIT$Pred[,8],myY[,2]) mtext(paste("R square=",ry2,sep=""), side = 3) plot(myGAPIT$Pred[,8],mySim$u) mtext(paste("R square=",ru2,sep=""), side = 3) #Adding the top associated SNPs ntop=50 index=order(myGAPIT$P) top=index[1:ntop] myQTN=cbind(myGAPIT$PCA[,1:4], myCV[,2:3],myGD[,c(top+1)]) myGAPIT2 <- GAPIT( Y=mySim$Y, GD=myGD, GM=myGM, #PCA.total=3, CV=myQTN, group.from=1, group.to=1, group.by=10, QTN.position=mySim$QTN.position, SNP.test=FALSE, memo="GLM+QTN", ) ry2=cor(myGAPIT2$Pred[,8],myY[,2])^2 ru2=cor(myGAPIT2$Pred[,8],mySim$u)^2 par(mfrow=c(2,1), mar = c(3,4,1,1)) plot(myGAPIT2$Pred[,8],myY[,2]) mtext(paste("R square=",ry2,sep=""), side = 3) plot(myGAPIT2$Pred[,8],mySim$u) mtext(paste("R square=",ru2,sep=""), side = 3) #Real Cross validation set.seed(99164) n=nrow(mySim$Y) testing=sample(n,round(n/5),replace=F) training=-testing myGAPIT3 <- GAPIT( Y=mySim$Y[training,], GD=myGD, GM=myGM, CV=myCV, PCA.total=3, group.from=1, group.to=1, group.by=10, QTN.position=mySim$QTN.position, #SNP.test=FALSE, memo="Training", ) #Adding the top associated SNPs ntop=50 index=order(myGAPIT3$P) top=index[1:ntop] myQTN=cbind(myGAPIT3$PCA[,1:4], myCV[,2:3],myGD[,c(top+1)]) #Training myGAPIT4 <- GAPIT( Y=mySim$Y[training,], GD=myGD, GM=myGM, #PCA.total=3, CV=myQTN, group.from=1, group.to=1, group.by=1, QTN.position=mySim$QTN.position, #SNP.test=FALSE, memo="GLM+QTN", ) ry2=cor(myGAPIT4$Pred[training,8],myY[training,2])^2 ru2=cor(myGAPIT4$Pred[training,8],mySim$u[training])^2 par(mfrow=c(2,1), mar = c(3,4,1,1)) plot(myGAPIT4$Pred[training,8],myY[training,2]) mtext(paste("R square=",ry2,sep=""), side = 3) plot(myGAPIT4$Pred[training,8],mySim$u[training]) mtext(paste("R square=",ru2,sep=""), side = 3) #Testing #calculate prediction effect=myGAPIT4$effect.cv X=as.matrix(cbind(1, myQTN[,-1])) Pred=X%*%effect ry2=cor(Pred[testing],myY[testing,2])^2 ru2=cor(Pred[testing],mySim$u[testing])^2 par(mfrow=c(2,1), mar = c(3,4,1,1)) plot(Pred[testing],myY[testing,2]) mtext(paste("R square=",ry2,sep=""), side = 3) plot(Pred[testing],mySim$u[testing]) mtext(paste("R square=",ru2,sep=""), side = 3) #gBLUP/cBLUP/sBLUP myGAPIT5 <- GAPIT( Y=mySim$Y[training,], #Y=mySim$Y, GD=myGD, GM=myGM, PCA.total=3, CV=myCV, group.from=1000, group.to=1000, group.by=10, sangwich.top="MLM", #options are GLM,MLM,CMLM, FaST and SUPER sangwich.bottom="SUPER", #options are GLM,MLM,CMLM, FaST and SUPER SUPER_GS=TRUE, QTN.position=mySim$QTN.position, #SNP.test=FALSE, memo="gBLUP-cBLUP-sBLUP", ) #Training ry2=cor(myGAPIT5$Pred[training,8],myY[training,2])^2 ru2=cor(myGAPIT5$Pred[training,8],mySim$u[training])^2 ry2.blup=cor(myGAPIT5$Pred[training,5],myY[training,2])^2 ru2.blup=cor(myGAPIT5$Pred[training,5],mySim$u[training])^2 #Testing ry2=cor(myGAPIT5$Pred[testing,8],myY[testing,2])^2 ru2=cor(myGAPIT5$Pred[testing,8],mySim$u[testing])^2 ry2.blup=cor(myGAPIT5$Pred[testing,5],myY[testing,2])^2 ru2.blup=cor(myGAPIT5$Pred[testing,5],mySim$u[testing])^2 par(mfrow=c(2,2), mar = c(3,4,1,1)) plot(myGAPIT5$Pred[training,8],myY[training,2],col="dimgray") points(myGAPIT5$Pred[testing,8],myY[testing,2],col="red") mtext(paste("R square=",ry2,sep=""), side = 3) plot(myGAPIT5$Pred[training,8],mySim$u[training],col="dimgray") points(myGAPIT5$Pred[testing,8],mySim$u[testing],col="red") mtext(paste("R square=",ru2,sep=""), side = 3) plot(myGAPIT5$Pred[training,5],myY[training,2],col="dimgray") points(myGAPIT5$Pred[testing,5],myY[testing,2],col="red") mtext(paste("R square=",ry2.blup,sep=""), side = 3) plot(myGAPIT5$Pred[training,5],mySim$u[training],col="dimgray") points(myGAPIT5$Pred[testing,5],mySim$u[testing],col="red") mtext(paste("R square=",ru2.blup,sep=""), side = 3) #Fake Cross validation