--- title: "Lab 11" author: "Zhou" date: "4/6/2021" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` ## 0.Simulate the phenotype ```{r} setwd("/Users/ZhouTang/Downloads/WSU_courses/2021_spring/545/2021/Lab/Lab11") library(compiler) #required for cmpfun source("http://www.zzlab.net/GAPIT/gapit_functions.txt") #Import demo data myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T) myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T) myCV=read.table(file="http://zzlab.net/GAPIT/data/mdp_env.txt",head=T) #Simultate 10 QTN on the first half chromosomes X=myGD[,-1] index1to5=myGM[,2]<6 X1to5 = X[,index1to5] taxa=myGD[,1] set.seed(11) GD.candidate=cbind(taxa,X1to5) mySim=GAPIT.Phenotype.Simulation(GD=GD.candidate,GM=myGM[index1to5,],h2=.5,NQTN=10, effectunit =.95,QTNDist="normal",CV=myCV,cveff=c(.51,.51)) ``` ## 1.Prediction with PC ```{r} myGAPIT <- GAPIT( Y=mySim$Y, GD=myGD, GM=myGM, PCA.total=3, QTN.position=mySim$QTN.position, model="GLM") ``` ```{r} # check the correlation between the prediction and phenotype ry2=cor(myGAPIT$Pred$Prediction,mySim$Y[,2])^2 ru2=cor(myGAPIT$Pred$Prediction,mySim$u)^2 par(mfrow=c(2,1), mar = c(3,4,1,1)) plot(myGAPIT$Pred$Prediction,mySim$Y[,2]) mtext(paste("R square=",ry2,sep=""), side = 3) plot(myGAPIT$Pred[,8],mySim$u) mtext(paste("R square=",ru2,sep=""), side = 3) ``` ## 2.Prediction with env ```{r} myGAPIT <- GAPIT( Y=mySim$Y, GD=myGD, GM=myGM, PCA.total=0, CV=myCV, QTN.position=mySim$QTN.position, model="GLM") ry2=cor(myGAPIT$Pred$Prediction,mySim$Y[,2])^2 ru2=cor(myGAPIT$Pred$Prediction,mySim$u)^2 par(mfrow=c(2,1), mar = c(3,4,1,1)) plot(myGAPIT$Pred$Prediction,mySim$Y[,2]) mtext(paste("R square=",ry2,sep=""), side = 3) plot(myGAPIT$Pred[,8],mySim$u) mtext(paste("R square=",ru2,sep=""), side = 3) ``` ### 2.1 Prediction with lm function ```{r} # ENV summary(lm(mySim$Y[,2] ~ myCV[,2]+myCV[,3])) lm_cv <- lm(mySim$Y[,2] ~ myCV[,2] + myCV[,3]) summary(lm_cv) # lm_cv <- lm(mySim$Y[,2] ~ myCV[,2] + myCV[,3] + myCV[,2] * myCV[,3]) summary(lm(mySim$u ~ myCV[,2]+myCV[,3])) # PCs pcs <- prcomp(myGD[,-1]) summary(lm(mySim$Y[,2] ~ pcs$x[,1]+pcs$x[,2]+pcs$x[,3])) pcs_regression <- lm(mySim$Y[,2] ~ pcs$x[,1]+pcs$x[,2]+pcs$x[,3]) summary(pcs_regression) summary(lm(mySim$u ~ pcs$x[,1]+pcs$x[,2]+pcs$x[,3])) ``` ## 3.Prediction with SNPs ### 3.1 To get SNPs that are associated with the phenotype by GLM ```{r} myGAPIT_GLM <- GAPIT( Y=mySim$Y, GD=myGD,#Genotype GM=myGM,#Map information PCA.total=3, CV=myCV, QTN.position=mySim$QTN.position, model="GLM",# MLM CMLM GLM SUPER MLMM FarmCPU Blink ) ``` ### 3.2 Prediction with top 10 SNPs ```{r} ntop=10 index=order(myGAPIT$GWAS[,4]) top=index[1:ntop] myCV_top10SNP = myGD[,c(1,top+1)] # myCV_top10SNP= cbind(myGAPIT$PCA[,1],myGD[,c(top+1)]) # Code below didn't work # myCV_top10SNP_pre = myGD[,c(top+1)] myGAPIT <- GAPIT( Y=mySim$Y, GD=myGD, GM=myGM, CV= myCV_top10SNP, QTN.position=mySim$QTN.position, model="GLM") ry2=cor(myGAPIT$Pred$Prediction,mySim$Y[,2])^2 ru2=cor(myGAPIT$Pred$Prediction,mySim$u)^2 par(mfrow=c(2,1), mar = c(3,4,1,1)) plot(myGAPIT$Pred$Prediction,mySim$Y[,2]) mtext(paste("R square=",ry2,sep=""), side = 3) plot(myGAPIT$Pred[,8],mySim$u) mtext(paste("R square=",ru2,sep=""), side = 3) ``` ## 4. Prediction with PCs, env and SNPs Need to be done by yourself! ```{r} pcs <- prcomp(myGD[,-1]) myCV_PCsEnvSnps=cbind(myCV,myGD[,c(top+1)],pcs$x[,1:3]) myGAPIT <- GAPIT( Y=mySim$Y, GD=myGD, GM=myGM, CV= myCV_PCsEnvSnps, QTN.position=mySim$QTN.position, model="GLM") ry2=cor(myGAPIT$Pred$Prediction,mySim$Y[,2])^2 ru2=cor(myGAPIT$Pred$Prediction,mySim$u)^2 par(mfrow=c(2,1), mar = c(3,4,1,1)) plot(myGAPIT$Pred$Prediction,mySim$Y[,2]) mtext(paste("R square=",ry2,sep=""), side = 3) plot(myGAPIT$Pred[,8],mySim$u) mtext(paste("R square=",ru2,sep=""), side = 3) ``` ## 5. Cross validation ```{r} # install.packages("caret") library(caret) # Create folds for your data my_data <- data.frame(y=mySim$Y[,2],pcs$x[,1:3]) flds <- createFolds(my_data[,1], k=3, list = TRUE, returnTrain = FALSE) str(flds) ``` ```{r} # cross validation for (i in 1:3){ folds <- flds test_idx <- folds[[i]] folds[[i]] <- NULL train_idx <- c(folds[[1]], folds[[2]]) str(test_idx) str(train_idx) #data_train trainData <- my_data[train_idx, ] #data_test testData <- my_data[test_idx, ] #train model lmModel <- train(y ~ PC1 + PC2 + PC3, data=trainData, method = "lm") #make prediction preY <- predict(lmModel, testData) #accuracy print(cor(preY,testData$y)^2) } ```