# CROP545 LAB 11 # CODE PROVIDED BY ZHIWU ZHANG # COMMENTED AND MODIFIED BY YUANHONG SONG # In this lab we will discuss the SUPER algorithm in the GAPIT package # 0. SETUP ---- # 0.1 Packages and functions ---- # Please consider put all your work in one folder, there might be # multiple output files generated automatically. silver = "/Users/song/Dropbox/COURSE/CROP_545/2018/labs/lab11" setwd(silver) # The following packages are required for this script. # Please install the package if necessary. library('MASS') # required for ginv # 'multtest' package: source("https://bioconductor.org/biocLite.R") biocLite("multtest") library("multtest") library(gplots) library(compiler) # required for cmpfun # DO NOT install the 'compiler' package, simply load. library("scatterplot3d") # GAPIT package and functions source("http://www.zzlab.net/GAPIT/emma.txt") source("http://www.zzlab.net/GAPIT/gapit_functions.txt") # 0.2 Data ---- myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T) myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T) # 0.3 Simulate phenotype data # Siultate 10 QTN on the first half chromosomes X=myGD[,-1] index1to5=myGM[,2]<6 X1to5 = X[,index1to5] taxa=myGD[,1] set.seed(99164) # Save the markers on chromosomes 1-5 with individual names. GD.candidate=cbind(taxa,X1to5) # This time we use the simulation fuction in GAPIT to simulate the phynometric # data. This function works in the same way as the G2P function. mySim=GAPIT.Phenotype.Simulation(GD=GD.candidate,GM=myGM[index1to5,],h2=.5,NQTN=10,QTNDist="norm") # 1. Calculate kinship from QTNS---- # The following calculate the kinship by VanRaden method. # Multiple outputs files (plots and spresheet) will be generated automatically. # It will be useful to keep the output in a seperate folder so we don't get confused # Here we learn how to create folders directly in R: kin = "kin" dir.create(file.path(silver, kin)) setwd("./kin") myGAPIT0=GAPIT( Y=mySim$Y, # phenotype GD=GD.candidate, # the genotype data GM=myGM[index1to5,], # gene map information SNP.test = F, # test SNPs or not group.from=5, # The starting number of groups of compression group.to=5, # the ending number of groups of compression group.by=10, # the interval for crompression groups memo="KbyQTN" # goes into the output, remind what you did ) # GLM, MLM, and CMLM in GAPIT # The choice of method is indicated by the grouping number # group.from = large number, group.to = large, then MLM (100000, 1000000, 10) # group.from = small number, group.to = large, then CMLM (1, 100000, 10) # group.from = small number, group to = small, then GLM (1, 1, 10) # Note that we were calculating the kinship in previous step, so the grouping # number was not indicating the GWAS method. # 2. GWAS by MLM ---- # Similar thing as above, we setting a new folder for the new outputs. MLM = "MLM" dir.create(file.path(silver, MLM)) setwd(paste(silver, MLM, sep="/")) # make sure you are in the MLM folder getwd() myGAPIT1=GAPIT( Y=mySim$Y, #pheno GD=myGD,# GM=myGM, KI=myGAPIT0$kinship,# including kinship in the algorithm QTN.position=mySim$QTN.position, PCA.total=3, # including PC #SNP.test = F, group.from=1000000, group.to=1000000, group.by=10, memo="GWASbyMLM") # observed vs fitted # mySim$u - the simulated phenotype # myGAPIT0$Pred$BLUP - predicted phenotype by GAPIT cor(mySim$u,myGAPIT0$Pred$BLUP) cor(mySim$u,myGAPIT1$Pred$BLUP) par(mfrow=c(1,2)) plot(mySim$u,myGAPIT0$Pred$BLUP) plot(mySim$u,myGAPIT1$Pred$BLUP) # 3. SUPER ---- SUPER = "SUPER" dir.create(file.path(silver, SUPER)) setwd(paste(silver, SUPER, sep="/")) getwd() # SUPER is not triggered by the choice of grouping number # instead, we will indicate the sangwich.top and bottom command myGAPIT=GAPIT( Y=mySim$Y, GD=myGD, GM=myGM, QTN.position=mySim$QTN.position, PCA.total=3, sangwich.top="MLM", #options are GLM,MLM,CMLM, FaST and SUPER sangwich.bottom="SUPER", #options are GLM,MLM,CMLM, FaST and SUPER LD=0.1, memo="SUPER") # We will stop here today and pick it up next time # # 4. Power analysis using the GAPID.FDR.TypeI function # myStat=GAPIT.FDR.TypeI(WS=c(1e0,1e3,1e4,1e5), # window size, the inverse of resolution # GM=myGM, # seqQTN=mySim$QTN.position, # GWAS=myGAPIT$GWAS) # # # 5. Replicate the # replicate = "replicate" # dir.create(file.path(silver, replicate)) # setwd(paste(silver, replicate, sep="/")) # getwd() # # nrep=3 # # statRep=replicate(nrep,{ # mySim=GAPIT.Phenotype.Simulation(GD=GD.candidate, # GM=myGM[index1to5,], # h2=.5,NQTN=10,QTNDist="norm") # end of simulation # # GWAS by SUPER, you may choose any other method # myGAPIT=GAPIT(Y=mySim$Y, GD=myGD, GM=myGM, # QTN.position=mySim$QTN.position, # PCA.total=3, # sangwich.top="MLM", #options are GLM,MLM,CMLM, FaST and SUPER # sangwich.bottom="SUPER", #options are GLM,MLM,CMLM, FaST and SUPER # LD=0.1, # memo="SUPER") # end of GWAS # # Power analysis # myStat=GAPIT.FDR.TypeI(WS=c(1e0,1e3,1e4,1e5), # GM=myGM, # seqQTN=mySim$QTN.position, # GWAS=myGAPIT$GWAS)# end of power analysis # }# end of replicated command # ) # # you may use any other approach to replicate the process and collect the output you want # # #FDR # s.fdr=seq(3,length(statRep),7) # fdr=statRep[s.fdr] # fdr.mean=Reduce ("+", fdr) / length(fdr) # # #AUC: power vs. FDR # s.auc.fdr=seq(6,length(statRep),7) # auc.fdr=statRep[s.auc.fdr] # auc.fdr.mean=Reduce ("+", auc.fdr) / length(auc.fdr) # # theColor=rainbow(4) # plot(fdr.mean[,1],power , type="b", col=theColor [1],xlim=c(0,1)) # for(i in 2:ncol(fdr.mean)){ # lines(fdr.mean[,i], power , type="b", col= theColor [i]) # }