# CROP545 LAB 12 # CODE PROVIDED BY ZHIWU ZHANG # COMMENTED AND MODIFIED BY YUANHONG SONG # We will continue the content from last week: GWAS using GAPIT package. # Some sections will be overlaping with last week's content # The codes we using today are copied from lecture slide #21 Blink # 0. SETUP ---- # 0.1 Packages and functions ---- # Please consider put all your work in one folder, there might be # multiple output files generated automatically. silver = "/Users/song/Dropbox/COURSE/CROP_545/2018/labs/lab12" setwd(silver) # Please check the following packages carefully # install any package you might need library('MASS') # required for ginv # 'multtest' package: # source("https://bioconductor.org/biocLite.R") # biocLite("multtest") library("multtest") library(gplots) library(compiler) # required for cmpfun # DO NOT install the 'compiler' package, simply load. library("scatterplot3d") # GAPIT package and functions source("http://www.zzlab.net/GAPIT/emma.txt") # EMMA source("http://www.zzlab.net/GAPIT/gapit_functions.txt") # GAPIT # 0.2 Data ---- myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T) myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T) # 0.3 Simulate phenotype data # Siultate 10 QTN on the first half chromosomes X=myGD[,-1] index1to5=myGM[,2]<6 X1to5 = X[,index1to5] taxa=myGD[,1] set.seed(99164) # Save the markers on chromosomes 1-5 with individual names. GD.candidate=cbind(taxa,X1to5) # This time we use the simulation fuction in GAPIT to simulate the phynometric # data. This function works in the same way as the G2P function. mySim=GAPIT.Phenotype.Simulation(GD=GD.candidate,GM=myGM[index1to5,],h2=.5,NQTN=10,QTNDist="norm") # 0.3 Calculate kinship from QTNS---- # The following calculate the kinship by VanRaden method. # Multiple outputs files (plots and spresheet) will be generated automatically. # It will be useful to keep the output in a seperate folder so we don't get confused # Here we learn how to create folders directly in R: kin = "kin" dir.create(file.path(silver, kin)) setwd("./kin") myGAPIT0=GAPIT( Y=mySim$Y, # phenotype GD=GD.candidate, # the genotype data GM=myGM[index1to5,], # gene map information SNP.test = F, # test SNPs or not group.from=5, # The starting number of groups of compression group.to=5, # the ending number of groups of compression group.by=10, # the interval for crompression groups memo="KbyQTN" # goes into the output, remind what you did ) # GLM, MLM, and CMLM in GAPIT # The choice of method is indicated by the grouping number # group.from = large number, group.to = large, then MLM (100000, 1000000, 10) # group.from = small number, group.to = large, then CMLM (1, 100000, 10) # group.from = small number, group to = small, then GLM (1, 1, 10) # Note that we were calculating the kinship in previous step, so the grouping # number was not indicating the GWAS method. # 1. GWAS by GLM---- dir.create(file.path(silver, "GLM")) setwd(paste(silver, "GLM", sep="/")) getwd() # For GLM, we just need to set the number of bins myGLM=GAPIT( Y=mySim$Y, GD=myGD, GM=myGM, QTN.position=mySim$QTN.position, PCA.total=3, group.from = 1, group.to = 1, group.by = 10, memo="GLM") # 2. GWAS by MLM ---- # Similar thing as above, we setting a new folder for the new outputs. dir.create(file.path(silver, "MLM")) setwd(paste(silver, "MLM", sep="/")) getwd() # For MLM, group.from = large number, group.to = large (100000, 1000000, 10) myMLM=GAPIT( Y=mySim$Y, #pheno GD=myGD, GM=myGM, KI=myGAPIT0$kinship,# including kinship in the algorithm QTN.position=mySim$QTN.position, PCA.total=3, # including PC group.from=1000000, group.to=1000000, group.by=10, memo="GWASbyMLM") # observed vs fitted # mySim$u - the simulated phenotype # myGLM$Pred$BLUP - predicted phenotype by GAPIT cor(mySim$u,myGLM$Pred$BLUP) cor(mySim$u,myMLM$Pred$BLUP) par(mfrow=c(1,2)) plot(mySim$u,myGLM$Pred$BLUP) plot(mySim$u,myMLM$Pred$BLUP) # 3. CMLM dir.create(file.path(silver, "CMLM")) setwd(paste(silver, "CMLM", sep="/")) getwd() myCMLM=GAPIT( Y=mySim$Y, GD=myGD, GM=myGM, QTN.position=mySim$QTN.position, PCA.total=3, group.from = 1, group.to = 1000, group.by = 10, memo="CMLM") # 4. SUPER ---- dir.create(file.path(silver, "SUPER")) setwd(paste(silver, "SUPER", sep="/")) getwd() # SUPER is not triggered by the choice of grouping number # instead, we will indicate the sangwich.top and bottom command mySUPER=GAPIT( Y=mySim$Y, GD=myGD, GM=myGM, QTN.position=mySim$QTN.position, PCA.total=3, sangwich.top="MLM", #options are GLM,MLM,CMLM, FaST and SUPER sangwich.bottom="SUPER", #options are GLM,MLM,CMLM, FaST and SUPER LD=0.1, memo="SUPER") # 5. FarmCPU ---- dir.create(file.path(silver, "farmCPU")) setwd(paste(silver, "farmCPU", sep="/")) getwd() # FarmCPU need supports from other packages # details see FarmCPU user manual install.packages("bigmemory") install.packages("biganalytics") # Import library (each time to start R) library(bigmemory) library(biganalytics) # require(compiler) #for cmpfun source("http://www.zzlab.net/FarmCPU/FarmCPU_functions.txt")#web source code # perform GWAS by FarmCPU # rep{ mySim=GAPIT.Phenotype.Simulation(GD=GD.candidate,GM=myGM[index1to5,],h2=.5,NQTN=10,QTNDist="norm") myFarmCPU= FarmCPU(Y=mySim$Y,GD=myGD,GM=myGM) # Getting p-values #myP=as.numeric(myFarmCPU$GWAS$P.value) power=GAPIT.FDR.TypeI(WS=1e5,GM=myGM,seqQTN=mySim$QTN.position,GWAS=cbind(myGM, gapit$P)) } # Combine gene map file with p value (so we know the location of the markers) myGI.MP=cbind(myGM[,-1],myP) # Create Manhattan plot # The output of this Manhattan plot will go to the folder GAPIT.Manhattan(GI.MP=myGI.MP,seqQTN=mySim$QTN.position) # Create QQ plot # Plot will go to the output folder GAPIT.QQ(myP) # 6. BLINK ----- dir.create(file.path(silver, "BLINK")) setwd(paste(silver, "BLINK", sep="/")) getwd() # we will get a copy of the blink.exe in Blink folder we just created # MAC users: please use blink_mac # WINDOWS users: please use blink.exe file.copy(file.path(silver,"BLINK-master/blink_mac"), to="blink_mac") # if it work myY=mySim$Y colnames(myY)=c("taxa", "SimPheno") myGD1=t(myGD[,-1]) # Since the BLINK was processed by computer terminal, not by R # we need to save data as data files in hard drive folder write.table(myY,file="myData.txt",quote=F,row.name=F,col.name=T,sep="\t") write.table(myGD1,file="myData.dat",quote=F,row.name=F,col.name=F,sep="\t") write.table(myGM,file="myData.map",quote=F,row.name=F,col.name=T,sep="\t") #run blink system("./blink_mac --file myData --max_loop 10 --numeric --gwas") # windows users: please use ./blink #Extract p values result<- read.table("SimPheno_GWAS_result.txt", head = TRUE) myP=as.numeric(result[,5]) myGI.MP=cbind(myGM[,-1],myP) GAPIT.Manhattan(GI.MP=myGI.MP,seqQTN=mySim$QTN.position) GAPIT.QQ(myP) # 7. Power---- GWAS = cbind(myGM$SNP, myGM$Chromosome, ) myStat=GAPIT.FDR.TypeI( WS=c(1e0,1e3,1e4,1e5), # WS = window size GM=myGM, # dataset seqQTN=mySim$QTN.position, # QTN position GWAS=myGLM )