#####***Crops545HW4*** #####Author: Haixiao Dong and Ryan Oliveira #####Date: March, 2017 rm(list=ls()) opar <- par(no.readonly = T) set.seed(99164) #setwd("") #Import data, source code source("ALPACA.R") myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T) myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T) myY=read.table(file="http://zzlab.net/GAPIT/data/CROP545_Phenotype.txt", head=T) myCV=read.table(file="http://zzlab.net/GAPIT/data/CROP545_Covariates.txt", head=T) source("http://zzlab.net/StaGen/2017/R/G2P.R") source("http://zzlab.net/StaGen/2017/R/GWASbyCor.R") library(mgcv) ###Question1#2#3# ###Please see ALPACA.R for the source code to these questions ###Question4# #We run the ALPACA function on supplied data (read above) #Then we output the results of ALPACA into "myGLM" #We also take the time before and after running the function #Then we subtract these times to give an approximate running time start.time = Sys.time() myGLM <- ALPACA( y=myY[, -1], X=myGD[, -1], C=myCV[, -1], GM=myGM) end.time = Sys.time() time.taken = end.time - start.time time.taken #The time it took is stored in "time.taken" #The results can be accessed via "str(myGLM) as well as the generated .pdfs and .txt ###Question5# #Simulation test using various NQTN, h2 set.seed(99164) X=myGD[,-1] index1to5=myGM[,2]<6 X1to5 = X[,index1to5] h2 <- c(0.25, 0.5, 0.75, 1) NQTN <- c(2, 10, 100) rept <- 100 #replicate time nsnp <- length(X[1,]) # for(i in 1:length(h2)){ for(j in 1:length(NQTN)){ assign(paste("h2.", i, ".NQTN.", j, ".GLM.power", sep=""), matrix(NA, nrow=nsnp, ncol=0)) assign(paste("h2.", i, ".NQTN.", j, ".GLM.fdr", sep=""), matrix(NA, nrow=nsnp, ncol=0)) assign(paste("h2.", i, ".NQTN.", j, ".Cor.power", sep=""), matrix(NA, nrow=nsnp, ncol=0)) assign(paste("h2.", i, ".NQTN.", j, ".Cor.fdr", sep=""), matrix(NA, nrow=nsnp, ncol=0)) for(r in 1:rept){ mySim <- G2P(X=X1to5, h2=h2[i], alpha=1, NQTN=NQTN[j], distribution="normal") ###GWAS by GLM myGLM <- ALPACA( y=mySim$y, X=X, GM=myGM, QTN.position=mySim$QTN.position ) GLM.power <- get(paste("h2.", i, ".NQTN.", j, ".GLM.power", sep="")) GLM.power <- cbind(GLM.power, as.vector(myGLM$power.fdr.type1error$power)) GLM.fdr <- get(paste("h2.", i, ".NQTN.", j, ".GLM.fdr", sep="")) GLM.fdr <- cbind(GLM.fdr, myGLM$power.fdr.type1error$fdr) assign(paste("h2.", i, ".NQTN.", j, ".GLM.power", sep=""), GLM.power) assign(paste("h2.", i, ".NQTN.", j, ".GLM.fdr", sep=""), GLM.fdr) ###GWASbyCor myP <- GWASbyCor(X, mySim$y) order.SNP <- order(myP) Cor.pwrfdr <- power.fdr(order.SNP, mySim$QTN.position) Cor.power <- get(paste("h2.", i, ".NQTN.", j, ".Cor.power", sep="")) Cor.power <- cbind(Cor.power, Cor.pwrfdr$power) Cor.fdr <- get(paste("h2.", i, ".NQTN.", j, ".Cor.fdr", sep="")) Cor.fdr <- cbind(Cor.fdr, Cor.pwrfdr$fdr) assign(paste("h2.", i, ".NQTN.", j, ".Cor.power", sep=""), Cor.power) assign(paste("h2.", i, ".NQTN.", j, ".Cor.fdr", sep=""), Cor.fdr) } } } ###plot power-fdr pdf(file="Power.fdr_GLM-Cor.pdf", height=10, onefile=TRUE) par(mfrow=c(4, 3)) for(i in 1:length(h2)){ for(j in 1:length(NQTN)){ GLM.power <- get(paste("h2.", i, ".NQTN.", j, ".GLM.power", sep="")) GLM.fdr <- get(paste("h2.", i, ".NQTN.", j, ".GLM.fdr", sep="")) GLM.Y <- rowMeans(GLM.power) GLM.X <- rowMeans(GLM.fdr) Cor.power <- get(paste("h2.", i, ".NQTN.", j, ".Cor.power", sep="")) Cor.fdr <- get(paste("h2.", i, ".NQTN.", j, ".Cor.fdr", sep="")) Cor.Y <- rowMeans(Cor.power) Cor.X <- rowMeans(Cor.fdr) plot(GLM.X, GLM.Y, type="b", pch=1, col="red", xlim=c(0,1), ylim=c(0,1), xlab="fdr", ylab="power", main=paste("h2=", h2[i], " NQTN=", NQTN[j], sep="")) lines(Cor.Y~Cor.X, type="b", pch=2, col="blue") legend("topleft", c("GLM", "Cor"), col=c("red", "blue"), lty=1, pch=c(1, 2), cex=0.8, bty="n") } } dev.off() ## This is to generate comparison QQ and Manhattan plots ## for GWAS by Cor vs. GWAS by ALPACA. It uses whatever ## h2=0.75, alpha=1, NQTN=10, and a normal residual distribution. ###Code to run for GLM on 10 simulated QTNs index1to5=myGM[,2]<6 X1to5 = X[,index1to5] set.seed(99164) mySim <- G2P(X=X1to5, h2=0.75, alpha=1, NQTN=10, distribution="normal") myGLM <- ALPACA( y=mySim$y, X=myGD[, -1], GM=myGM, QTN.position=mySim$QTN.position ) order.GLM <- order(myGLM$P.value) ###Runs GWAS by cor on G2P myCor <- GWASbyCor(X, mySim$y) order.Cor <- order(myCor) ###Generate QQ plots ##QQ for GLM p.obs.glm=myGLM$P.value m2=length(p.obs.glm) p.uni.glm=runif(m2,0,1) order.obs.glm=order(p.obs.glm) order.uni.glm=order(p.uni.glm) ##QQ for GWAS by Cor p.obs.cor=myCor m2=length(p.obs.cor) p.uni.cor=runif(m2,0,1) order.obs.cor=order(p.obs.cor) order.uni.cor=order(p.uni.cor) ##Makes QQ plots for both in one file pdf(file="QQ.comparison.pdf") plot(-log10(p.uni.glm[order.uni.glm]), -log10(p.obs.glm[order.obs.glm]), main="GLM PCA GWAS", xlab="Expected -log10(p)", ylab="Observed -log10(p)") abline(a=0, b=1, col="red") plot(-log10(p.uni.cor[order.uni.cor]), -log10(p.obs.cor[order.obs.cor]), main="GWAS by Cor", xlab="Expected -log10(p)", ylab="Observed -log10(p)") abline(a=0, b=1, col="red") dev.off() ###Generate Manhattan plots m=ncol(myGD[,-1]) pdf(file="Manhattan.comparison.pdf") color.vector <- rep(rainbow(5), length(unique(myGM[,2]))) #GLM Manhattan plot(t(-log10(myGLM$P.value))~seq(1:m),col=color.vector[myGM[,2]], main="GLM Manhattan", xlab="", ylab="-log10(p)") if (!is.null(mySim$QTN.position)){ points(mySim$QTN.position, -log10(myGLM$P.value[mySim$QTN.position]), type="p", pch=20, cex=1,lwd=1.5,col="dimgrey") points(mySim$QTN.position, -log10(myGLM$P.value[mySim$QTN.position]), type="p", pch=21, cex=2,lwd=1.5,col="dimgrey") } abline(h=-log10(myGLM$cutoff.final), col="green") #Cor Manhattan color.vector.cor <- rep(rainbow(5), length(unique(myGM[,2]))) plot(t(-log10(myCor))~seq(1:m),col=color.vector[myGM[,2]], main="Cor Manhattan", xlab="", ylab="-log10(p)") if (!is.null(mySim$QTN.position)){ points(mySim$QTN.position, -log10(myCor[mySim$QTN.position]), type="p", pch=20, cex=1,lwd=1.5,col="dimgrey") points(mySim$QTN.position, -log10(myCor[mySim$QTN.position]), type="p", pch=21, cex=2,lwd=1.5,col="dimgrey") } abline(h=-log10(myGLM$cutoff.final), col="green") dev.off()