--- title: "Lab_7" author: "Zhou" date: "3/2/2021" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` ## 1. GWAS by correlation ```{r} # simulate the phenotype with G2P function myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T) myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T) source("http://zzlab.net/StaGen/2020/R/G2P.R") source("http://zzlab.net/StaGen/2020/R/GWASbyCor.R") X=myGD[,-1] index1to5=myGM[,2]<6 X1to5 = X[,index1to5] set.seed(123) mySim=G2P(X= X1to5,h2=.75,alpha=1,NQTN=10,distribution="norm") p= GWASbyCor(X=X,y=mySim$y) # Manhattan plot color.vector <- rep(c("deepskyblue","orange","forestgreen","indianred3"),10) m=nrow(myGM) plot(t(-log10(p))~seq(1:m),col=color.vector[myGM[,2]]) # True QTN position abline(v=mySim$QTN.position, lty = 2, lwd=2, col = "gray") mySim$QTN.position # Top 10 SNPs index=order(p) top10=index[1:10] top10 abline(h=-log10(max(p[top10])), lty = 1, lwd=2, col = "black") # Detected QTNs within top 10 detected=intersect(top10,mySim$QTN.position) detected abline(v= detected, lty = 2, lwd=2, col = "blue") # False positive within top 10 falsePositive=setdiff(top10, mySim$QTN.position) falsePositive abline(v= falsePositive, lty = 2, lwd=2, col = "red") # Q3: calculate the false positive rate and true positive within top 10 SNPs # QQ plot p.obs=p[!index1to5] m2=length(p.obs) p.uni=runif(m2,0,1) order.obs=order(p.obs) order.uni=order(p.uni) plot(-log10(p.uni[order.uni]),-log10(p.obs[order.obs])) abline(a = 0, b = 1, col = "red") ``` ### Q4: calculate the false postitive rate when get the all QTNs. Try by youself. ```{r} ``` ## 2. GWAS by General Linear Model ### Add PCs in GWAS ```{r} G=myGD[,-1] n=nrow(G) m=ncol(G) P=matrix(NA,1,m) y = mySim$y # PCA # PCA=prcomp(X) for (i in 1:m){ x=G[,i] if(max(x)==min(x)){ p=1 }else{ X=cbind(1, PCA$x[,1:3],x) LHS=t(X)%*%X C=solve(LHS) RHS=t(X)%*%y b=C%*%RHS yb=X%*%b e=y-yb n=length(y) ve=sum(e^2)/(n-1) vt=C*ve t=b/sqrt(diag(vt)) p=2*(1-pt(abs(t),n-2)) } #end of testing variation P[i]=p[length(p)] } #end of looping for markers # Q5: Does the GLM include all the SNPs in the model in one time? p <- P # Manhattan plot color.vector <- rep(c("deepskyblue","orange","forestgreen","indianred3"),10) m=nrow(myGM) plot(t(-log10(p))~seq(1:m),col=color.vector[myGM[,2]]) # Q6: calculate the false postitive rate within top 10 SNPs with 3 PCs # QQ plot p.obs=P[!index1to5] m2=length(p.obs) p.uni=runif(m2,0,1) order.obs=order(p.obs) order.uni=order(p.uni) plot(-log10(p.uni[order.uni]), -log10(p.obs[order.obs]), ylim=c(0,7)) abline(a = 0, b = 1, col = "red") ```