--- title: "LAB_5" author: "Zhou" date: "2/16/2021" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` ## 1. Phenotype simulation ### 1.1 load the genotype in numeric format ```{r} myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T) myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T) ``` ### 1.2 Phenotype simulation function ```{r} G2P=function(X,h2,alpha,NQTN,distribution){ n=nrow(X) m=ncol(X) #Sampling QTN QTN.position=sample(m,NQTN,replace=F) SNPQ=as.matrix(X[,QTN.position]) QTN.position #QTN effects if(distribution=="norm") {addeffect=rnorm(NQTN,0,1) }else {addeffect=alpha^(1:NQTN)} #Simulate phenotype effect=SNPQ%*%addeffect effectvar=var(effect) residualvar=(effectvar-h2*effectvar)/h2 residual=rnorm(n,0,sqrt(residualvar)) y=effect+residual return(list(addeffect = addeffect, y=y, add = effect, residual = residual, QTN.position=QTN.position, SNPQ=SNPQ)) } ``` ### 1.3.1 Change the heritability ```{r} par(mfrow=c(3,1),mar = c(3,4,1,1)) myG2P=G2P(X,.75,1,10,"norm") str(myG2P) va=var(myG2P$add) ve=var(myG2P$residual) vp=var(myG2P$y) v=matrix(c(va,ve,vp),1,3) colnames(v)=c("A", "E","P") barplot(v,col="gray") myG2P=G2P(X,.5,1,10,"norm") va=var(myG2P$add) ve=var(myG2P$residual) vp=var(myG2P$y) v=matrix(c(va,ve,vp),1,3) colnames(v)=c("A", "E","P") barplot(v,col="gray") myG2P=G2P(X,.25,1,10,"norm") va=var(myG2P$add) ve=var(myG2P$residual) vp=var(myG2P$y) v=matrix(c(va,ve,vp),1,3) colnames(v)=c("A", "E","P") barplot(v,col="gray") ``` ### 1.3.2 Change the number of QTN ```{r} par(mfrow=c(3,1)) myG2P=G2P(X,.5,1,2,"norm") plot(density(myG2P$y),ylim=c(0,1.5), main = "2 QTNs") lines(density(myG2P$add),col="blue") lines(density(myG2P$residual),col="red") myG2P=G2P(X,.5,1,10,"norm") plot(density(myG2P$y),ylim=c(0,.3), main = "10 QTNs") lines(density(myG2P$add),col="blue") lines(density(myG2P$residual),col="red") myG2P=G2P(X,.5,1,100,"norm") plot(density(myG2P$y),ylim=c(0,.1), main = "100 QTNs") lines(density(myG2P$add),col="blue") lines(density(myG2P$residual),col="red") ``` ### 1.3.3 Change the gene effect distribution to geom ```{r} par(mfrow=c(3,3),mar = c(3,4,1,1)) myG2P=G2P(X,.5,1,10,"geom") hist(myG2P$add) hist(myG2P$residual) hist(myG2P$y) myG2P=G2P(X,.5,.95,10,"geom") hist(myG2P$add) hist(myG2P$residual) hist(myG2P$y) myG2P=G2P(X,.5,.5,10,"geom") hist(myG2P$add) hist(myG2P$residual) hist(myG2P$y) dev.off() ``` ## 2. GWAS by correlation ### 2.1 Check the output of G2P ```{r} set.seed(99164) X=myGD[,-1] index1to5=myGM[,2]<6 X1to5 = X[,index1to5] mySim=G2P(X= X1to5,h2=.75,alpha=1,NQTN=10,distribution="norm") str(mySim) plot(myGM[,c(2,3)]) lines(myGM[mySim$QTN.position,c(2,3)],type="p",col="red") points(myGM[mySim$QTN.position,c(2,3)],type="p",col="blue",cex = 5) ``` ### 2.2 GWAS by correlation ```{r} # GWAS by correlation function GWASbyCor=function(X,y){ n=nrow(X) r=cor(y,X) n=nrow(X) t=r/sqrt((1-r^2)/(n-2)) p=2*(1-pt(abs(t),n-2)) zeros=p==0 p[zeros]=1e-10 return(p)} p = GWASbyCor(mySim$y,X) # Manhattan plots color.vector <- rep(c("deepskyblue","orange","forestgreen","indianred3"),10) m=ncol(X) plot(-log10(p)~seq(1:m),col=color.vector[myGM[,2]]) # True QTN position abline(v=mySim$QTN.position, lty = 2, lwd=2, col = "gray") mySim$QTN.position # Top 10 SNPs index=order(p) top10=index[1:10] top10 abline(h=-log10(max(p[top10])), lty = 1, lwd=3, col = "black") # Detected QTNs within top 10 detected=intersect(top10,mySim$QTN.position) detected abline(v= detected, lty = 2, lwd=2, col = "blue") # False positive within top 10 falsePositive=setdiff(top10, mySim$QTN.position) falsePositive abline(v= falsePositive, lty = 2, lwd=2, col = "red") ```