`GAPIT.0000` <- function(){
##############################################################################################
#GAPIT: Genome Association and Prediction Integrated Tool
#Objective 1: State-of-the-art methods for high power, accuracy and speed;
#Objective 2: User-friendly design, help documents, and web forum;
#Objective 3: Comprehensive output to interpret data and results;
#Objective 4: Informative tables and high quality figures for reports and publication;
#Methods implemented:
# 1. GLM (Structure or Q method for GWAS, Pritchard et al. Genetics, 2000)
# 2. MLM (Q+K, Yu et al. Nature Genetics, 2006)
# 3. gBLUP (Marker based kinship, Zhang et al. Journal of Animal Science, 2007)
# 4. PCA (Zhao et al. PLoS Genetics, 2007)
# 5. EMMA (Kang et al. Genetics, 2008)
# 6. CMLM (Zhang et al. Nature Genetics, 2010)
# 7. EMMAx (Kang et al. Nature Genetics, 2010)
# 8. P3D (Zhang et al. Nature Genetics, 2010)
# 9. FaST-LMM (Lippert et al. Nature Methods, 2011)
# 10. ECMLM (Li et al. BMC Biology, 2014)
# 11. SUPER (Wang et al. PLoS One, 2014)
#Designed by Zhiwu Zhang
#Authors of paper on Bioinformatics (2012, 28:2397-2399): Alex Lipka, Feng Tian, Qishan Wang, Xiaolei Liu, Meng Li, You Tang and Zhiwu Zhang
#Authors of paper on Plant Genome (2016, Vol 9, No. 2): You Tang, Xiaolei Liu, Jiabo Wang, Meng Li, Qishan Wang, Feng Tian, Zhongbin Su, Yuchun Pan, Di Liu, Alexander E. Lipka, Edward S. Buckler, and Zhiwu Zhang
if(!require(multtest))
{
  if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
  BiocManager::install("multtest")
  #source("http://www.bioconductor.org/biocLite.R")
  #biocLite("multtest")
}
if(!require(gplots)) install.packages("gplots")
if(!require(LDheatmap)) install.packages("LDheatmap")
if(!require(genetics)) install.packages("genetics")
if(!require(ape)) install.packages("ape")
if(!require(EMMREML)) install.packages("EMMREML")
if(!require(scatterplot3d)) install.packages("scatterplot3d")
#if(!require(scatterplot3d)) install.packages("scatterplot3d")
# required_pkg = c("MASS", "data.table","biganalytics","ape", "magrittr","bigmemory", "gplots", "compiler", "scatterplot3d", "R.utils", "rrBLUP", "BGLR")
# missing_pkg = required_pkg[!(required_pkg %in% installed.packages()[,"Package"])]
# if(length(missing_pkg)) install.packages(missing_pkg, repos="http://cran.rstudio.com/")
if(!'snpStats'%in% installed.packages()[,"Package"]){
  if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
  BiocManager::install("snpStats")
  #source("http://www.bioconductor.org/biocLite.R")
  #biocLite("multtest")
  #biocLite("snpStats")
}
GAPIT.Version="2018.08.18, GAPIT 3.0"
print(paste("All packages are loaded already ! 
","GAPIT.Version is ",GAPIT.Version,sep="")) return(GAPIT.Version) } #============================================================================================= #Object: To calculate Area Under (ROC) Curve (AUC) #Straitegy: NA #Output: P value #intput: beta-power and alpha-fdr or type I error #Authors: Zhiwu Zhang #Last update: December 18, 2015 ############################################################################################## GAPIT.AUC=function(beta=NULL,alpha=NULL){ n=length(beta) #plot(alpha,beta,type="b") db=beta[-1]-beta[-n] da=1-.5*(alpha[-1]+alpha[-n]) ab=da*db AUC=sum(ab) return(AUC) } #============================================================================================= #Object: To generate binary phenotype #Straitegy: NA #Output: binary phenotype (0 and 1's) #intput: genetic effect (x), hertiability (h2) and ratio of 1's (r) #Authors: Zhiwu Zhang #Last update: March 18, 2016 ############################################################################################## `GAPIT.BIPH` <- function(x=0,h2=.5,r=.25){ #To assign probability for given standard normal variable x and h2 #Author: Zhiwu Zhang #Last update: Febuary 27, 2016 p=pnorm(x) srp=1-p-r sh=1/(1-sqrt(h2)) adj=(r-.5)*(1-sqrt(h2)) f=1/(1+exp(sh*srp))+adj return(f) } #============================================================================================= `GAPIT.Block` <- function(Z,GA,KG){ #Object: To split a group kinship into two blocks containing individuals with and without phenotype #Output: GAU,KW,KO,KWO #Authors: Zhiwu Zhang and Alex Lipka # Last update: April 14, 2011 ############################################################################################## # To separate group kiship into two blocks: with and without phenotype. # A group goes to with phenotype as loog as it has one phenotyped individual. 
#find position in group assignment (GA) for the individual associate with phenotype (specified by Z) #taxa=unique(intersect(as.matrix(Z[1,-1]),GA[,1])) taxa.Z=as.matrix(Z[1,-1]) taxa.GA=as.matrix(GA[,1]) position=taxa.GA%in%taxa.Z #Initial block as 2 GAU=cbind(GA,2) #Assign block as 1 if the individual has phenotype GAU[position,3]=1 #Modify the non-phenotyped individuals if they in a group with phenotyped individuals #To find the groups with phenotyped individuals #update block assignment for all these groups #get list of group that should be block 1 #grp.12=as.matrix(unique(GAU[,2])) #grp.1=as.matrix(unique(GAU[which(GAU[,3]==1),2])) #grp.2= as.matrix(setdiff(grp.12,grp.1)) grp.12=as.matrix(as.vector(unique(GAU[,2])) ) #unique group grp.1=as.matrix(as.vector(unique(GAU[which(GAU[,3]==1),2])) ) #unique phenotyped group grp.2= as.matrix(as.vector(setdiff(grp.12,grp.1))) #unique unphenotyped group numWithout=length(grp.2) order.1=1:length(grp.1) order.2=1:length(grp.2) if(numWithout >0) grpblock=as.matrix(rbind(cbind(grp.1,1,order.1), cbind(grp.2, 2, order.2))) if(numWithout==0) grpblock=as.matrix( cbind(grp.1,1,order.1), ) order.block=order(as.matrix(GAU[,3])) colnames(grpblock)=c("grp","block","ID") #Indicators: 1-Phenotype, 1.5- unphenotyped but in a group with other phenotyped, 2-rest (Zhiwu, Dec 7,2012) #GAU0 <- merge(GAU[order.block,-3], grpblock, by.x = "X2", by.y = "grp") #GAU=GAU0[,c(2,1,3,4)] #print(head(GAU)) GAU1 <- merge(GAU[order.block,], grpblock, by.x = "X2", by.y = "grp") #print(GAU1) GAU1[,4]=(as.numeric(GAU1[,3])+as.numeric(GAU1[,4]))/2 #print(GAU1) GAU=GAU1[,c(2,1,4,5)] KW=KG[grp.1,grp.1] KO=KG[grp.2,grp.2] KWO=KG[grp.1,grp.2] #write.table(GAU, "GAU.txt", quote = FALSE, sep = "\t", row.names = TRUE,col.names = TRUE) #print("GAPIT.Block accomplished successfully!") return(list(GAU=GAU,KW=KW,KO=KO,KWO=KWO)) }#The function GAPIT.Block ends here #============================================================================================= `GAPIT.Bread` <- function(Y=NULL,CV=NULL,Z=NULL,KI=NULL,GK=NULL,GD=NULL,GM=NULL, method=NULL,delta=NULL,vg=NULL,ve=NULL,LD=0.01,GTindex=NULL, file.output=TRUE,opt="extBIC"){ #Object: To calculate p-values of SNPs by using method of GLM, MLM, CMLM, FaST, SUPER and DC #Straitegy: NA #Output: GWAS, GPS,REMLs,vg,ve,delta #intput: #Y: phenotype with columns of taxa,Y1,Y2... #CV: covariate variables with columns of taxa, v1,v2... #GD: same as GK. This is the genotype to screen, the columns are taxa,SNP1,SNP2,... #GK: Genotype data in numerical format, taxa goes to row and snp go ti columns. 
the first column is taxa #GM: Genotype map with columns of snpID,chromosome and position #method: Options are GLM, MLM, CMLM, FaST, SUPER ,FARM-CPU and DC #Authors: Zhiwu Zhang #Last update: November 2, 2011 ############################################################################################## #print("GAPIT.SUPER in progress...") #Performing first screening with GLM if(method=="GLM"){ #print("---------------screening by GLM----------------------------------") #print(GTindex) myGAPIT <- GAPIT( Y=Y, CV=CV, Z=Z, KI=KI, GD=GD, GM=GM, model=("GLM"), QC=FALSE, GTindex=GTindex, file.output=file.output ) GWAS=myGAPIT$GWAS GPS=myGAPIT$GPS REMLs=myGAPIT$REMLs delta=myGAPIT$ve/myGAPIT$va vg=myGAPIT$vg ve=myGAPIT$ve } #Performing first screening with MLM if(method=="MLM"){ #print("---------------screening by MLM----------------------------------") myGAPIT <- GAPIT( Y=Y, CV=CV, Z=Z, KI=KI, GD=GD, GM=GM, group.from=nrow(Y), group.to=nrow(Y), QC=FALSE, GTindex=GTindex, file.output=file.output ) GWAS=myGAPIT$GWAS GPS=myGAPIT$GPS REMLs=myGAPIT$REMLs delta=myGAPIT$ve/myGAPIT$va vg=myGAPIT$vg ve=myGAPIT$ve } #Performing first screening with Compressed MLM if(method=="CMLM"){ #print("---------------screening by CMLM----------------------------------") myGAPIT <- GAPIT( Y=Y, CV=CV, Z=Z, KI=KI, GD=GD, GM=GM, group.from=1, group.to=nrow(Y), QC=FALSE, GTindex=GTindex, file.output=file.output ) GWAS=myGAPIT$GWAS GPS=myGAPIT$GPS REMLs=myGAPIT$REMLs delta=myGAPIT$ve/myGAPIT$va vg=myGAPIT$vg ve=myGAPIT$ve } #Performing first screening with FaST-LMM if(method=="FaST" | method=="SUPER"| method=="DC") { GWAS=NULL GPS=NULL if(!is.null(vg) & !is.null(vg) & is.null(delta)) delta=ve/vg if(is.null(vg) & is.null(ve)) { myFaSTREML=GAPIT.get.LL(pheno=matrix(Y[,-1],nrow(Y),1),geno=NULL,snp.pool=as.matrix(GK[,-1]),X0=as.matrix(cbind(matrix(1,nrow(CV),1),CV[,-1]))) #print("Transfer data...") REMLs=-2*myFaSTREML$LL delta=myFaSTREML$delta vg=myFaSTREML$vg ve=myFaSTREML$ve #GPS=myFaSTREML$GPS } mySUPERFaST=GAPIT.SUPER.FastMLM(ys=matrix(Y[,-1],nrow(Y),1),X0=as.matrix(cbind(matrix(1,nrow(CV),1),CV[,-1])),snp.pool=as.matrix(GK[-1]), xs=as.matrix(GD[GTindex,-1]),vg=vg,delta=delta,LD=LD,method=method) GWAS=cbind(GM,mySUPERFaST$ps,mySUPERFaST$stats,mySUPERFaST$dfs,mySUPERFaST$effect) }#End of if(method=="FaST" | method=="SUPER") #FarmCPU if(method=="FarmCPU") { if(!require(bigmemory)) install.packages("bigmemory") if(!require(biganalytics)) install.packages("biganalytics") library(bigmemory) #for FARM-CPU library(biganalytics) #for FARM-CPU if(!exists('FarmCPU', mode='function'))source("http://www.zzlab.net/FarmCPU/FarmCPU_functions.txt")#web source code colnames(GM)[1]="SNP" myFarmCPU=FarmCPU( Y=Y,#Phenotype GD=GD,#Genotype GM=GM,#Map information CV=CV[,2:ncol(CV)], file.output=T ) xs=t(GD[,-1]) #print(dim(xs)) gene_taxa=colnames(GD)[-1] ss=apply(xs,1,sum) ns=nrow(GD) storage=cbind(.5*ss/ns,1-.5*ss/ns) maf=as.data.frame(cbind(gene_taxa,apply(cbind(.5*ss/ns,1-.5*ss/ns),1,min))) colnames(maf)=c("SNP","maf") nobs=ns #print(dim(myFarmCPU$GWAS)) #print(length(maf)) myFarmCPU$GWAS=merge(myFarmCPU$GWAS[,-5],maf, by.x = "SNP", by.y = "SNP") GWAS=cbind(myFarmCPU$GWAS,nobs) GWAS=GWAS[order(GWAS$P.value),] #colnames(GWAS)=c("SNP","Chromosome","Position","mp","mc","maf","nobs") GPS=myFarmCPU$Pred h2=NULL vg=NULL ve=NULL delta=NULL REMLs=NULL #colnames(GPS)[3]=c("Prediction") } #MLMM if(method=="MLMM") { print(" GWAS by MLMM method !!") Y=Y[!is.na(Y[,2]),] taxa_Y=as.character(Y[,1]) taxa_GD=as.character(GD[,1]) taxa_CV=as.character(CV[,1]) 
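# The next two lines keep only the genotype and covariate rows whose taxa also have a
# phenotype. A minimal sketch of the same matching step (toy names, illustration only):
#   keep   <- taxa_GD %in% taxa_Y   # logical index of genotyped taxa that are phenotyped
#   GD.sub <- GD[keep, ]            # GD restricted to those taxa, original row order kept
# This assumes Y, GD and CV use identical taxa labels in their first columns.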
GD=GD[taxa_GD%in%taxa_Y,] CV=CV[taxa_CV%in%taxa_Y,] #print(dim(Y)) #print(dim(GD)) #print(dim(CV)) KI= GAPIT.kinship.VanRaden(snps=as.matrix(GD[,-1])) colnames(KI)=as.character(GD[,1]) if(is.null(CV)) { mymlmm=mlmm( Y=Y[,2],#Phenotype X=as.matrix(GD[,-1]),#Genotype K=as.matrix(KI), #cofs=CV[,2:ncol(CV)], nbchunks = 2, maxsteps = 10, thresh = 1.2 * 10^-5) }else{ mymlmm=mlmm_cof( Y=Y[,2],#Phenotype X=as.matrix(GD[,-1]),#Genotype K=as.matrix(KI), cofs=as.matrix(CV[,2:ncol(CV)]), nbchunks = 2, maxsteps = 10, thresh = 1.2 * 10^-5) } if(opt=='extBIC'){ GWAS_result=mymlmm$opt_extBIC$out } if(opt=='mbonf'){ GWAS_result=mymlmm$opt_mbonf$out } if(opt=='thresh'){ GWAS_result=mymlmm$opt_thresh$out } colnames(GWAS_result)=c("SNP","P.value") xs=t(GD[,-1]) #print(dim(xs)) gene_taxa=colnames(GD)[-1] colnames(GM)=c("SNP","Chromosome","position") ss=apply(xs,1,sum) ns=nrow(GD) storage=cbind(.5*ss/ns,1-.5*ss/ns) maf=as.data.frame(cbind(gene_taxa,apply(cbind(.5*ss/ns,1-.5*ss/ns),1,min))) colnames(maf)=c("SNP","maf") nobs=ns GWAS_GM=merge(GM,GWAS_result, by.x = "SNP", by.y = "SNP") mc=matrix(NA,nrow(GWAS_GM),1) GWAS_GM=cbind(GWAS_GM,mc) GWAS_GM_maf=merge(GWAS_GM,maf, by.x = "SNP", by.y = "SNP") GWAS=cbind(GWAS_GM_maf,nobs) #print(head(GWAS)) GWAS=GWAS[order(GWAS$P.value),] GPS=NULL #h2=mymlmm$step_table$h2[length(mymlmm$step_table$h2)] h2=NULL vg=NULL ve=NULL delta=NULL REMLs=NULL colnames(GWAS)=c("SNP","Chromosome","Position","P.value","effec","maf","nobs") } #print("GAPIT.Bread succeed!") return (list(GWAS=GWAS, GPS=GPS,REMLs=REMLs,vg=vg,ve=ve,delta=delta)) } #end of GAPIT.Bread #============================================================================================= `GAPIT.Burger` <- function(Y=NULL,CV=NULL,GK=NULL){ #Object: To calculate likelihood, variances and ratio #Straitegy: NA #Output: P value #intput: #Y: phenotype with columns of taxa,Y1,Y2... #CV: covariate variables with columns of taxa,v1,v2... #GK: Genotype data in numerical format, taxa goes to row and snp go to columns. 
the first column is taxa (same as GAPIT.bread) #Authors: Xiaolei Liu ,Jiabo Wang and Zhiwu Zhang #Last update: November 2, 2015 ############################################################################################## #print("GAPIT.Burger in progress...") if(!is.null(CV)){ #CV=as.matrix(CV)#change CV to a matrix when it is a vector xiaolei changed here #theCV=as.matrix(cbind(matrix(1,nrow(CV),1),CV)) ###########for FarmCPU theCV=as.matrix(cbind(matrix(1,nrow(CV),1),CV[,-1])) #reseted by Jiabo ,CV frame is wrong,and not rm taxa #############for GAPIT other method GWAS }else{ theCV=matrix(1,nrow(Y),1) } #handler of single column GK n=nrow(GK) m=ncol(GK) if(m>2){ theGK=as.matrix(GK[,-1]) }else{ theGK=matrix(GK[,-1],n,1) } myFaSTREML=GAPIT.get.LL(pheno=matrix(Y[,-1],nrow(Y),1),geno=NULL,snp.pool=theGK,X0=theCV ) REMLs=-2*myFaSTREML$LL delta=myFaSTREML$delta vg=myFaSTREML$vg ve=myFaSTREML$ve #print("GAPIT.Burger succeed!") return (list(REMLs=REMLs,vg=vg,ve=ve,delta=delta)) } #end of GAPIT.Burger.Bus #============================================================================================= `GAPIT.Bus`<- function(Y=NULL,CV=NULL,Z=NULL,GT=NULL,KI=NULL,GK=NULL,GD=NULL,GM=NULL, WS=c(1e0,1e3,1e4,1e5,1e6,1e7),alpha=c(.01,.05,.1,.2,.3,.4,.5,.6,.7,.8,.9,1), method=NULL,delta=NULL,vg=NULL,ve=NULL,LD=0.01,GTindex=NULL, cutOff=0.01,Multi_iter=FASLE,num_regwas=10,Random.model=FALSE, p.threshold=NA,QTN.threshold=0.01,maf.threshold=0.03, method.GLM="FarmCPU.LM",method.sub="reward",method.sub.final="reward",method.bin="static", DPP=1000000,bin.size=c(5e5,5e6,5e7),bin.selection=seq(10,100,10), file.output=TRUE,opt="extBIC"){ #Object: To license data by method #Output: Coresponding numerical value # This function is used to run multiple method, Thanks MLMM FarmCPU Blink to share program and code. 
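# A hypothetical call, for orientation only (myY, myGD, myGM and myCV stand for user data
# frames in the formats described for GAPIT.Bread above; they are not objects created by GAPIT):
#   res <- GAPIT.Bus(Y=myY, CV=myCV, GD=myGD, GM=myGM, method="MLMM")
#   head(res$GWAS)   # SNP, Chromosome, Position, P.value, effect, maf, nobs
#   res$GPS          # predictions (NULL for methods that do not return them)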
#Authors: Zhiwu Zhang #Writen by Jiabo Wang #Last update: Novenber 3, 2016 ############################################################################################## GR=NULL #print(head(CV)) if(method=="GLM"){ #print("---------------screening by GLM----------------------------------") myGAPIT <- GAPIT( Y=Y, CV=CV, Z=Z, KI=KI, GD=GD, GM=GM, group.from=0, group.to=0, QC=FALSE, GTindex=GTindex, file.output=F ) GWAS=myGAPIT$GWAS GPS=myGAPIT$GPS REMLs=myGAPIT$REMLs delta=myGAPIT$ve/myGAPIT$va vg=myGAPIT$vg ve=myGAPIT$ve } #Performing first screening with MLM if(method=="MLM"){ #print("---------------screening by MLM----------------------------------") myGAPIT <- GAPIT( Y=Y, CV=CV, Z=Z, KI=KI, GD=GD, GM=GM, group.from=nrow(Y), group.to=nrow(Y), QC=FALSE, GTindex=GTindex, file.output=F ) GWAS=myGAPIT$GWAS GPS=myGAPIT$GPS REMLs=myGAPIT$REMLs delta=myGAPIT$ve/myGAPIT$va vg=myGAPIT$vg ve=myGAPIT$ve } #Performing first screening with Compressed MLM if(method=="CMLM"){ #print("---------------screening by CMLM----------------------------------") myGAPIT <- GAPIT( Y=Y, CV=CV, Z=Z, KI=KI, GD=GD, GM=GM, group.from=1, group.to=nrow(Y), QC=FALSE, GTindex=GTindex, file.output=F ) GWAS=myGAPIT$GWAS GPS=myGAPIT$GPS REMLs=myGAPIT$REMLs delta=myGAPIT$ve/myGAPIT$va vg=myGAPIT$vg ve=myGAPIT$ve } #Performing first screening with FaST-LMM if(method=="FaST" | method=="SUPER"| method=="DC") { GWAS=NULL GPS=NULL if(!is.null(vg) & !is.null(vg) & is.null(delta)) delta=ve/vg if(is.null(vg) & is.null(ve)) { #print("!!!!!!!!!!!!!!!!") myFaSTREML=GAPIT.get.LL(pheno=matrix(Y[,-1],nrow(Y),1),geno=NULL,snp.pool=as.matrix(GK[,-1]),X0=as.matrix(cbind(matrix(1,nrow(CV),1),CV[,-1]))) #print(myFaSTREML) #print("Transfer data...") REMLs=-2*myFaSTREML$LL delta=myFaSTREML$delta vg=myFaSTREML$vg ve=myFaSTREML$ve #GPS=myFaSTREML$GPS } mySUPERFaST=GAPIT.SUPER.FastMLM(ys=matrix(Y[,-1],nrow(Y),1),X0=as.matrix(cbind(matrix(1,nrow(CV),1),CV[,-1])),snp.pool=as.matrix(GK[-1]), xs=as.matrix(GD[GTindex,-1]),vg=vg,delta=delta,LD=LD,method=method) GWAS=cbind(GM,mySUPERFaST$ps,mySUPERFaST$stats,mySUPERFaST$dfs,mySUPERFaST$effect) }#End of if(method=="FaST" | method=="SUPER") if(method=="FarmCPU") { if(!require(bigmemory)) install.packages("bigmemory") if(!require(biganalytics)) install.packages("biganalytics") library(bigmemory) #for FARM-CPU library(biganalytics) #for FARM-CPU if(!exists('FarmCPU', mode='function'))source("http://www.zzlab.net/FarmCPU/FarmCPU_functions.txt")#web source code colnames(GM)[1]="SNP" #print(GTindex) if(!is.null(CV)) { farmcpuCV=CV[,2:ncol(CV)] }else{ farmcpuCV=NULL } #print(head(farmcpuCV)) # print(dim(GD)) # print(dim(farmcpuCV)) #print(Y) myFarmCPU=FarmCPU( Y=Y,#Phenotype GD=GD,#Genotype GM=GM,#Map information CV=farmcpuCV, cutOff=cutOff,p.threshold=p.threshold,QTN.threshold=QTN.threshold, maf.threshold=maf.threshold,method.GLM=method.GLM,method.sub=method.sub, method.sub.final=method.sub.final,method.bin=method.bin,bin.size=c(5e5,5e6,5e7),bin.selection=seq(10,100,10), file.output=T ) taxa=names(Y)[2] #print(taxa) GWAS=myFarmCPU$GWAS #print(head(GWAS)) xs=t(GD[,-1]) ss=apply(xs,1,sum) ns=nrow(GD) nobs=ns GWAS=cbind(GWAS,nobs) maf=apply(cbind(.5*ss/ns,1-.5*ss/ns),1,min) GWAS$maf=maf #print(head(GWAS)) if(Multi_iter) { sig=GWAS[GWAS[,4]<(cutOff/(nrow(GWAS))),1:5] sig=sig[!is.na(sig[,4]),] sig_position=as.numeric(as.matrix(sig[,1:3])[,2])*10^10+as.numeric(as.matrix(sig[,1:3])[,3]) sig=sig[order(sig_position),] sig_order=as.numeric(rownames(sig)) n=nrow(sig) diff_order=abs(sig_order[-length(sig_order)]-sig_order[-1]) 
diff_index=diff_order0) { for(i in 1:num_bins) { n_sig=sig_bins[i] if(i==1) { j=1:n_sig }else{ j=(sum(sig_bins[1:(i-1)])+1):sum(sig_bins[1:i]) } aim_marker=sig[j,] #print(aim_marker) aim_order=as.numeric(rownames(aim_marker)) aim_area=rep(FALSE,(nrow(GWAS))) # print(head(sig)) # print(aim_order) #aim_area[c((aim_order-num_regwas):(aim_order-1),(aim_order+1):(aim_order+num_regwas))]=TRUE if(min(aim_order)0) { for(i in 1:num_bins) { n_sig=sig_bins[i] if(i==1) { j=1:n_sig }else{ j=(sum(sig_bins[1:(i-1)])+1):sum(sig_bins[1:i]) } aim_marker=sig[j,] #print(aim_marker) aim_order=as.numeric(rownames(aim_marker)) aim_area=rep(FALSE,(nrow(GWAS))) #aim_area[c((aim_order-num_regwas):(aim_order-1),(aim_order+1):(aim_order+num_regwas))]=TRUE aim_area[c((min(aim_order)-num_regwas):(max(aim_order)+num_regwas))]=TRUE aim_area[aim_order]=FALSE aim_area=aim_area[1:(nrow(GWAS))] if(setequal(aim_area,logical(0))) next # if(aim_matrix[rownames(aim_matrix)=="TRUE",1]<10) next # aim_area[GM[,1]==aim_marker[,1]]=FALSE secondGD=GD[,c(TRUE,aim_area)] secondGM=GM[aim_area,] myGAPIT_Second =Blink(Y=Y,GD=secondGD,GM=secondGM,CV=blink_CV,maxLoop=10,time.cal=T) #print(head(myBlink$GWAS)) #GWAS=myBlink$GWAS[,1:4] Second_GWAS= myGAPIT_Second$GWAS [,1:4] Second_GWAS[is.na(Second_GWAS[,4]),4]=1 orignal_GWAS=GWAS[aim_area,] GWAS_index=match(Second_GWAS[,1],GWAS[,1]) #test_GWAS=GWAS GWAS[GWAS_index,4]=Second_GWAS[,4] } } } GWAS[,2]=as.numeric(as.character(GWAS[,2])) GWAS[,3]=as.numeric(as.character(GWAS[,3])) #rint(head(GWAS)) GPS=myBlink$Pred #print(head(GWAS)) GWAS=GWAS[,c(1:5,7,6)] if(Random.model)GR=GAPIT.RandomModel(Y=blink_Y,X=GD[,-1],GWAS=GWAS,CV=CV,cutOff=cutOff,GT=GT) h2=NULL vg=NULL ve=NULL delta=NULL REMLs=NULL system(paste("rm -f FarmCPU.",taxa,".GWAS.Results.csv",sep="")) system(paste("rm -f FarmCPU.",taxa,".Manhattan.Plot.Genomewise.pdf",sep="")) system(paste("rm -f FarmCPU.",taxa,".QQ-Plot.pdf",sep="")) #print(head(GWAS)) print("Bink R is done !!!!!") } if(method=="MLMM") { print("GWAS by MLMM method !!") Y=Y[!is.na(Y[,2]),] taxa_Y=as.character(Y[,1]) taxa_GD=as.character(GD[,1]) taxa_CV=as.character(CV[,1]) GD=GD[taxa_GD%in%taxa_Y,] CV=CV[taxa_CV%in%taxa_Y,] #print(dim(Y)) #print(dim(GD)) if(is.null(KI)) { KI= GAPIT.kinship.VanRaden(snps=as.matrix(GD[,-1])) colnames(KI)=as.character(GD[,1]) }else{ print("The Kinship is provided by user !!") KI=KI[,-1] #colnames(KI)=as.character(GD[,1]) taxa_KI=as.character(colnames(KI)) KI=KI[taxa_KI%in%as.character(GD[,1]),taxa_KI%in%as.character(GD[,1])] } if(ncol(KI)!=nrow(GD)) print("Please make sure dim of K equal number of GD !!") # print(dim(KI)) # print(dim(GD)) # print(KI[1:5,1:5]) if(is.null(CV)) { mymlmm=mlmm( Y=Y[,2],#Phenotype X=as.matrix(GD[,-1]),#Genotype K=as.matrix(KI), #cofs=CV[,2:ncol(CV)], nbchunks = 2, maxsteps = 10, thresh = 1.2 * 10^-5) }else{ mymlmm=mlmm_cof( Y=Y[,2],#Phenotype X=as.matrix(GD[,-1]),#Genotype K=as.matrix(KI), cofs=as.matrix(CV[,2:ncol(CV)]), nbchunks = 2, maxsteps = 10, thresh = 1.2 * 10^-5) } if(opt=='extBIC'){ GWAS_result=mymlmm$opt_extBIC$out } if(opt=='mbonf'){ GWAS_result=mymlmm$opt_mbonf$out } if(opt=='thresh'){ GWAS_result=mymlmm$opt_thresh$out } colnames(GWAS_result)=c("SNP","P.value") xs=t(GD[,-1]) #print(dim(xs)) gene_taxa=colnames(GD)[-1] colnames(GM)=c("SNP","Chromosome","position") ss=apply(xs,1,sum) ns=nrow(GD) storage=cbind(.5*ss/ns,1-.5*ss/ns) maf=as.data.frame(cbind(gene_taxa,apply(cbind(.5*ss/ns,1-.5*ss/ns),1,min))) colnames(maf)=c("SNP","maf") nobs=ns GWAS_GM=merge(GM,GWAS_result, by.x = "SNP", by.y = "SNP") 
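# The maf data frame built above assumes numeric genotypes coded 0/1/2 (allele dosage), so
# the frequency of the counted allele is p = sum(x)/(2*n) and maf = min(p, 1-p).
# Worked check on a toy SNP (illustration only):
#   x <- c(0, 1, 2, 2, 1)            # five individuals at one SNP
#   p <- sum(x)/(2*length(x))        # 6/10 = 0.6
#   min(p, 1 - p)                    # maf = 0.4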
mc=matrix(NA,nrow(GWAS_GM),1) GWAS_GM=cbind(GWAS_GM,mc) #print(head(GWAS_GM)) #print(head(maf)) #maf=NULL GWAS_GM_maf=merge(GWAS_GM,maf, by.x = "SNP", by.y = "SNP") GWAS=cbind(GWAS_GM_maf,nobs) #print(head(GWAS)) GWAS=GWAS[order(GWAS$P.value),] GWAS[,2]=as.numeric(as.character(GWAS[,2])) GWAS[,3]=as.numeric(as.character(GWAS[,3])) GPS=NULL #h2=mymlmm$step_table$h2[length(mymlmm$step_table$h2)] h2=NULL vg=NULL ve=NULL delta=NULL REMLs=NULL colnames(GWAS)=c("SNP","Chromosome","Position","P.value","effect","maf","nobs") } #print("GAPIT.Bus succeed!") return (list(GWAS=GWAS, GPS=GPS,REMLs=REMLs,vg=vg,ve=ve,delta=delta,GVs=GR$GVs)) } #end of GAPIT.Bus #============================================================================================= `GAPIT.CVMergePC` <- function(X,Y){ #Object: To convert character SNP genotpe to numerical #Output: Coresponding numerical value #Authors: Feng Tian and Zhiwu Zhang # Last update: May 30, 2011 ############################################################################################## #Z=X+Y Z <- merge(X, Y, by.x = colnames(X)[1], by.y = colnames(Y)[1]) return(Z) }#end of GAPIT.CVMergePCfunction #============================================================================================= ########## These three functions come from MVP package, Jiabo did some modifications ########## Following Apache License, we thank MVP developper to build these functions. ########## 1 creat P value scale in addtitional chromsome ########## 2 set col is same as GAPIT ########## 3 circle.plot <- function(myr,type="l",x=NULL,lty=1,lwd=1,col="black",add=TRUE,n.point=1000) { curve(sqrt(myr^2-x^2),xlim=c(-myr,myr),n=n.point,ylim=c(-myr,myr),type=type,lty=lty,col=col,lwd=lwd,add=add) curve(-sqrt(myr^2-x^2),xlim=c(-myr,myr),n=n.point,ylim=c(-myr,myr),type=type,lty=lty,col=col,lwd=lwd,add=TRUE) } Densitplot <- function( map, col=c("darkblue", "white", "red"), main="SNP Density", bin=1e6, band=3, width=5, legend.len=10, legend.max=NULL, legend.pt.cex=3, legend.cex=1, legend.y.intersp=1, legend.x.intersp=1, plot=TRUE ) { #print(head(map)) map <- as.matrix(map) map <- map[!is.na(map[, 2]), ] map <- map[!is.na(map[, 3]), ] map <- map[map[, 2] != 0, ] #map <- map[map[, 3] != 0, ] options(warn = -1) max.chr <- max(as.numeric(map[, 2]), na.rm=TRUE) if(is.infinite(max.chr)) max.chr <- 0 map.xy.index <- which(!as.numeric(map[, 2]) %in% c(0 : max.chr)) if(length(map.xy.index) != 0){ chr.xy <- unique(map[map.xy.index, 2]) for(i in 1:length(chr.xy)){ map[map[, 2] == chr.xy[i], 2] <- max.chr + i } } map <- map[order(as.numeric(map[, 2]), as.numeric(map[, 3])), ] chr <- as.numeric(map[, 2]) pos <- as.numeric(map[, 3]) chr.num <- unique(chr) #print(chr.num) chorm.maxlen <- max(pos) if(plot) plot(NULL, xlim=c(0, chorm.maxlen + chorm.maxlen/10), ylim=c(0, length(chr.num) * band + band), main=main,axes=FALSE, xlab="", ylab="", xaxs="i", yaxs="i") pos.x <- list() col.index <- list() maxbin.num <- NULL #print(chr.num) for(i in 1 : length(chr.num)){ pos.x[[i]] <- pos[which(chr == chr.num[i])] cut.len <- ceiling((max(pos.x[[i]]) - min(pos.x[[i]])) / bin) if(cut.len <= 1){ col.index[[i]] = 1 }else{ cut.r <- cut(pos.x[[i]], cut.len, labels=FALSE) eachbin.num <- table(cut.r) #print(eachbin.num) maxbin.num <- c(maxbin.num, max(eachbin.num)) col.index[[i]] <- rep(eachbin.num, eachbin.num) } } Maxbin.num <- max(maxbin.num) maxbin.num <- Maxbin.num if(!is.null(legend.max)){ maxbin.num <- legend.max } #print(col) #print(maxbin.num) col=colorRampPalette(col)(maxbin.num) col.seg=NULL for(i in 1 : 
length(chr.num)){ if(plot) polygon(c(0, 0, max(pos.x[[i]]), max(pos.x[[i]])), c(-width/5 - band * (i - length(chr.num) - 1), width/5 - band * (i - length(chr.num) - 1), width/5 - band * (i - length(chr.num) - 1), -width/5 - band * (i - length(chr.num) - 1)), col="grey", border="grey") if(!is.null(legend.max)){ if(legend.max < Maxbin.num){ col.index[[i]][col.index[[i]] > legend.max] <- legend.max } } col.seg <- c(col.seg, col[round(col.index[[i]] * length(col) / maxbin.num)]) if(plot) segments(pos.x[[i]], -width/5 - band * (i - length(chr.num) - 1), pos.x[[i]], width/5 - band * (i - length(chr.num) - 1), col=col[round(col.index[[i]] * length(col) / maxbin.num)], lwd=1) } if(length(map.xy.index) != 0){ for(i in 1:length(chr.xy)){ chr.num[chr.num == max.chr + i] <- chr.xy[i] } } chr.num <- rev(chr.num) if(plot) mtext(at=seq(band, length(chr.num) * band, band),text=paste("Chr", chr.num, sep=""), side=2, las=2, font=1, cex=0.6, line=0.2) if(plot) axis(3, at=seq(0, chorm.maxlen, length=10), labels=c(NA, paste(round((seq(0, chorm.maxlen, length=10))[-1] / 1e6, 0), "Mb", sep="")), font=1, cex.axis=0.8, tck=0.01, lwd=2, padj=1.2) # image(c(chorm.maxlen-chorm.maxlen * legend.width / 20 , chorm.maxlen), # round(seq(band - width/5, (length(chr.num) * band + band) * legend.height / 2 , length=maxbin.num+1), 2), # t(matrix(0 : maxbin.num)), col=c("white", rev(heat.colors(maxbin.num))), add=TRUE) legend.y <- round(seq(0, maxbin.num, length=legend.len)) len <- legend.y[2] legend.y <- seq(0, maxbin.num, len) if(!is.null(legend.max)){ if(legend.max < Maxbin.num){ if(!maxbin.num %in% legend.y){ legend.y <- c(legend.y, paste(">=", maxbin.num, sep="")) legend.y.col <- c(legend.y[c(-1, -length(legend.y))], maxbin.num) }else{ legend.y[length(legend.y)] <- paste(">=", maxbin.num, sep="") legend.y.col <- c(legend.y[c(-1, -length(legend.y))], maxbin.num) } }else{ if(!maxbin.num %in% legend.y){ legend.y <- c(legend.y, maxbin.num) } legend.y.col <- c(legend.y[-1]) } }else{ if(!maxbin.num %in% legend.y){ legend.y <- c(legend.y, paste(">", max(legend.y), sep="")) legend.y.col <- c(legend.y[c(-1, -length(legend.y))], maxbin.num) }else{ legend.y.col <- c(legend.y[-1]) } } legend.y.col <- as.numeric(legend.y.col) legend.col <- c("grey", col[round(legend.y.col * length(col) / maxbin.num)]) if(plot) legend(x=(chorm.maxlen + chorm.maxlen/100), y=( -width/2.5 - band * (length(chr.num) - length(chr.num) - 1)), title="", legend=legend.y, pch=15, pt.cex = legend.pt.cex, col=legend.col, cex=legend.cex, bty="n", y.intersp=legend.y.intersp, x.intersp=legend.x.intersp, yjust=0, xjust=0, xpd=TRUE) if(!plot) return(list(den.col=col.seg, legend.col=legend.col, legend.y=legend.y)) } GAPIT.Circle.Manhatton.Plot <- function( Pmap, col=c("#377EB8", "#4DAF4A", "#984EA3", "#FF7F00"), #col=c("darkgreen", "darkblue", "darkyellow", "darkred"), bin.size=1e6, bin.max=NULL, pch=19, band=1, cir.band=0.5, H=1.5, ylim=NULL, cex.axis=1, plot.type="c", multracks=TRUE, cex=c(0.5,0.8,1), r=0.3, xlab="Chromosome", ylab=expression(-log[10](italic(p))), xaxs="i", yaxs="r", outward=TRUE, threshold = 0.01, threshold.col="red", threshold.lwd=1, threshold.lty=2, amplify= TRUE, # is that available for remark signal pch col chr.labels=NULL, signal.cex = 2, signal.pch = 8, signal.col="red", signal.line=NULL, cir.chr=TRUE, cir.chr.h=1.3, chr.den.col=c("darkgreen", "yellow", "red"), #chr.den.col=c(126,177,153), cir.legend=TRUE, cir.legend.cex=0.8, cir.legend.col="grey45", LOG10=TRUE, box=FALSE, conf.int.col="grey", file.output=TRUE, file="pdf", dpi=300, xz=NULL, 
memo="" ) { #print("Starting Circular-Manhattan plot!",quote=F) taxa=colnames(Pmap)[-c(1:3)] if(!is.null(memo) && memo != "") memo <- paste("_", memo, sep="") if(length(taxa) == 0) taxa <- "Index" taxa <- paste(taxa, memo, sep="") col=rep(c( '#FF6A6A', '#FAC863', '#99C794', '#6699CC', '#C594C5'),ceiling(length(taxa)/5)) legend.bit=round(nrow(Pmap)/30) numeric.chr <- as.numeric(Pmap[, 1]) options(warn = 0) max.chr <- max(numeric.chr, na.rm=TRUE) aa=Pmap[1:legend.bit,] aa[,2]=max.chr+1 #print(aa[,3]) aa[,3]=sample(1:10^7.5,legend.bit) aa[,-c(1:3)]=0 Pmap=rbind(Pmap,aa) #print(unique(Pmap[,2])) #SNP-Density plot if("d" %in% plot.type){ print("SNP_Density Plotting...") if(file.output){ if(file=="jpg") jpeg(paste("SNP_Density.",paste(taxa,collapse="."),".jpg",sep=""), width = 9*dpi,height=7*dpi,res=dpi,quality = 100) if(file=="pdf") pdf(paste("GAPIT.", taxa,".SNP_Density.Plot.pdf" ,sep=""), width = 9,height=7) if(file=="tiff") tiff(paste("SNP_Density.",paste(taxa,collapse="."),".tiff",sep=""), width = 9*dpi,height=7*dpi,res=dpi) par(xpd=TRUE) }else{ if(is.null(dev.list())) dev.new(width = 9,height=7) par(xpd=TRUE) } Densitplot(map=Pmap[,c(1:3)], col=col, bin=bin.size, legend.max=bin.max, main=paste("The number of SNPs within ", bin.size/1e6, "Mb window size", sep="")) if(file.output) dev.off() } if(length(plot.type) !=1 | (!"d" %in% plot.type)){ #order Pmap by the name of SNP #Pmap=Pmap[order(Pmap[,1]),] Pmap <- as.matrix(Pmap) #delete the column of SNPs names Pmap <- Pmap[,-1] Pmap[is.na(Pmap)]=1 #print(dim(Pmap)) #scale and adjust the parameters cir.chr.h <- cir.chr.h/5 cir.band <- cir.band/5 threshold=threshold/nrow(Pmap) if(!is.null(threshold)){ threshold.col <- rep(threshold.col,length(threshold)) threshold.lwd <- rep(threshold.lwd,length(threshold)) threshold.lty <- rep(threshold.lty,length(threshold)) signal.col <- rep(signal.col,length(threshold)) signal.pch <- rep(signal.pch,length(threshold)) signal.cex <- rep(signal.cex,length(threshold)) } if(length(cex)!=3) cex <- rep(cex,3) if(!is.null(ylim)){ if(length(ylim)==1) ylim <- c(0,ylim) } if(is.null(conf.int.col)) conf.int.col <- NA if(is.na(conf.int.col)){ conf.int=FALSE }else{ conf.int=TRUE } #get the number of traits R=ncol(Pmap)-2 #replace the non-euchromosome options(warn = -1) numeric.chr <- as.numeric(Pmap[, 1]) options(warn = 0) max.chr <- max(numeric.chr, na.rm=TRUE) if(is.infinite(max.chr)) max.chr <- 0 map.xy.index <- which(!numeric.chr %in% c(0:max.chr)) if(length(map.xy.index) != 0){ chr.xy <- unique(Pmap[map.xy.index, 1]) for(i in 1:length(chr.xy)){ Pmap[Pmap[, 1] == chr.xy[i], 1] <- max.chr + i } } Pmap <- matrix(as.numeric(Pmap), nrow(Pmap)) #order the GWAS results by chromosome and position Pmap <- Pmap[order(Pmap[, 1], Pmap[,2]), ] #get the index of chromosome chr <- unique(Pmap[,1]) chr.ori <- chr if(length(map.xy.index) != 0){ for(i in 1:length(chr.xy)){ chr.ori[chr.ori == max.chr + i] <- chr.xy[i] } } pvalueT <- as.matrix(Pmap[,-c(1:2)]) #print(dim(pvalueT)) pvalue.pos <- Pmap[, 2] p0.index <- Pmap[, 1] == 0 if(sum(p0.index) != 0){ pvalue.pos[p0.index] <- 1:sum(p0.index) } pvalue.pos.list <- tapply(pvalue.pos, Pmap[, 1], list) #scale the space parameter between chromosomes if(!missing(band)){ band <- floor(band*(sum(sapply(pvalue.pos.list, max))/100)) }else{ band <- floor((sum(sapply(pvalue.pos.list, max))/100)) } if(band==0) band=1 if(LOG10){ pvalueT[pvalueT <= 0] <- 1 pvalueT[pvalueT > 1] <- 1 } #set the colors for the plot #palette(heat.colors(1024)) #(heatmap) #T=floor(1024/max(pvalue)) 
#plot(pvalue,pch=19,cex=0.6,col=(1024-floor(pvalue*T))) #print(col) if(is.vector(col)){ col <- matrix(col,R,length(col),byrow=TRUE) } if(is.matrix(col)){ #try to transform the colors into matrix for all traits col <- matrix(as.vector(t(col)),R,dim(col)[2],byrow=TRUE) } Num <- as.numeric(table(Pmap[,1])) Nchr <- length(Num) N <- NULL #print(Nchr) #set the colors for each traits for(i in 1:R){ colx <- col[i,] colx <- colx[!is.na(colx)] N[i] <- ceiling(Nchr/length(colx)) } #insert the space into chromosomes and return the midpoint of each chromosome ticks <- NULL pvalue.posN <- NULL #pvalue <- pvalueT[,j] for(i in 0:(Nchr-1)){ if (i==0){ #pvalue <- append(pvalue,rep(Inf,band),after=0) pvalue.posN <- pvalue.pos.list[[i+1]] + band ticks[i+1] <- max(pvalue.posN)-floor(max(pvalue.pos.list[[i+1]])/2) }else{ #pvalue <- append(pvalue,rep(Inf,band),after=sum(Num[1:i])+i*band) pvalue.posN <- c(pvalue.posN, max(pvalue.posN) + band + pvalue.pos.list[[i+1]]) ticks[i+1] <- max(pvalue.posN)-floor(max(pvalue.pos.list[[i+1]])/2) } } pvalue.posN.list <- tapply(pvalue.posN, Pmap[, 1], list) #NewP[[j]] <- pvalue #merge the pvalues of traits by column if(LOG10){ logpvalueT <- -log10(pvalueT) }else{ pvalueT <- abs(pvalueT) logpvalueT <- pvalueT } add <- list() for(i in 1:R){ colx <- col[i,] colx <- colx[!is.na(colx)] add[[i]] <- c(Num,rep(0,N[i]*length(colx)-Nchr)) } TotalN <- max(pvalue.posN) if(length(chr.den.col) > 1){ cir.density=TRUE den.fold <- 20 density.list <- Densitplot(map=Pmap[,c(1,1,2)], col=chr.den.col, plot=FALSE, bin=bin.size, legend.max=bin.max) #list(den.col=col.seg, legend.col=legend.col, legend.y=legend.y) }else{ cir.density=FALSE } #print(dim(pvalueT)) if(is.null(xz)){ signal.line.index <- NULL if(!is.null(threshold)){ if(!is.null(signal.line)){ for(l in 1:R){ if(LOG10){ signal.line.index <- c(signal.line.index,which(pvalueT[,l] < min(threshold))) }else{ signal.line.index <- c(signal.line.index,which(pvalueT[,l] > max(threshold))) } } signal.line.index <- unique(signal.line.index) } } signal.lty=rep(2,length(signal.line.index)) }else{ signal.line.index=as.numeric(as.vector(xz[,1])) signal.lty=as.numeric(as.vector(xz[,2])) }#end is.null(xz) signal.line.index <- pvalue.posN[signal.line.index] } if("c" %in% plot.type) { if(file.output){ if(file=="jpg") jpeg(paste("Circular-Manhattan.",paste(taxa,collapse="."),".jpg",sep=""), width = 8*dpi,height=8*dpi,res=dpi,quality = 100) if(file=="pdf") pdf(paste("GAPIT.", taxa,".Circular.Manhattan.Plot.pdf" ,sep=""), width = 10,height=10) if(file=="tiff") tiff(paste("Circular-Manhattan.",paste(taxa,collapse="."),".tiff",sep=""), width = 8*dpi,height=8*dpi,res=dpi) } if(!file.output){ if(!is.null(dev.list())) dev.new(width=8, height=8) par(pty="s", xpd=TRUE, mar=c(1,1,1,1)) } par(pty="s", xpd=TRUE, mar=c(1,1,1,1)) RR <- r+H*R+cir.band*R if(cir.density){ plot(NULL,xlim=c(1.05*(-RR-4*cir.chr.h),1.1*(RR+4*cir.chr.h)),ylim=c(1.05*(-RR-4*cir.chr.h),1.1*(RR+4*cir.chr.h)),axes=FALSE,xlab="",ylab="") }else{ plot(NULL,xlim=c(1.05*(-RR-4*cir.chr.h),1.05*(RR+4*cir.chr.h)),ylim=c(1.05*(-RR-4*cir.chr.h),1.05*(RR+4*cir.chr.h)),axes=FALSE,xlab="",ylab="") } if(!is.null(signal.line)){ if(!is.null(signal.line.index)){ X1chr <- (RR)*sin(2*pi*(signal.line.index-round(band/2))/TotalN) Y1chr <- (RR)*cos(2*pi*(signal.line.index-round(band/2))/TotalN) X2chr <- (r)*sin(2*pi*(signal.line.index-round(band/2))/TotalN) Y2chr <- (r)*cos(2*pi*(signal.line.index-round(band/2))/TotalN) #print(signal.line) #print(dim(pvalueT)) #print(head(pvalueT)) #print(dim(xz)) #print(xz) 
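# Circular coordinates: a marker at cumulative position pos (of TotalN positions, after the
# half-band offset) is drawn at angle theta = 2*pi*pos/TotalN, and a point at radius R maps to
#   x = R*sin(theta),  y = R*cos(theta)
# The segments below use this mapping to connect the outer ring (radius RR) with the inner
# track (radius r) at the markers listed in signal.line.index. Sketch:
#   theta <- 2*pi*(pos - round(band/2))/TotalN
#   c(x = RR*sin(theta), y = RR*cos(theta))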
#print(head(pvalue.posN)) segments(X1chr,Y1chr,X2chr,Y2chr,lty=signal.lty,lwd=signal.line,col="grey") } } for(i in 1:R){ #get the colors for each trait colx <- col[i,] colx <- colx[!is.na(colx)] #debug #print(colx) #print(paste("Circular_Manhattan Plotting ",taxa[i],"...",sep="")) pvalue <- pvalueT[,i] logpvalue <- logpvalueT[,i] if(is.null(ylim)){ if(LOG10){ Max <- ceiling(-log10(min(pvalue[pvalue!=0]))) }else{ Max <- ceiling(max(pvalue[pvalue!=Inf])) if(Max<=1) Max <- max(pvalue[pvalue!=Inf]) } }else{ Max <- ylim[2] } Cpvalue <- (H*logpvalue/Max) if(outward==TRUE){ if(cir.chr==TRUE){ #plot the boundary which represents the chromosomes polygon.num <- 1000 #print(length(chr)) for(k in 1:length(chr)){ if(k==1){ polygon.index <- seq(round(band/2)+1,-round(band/2)+max(pvalue.posN.list[[1]]), length=polygon.num) #change the axis from right angle into circle format X1chr=(RR)*sin(2*pi*(polygon.index)/TotalN) Y1chr=(RR)*cos(2*pi*(polygon.index)/TotalN) X2chr=(RR+cir.chr.h)*sin(2*pi*(polygon.index)/TotalN) Y2chr=(RR+cir.chr.h)*cos(2*pi*(polygon.index)/TotalN) #print(length(X1chr)) if(is.null(chr.den.col)){ polygon(c(rev(X1chr),X2chr),c(rev(Y1chr),Y2chr),col=rep(colx,ceiling(length(chr)/length(colx)))[k],border=rep(colx,ceiling(length(chr)/length(colx)))[k]) }else{ if(cir.density){ polygon(c(rev(X1chr),X2chr),c(rev(Y1chr),Y2chr),col="grey",border="grey") }else{ polygon(c(rev(X1chr),X2chr),c(rev(Y1chr),Y2chr),col=chr.den.col,border=chr.den.col) } } }else{ polygon.index <- seq(1+round(band/2)+max(pvalue.posN.list[[k-1]]),-round(band/2)+max(pvalue.posN.list[[k]]), length=polygon.num) X1chr=(RR)*sin(2*pi*(polygon.index)/TotalN) Y1chr=(RR)*cos(2*pi*(polygon.index)/TotalN) X2chr=(RR+cir.chr.h)*sin(2*pi*(polygon.index)/TotalN) Y2chr=(RR+cir.chr.h)*cos(2*pi*(polygon.index)/TotalN) if(is.null(chr.den.col)){ polygon(c(rev(X1chr),X2chr),c(rev(Y1chr),Y2chr),col=rep(colx,ceiling(length(chr)/length(colx)))[k],border=rep(colx,ceiling(length(chr)/length(colx)))[k]) }else{ if(cir.density){ polygon(c(rev(X1chr),X2chr),c(rev(Y1chr),Y2chr),col="grey",border="grey") }else{ polygon(c(rev(X1chr),X2chr),c(rev(Y1chr),Y2chr),col=chr.den.col,border=chr.den.col) } } } } if(cir.density){ segments( (RR)*sin(2*pi*(pvalue.posN-round(band/2))/TotalN), (RR)*cos(2*pi*(pvalue.posN-round(band/2))/TotalN), (RR+cir.chr.h)*sin(2*pi*(pvalue.posN-round(band/2))/TotalN), (RR+cir.chr.h)*cos(2*pi*(pvalue.posN-round(band/2))/TotalN), col=density.list$den.col, lwd=0.1 ) legend( x=RR+4*cir.chr.h, y=(RR+4*cir.chr.h)/2, horiz=F, title="Density", legend=density.list$legend.y, pch=15, pt.cex = 3, col=density.list$legend.col, cex=1, bty="n", y.intersp=1, x.intersp=1, yjust=0.5, xjust=0, xpd=TRUE ) } # XLine=(RR+cir.chr.h)*sin(2*pi*(1:TotalN)/TotalN) # YLine=(RR+cir.chr.h)*cos(2*pi*(1:TotalN)/TotalN) # lines(XLine,YLine,lwd=1.5) if(cir.density){ circle.plot(myr=RR+cir.chr.h,lwd=1.5,add=TRUE,col='grey') circle.plot(myr=RR,lwd=1.5,add=TRUE,col='grey') }else{ circle.plot(myr=RR+cir.chr.h,lwd=1.5,add=TRUE) circle.plot(myr=RR,lwd=1.5,add=TRUE) } } #plot the y axis of legend for each trait if(cir.legend==TRUE){ #try to get the number after radix point if(Max<=1) { round.n=nchar(as.character(10^(-ceiling(-log10(Max)))))-1 }else{ round.n=1 } segments(0,r+H*(i-1)+cir.band*(i-1),0,r+H*i+cir.band*(i-1),col=cir.legend.col,lwd=1.5) segments(0,r+H*(i-1)+cir.band*(i-1),H/20,r+H*(i-1)+cir.band*(i-1),col=cir.legend.col,lwd=1.5) circle.plot(myr=r+H*(i-1)+cir.band*(i-1),lwd=0.5,add=TRUE,col='grey') 
segments(0,r+H*(i-0.75)+cir.band*(i-1),H/20,r+H*(i-0.75)+cir.band*(i-1),col=cir.legend.col,lwd=1.5) circle.plot(myr=r+H*(i-0.75)+cir.band*(i-1),lwd=0.5,add=TRUE,col='grey') segments(0,r+H*(i-0.5)+cir.band*(i-1),H/20,r+H*(i-0.5)+cir.band*(i-1),col=cir.legend.col,lwd=1.5) circle.plot(myr=r+H*(i-0.5)+cir.band*(i-1),lwd=0.5,add=TRUE,col='grey') segments(0,r+H*(i-0.25)+cir.band*(i-1),H/20,r+H*(i-0.25)+cir.band*(i-1),col=cir.legend.col,lwd=1.5) circle.plot(myr=r+H*(i-0.25)+cir.band*(i-1),lwd=0.5,add=TRUE,col='grey') segments(0,r+H*(i-0)+cir.band*(i-1),H/20,r+H*(i-0)+cir.band*(i-1),col=cir.legend.col,lwd=1.5) circle.plot(myr=r+H*(i-0)+cir.band*(i-1),lwd=0.5,add=TRUE,col='grey') #text(-r/15,r+H*(i-0.75)+cir.band*(i-1),round(Max*0.25,round.n),adj=1,col=cir.legend.col,cex=cir.legend.cex,font=2) text(-r/15,r+H*(i-0.5)+cir.band*(i-1),round(Max*0.5,round.n),adj=1,col=cir.legend.col,cex=cir.legend.cex,font=2) text(-r/15,r+H*(i-0.25)+cir.band*(i-1),round(Max*0.75,round.n),adj=1,col=cir.legend.col,cex=cir.legend.cex,font=2) #text(-r/15,r+H*(i-0)+cir.band*(i-1),round(Max*1,round.n),adj=1,col=cir.legend.col,cex=cir.legend.cex,font=2) #text(r/5,0.4*(i-1),taxa[i],adj=1,col=cir.legend.col,cex=cir.legend.cex,font=2) } X=(Cpvalue+r+H*(i-1)+cir.band*(i-1))*sin(2*pi*(pvalue.posN-round(band/2))/TotalN) Y=(Cpvalue+r+H*(i-1)+cir.band*(i-1))*cos(2*pi*(pvalue.posN-round(band/2))/TotalN) # plot point in figure points(X[1:(length(X)-legend.bit)],Y[1:(length(Y)-legend.bit)],pch=19,cex=cex[1],col=rep(rep(colx,N[i]),add[[i]])) # plot significant line if(!is.null(threshold)){ if(sum(threshold!=0)==length(threshold)){ for(thr in 1:length(threshold)){ significantline1=ifelse(LOG10, H*(-log10(threshold[thr]))/Max, H*(threshold[thr])/Max) #s1X=(significantline1+r+H*(i-1)+cir.band*(i-1))*sin(2*pi*(0:TotalN)/TotalN) #s1Y=(significantline1+r+H*(i-1)+cir.band*(i-1))*cos(2*pi*(0:TotalN)/TotalN) # plot significant line if(significantline1=significantline1) HX1=(Cpvalue[p_amp.index]+r+H*(i-1)+cir.band*(i-1))*sin(2*pi*(pvalue.posN[p_amp.index]-round(band/2))/TotalN) HY1=(Cpvalue[p_amp.index]+r+H*(i-1)+cir.band*(i-1))*cos(2*pi*(pvalue.posN[p_amp.index]-round(band/2))/TotalN) #cover the points that exceed the threshold with the color "white" points(HX1,HY1,pch=19,cex=cex[1],col="white") for(ll in 1:length(threshold)){ if(ll == 1){ if(LOG10){ significantline1=H*(-log10(threshold[ll]))/Max }else{ significantline1=H*(threshold[ll])/Max } p_amp.index <- which(Cpvalue>=significantline1) HX1=(Cpvalue[p_amp.index]+r+H*(i-1)+cir.band*(i-1))*sin(2*pi*(pvalue.posN[p_amp.index]-round(band/2))/TotalN) HY1=(Cpvalue[p_amp.index]+r+H*(i-1)+cir.band*(i-1))*cos(2*pi*(pvalue.posN[p_amp.index]-round(band/2))/TotalN) }else{ if(LOG10){ significantline0=H*(-log10(threshold[ll-1]))/Max significantline1=H*(-log10(threshold[ll]))/Max }else{ significantline0=H*(threshold[ll-1])/Max significantline1=H*(threshold[ll])/Max } p_amp.index <- which(Cpvalue>=significantline1 & Cpvalue < significantline0) HX1=(Cpvalue[p_amp.index]+r+H*(i-1)+cir.band*(i-1))*sin(2*pi*(pvalue.posN[p_amp.index]-round(band/2))/TotalN) HY1=(Cpvalue[p_amp.index]+r+H*(i-1)+cir.band*(i-1))*cos(2*pi*(pvalue.posN[p_amp.index]-round(band/2))/TotalN) } if(is.null(signal.col)){ # print(signal.pch) points(HX1,HY1,pch=signal.pch,cex=signal.cex[ll]*cex[1],col=rep(rep(colx,N[i]),add[[i]])[p_amp.index]) }else{ # print(signal.pch) points(HX1,HY1,pch=signal.pch,cex=signal.cex[ll]*cex[1],col=signal.col[ll]) } } } } } if(cir.chr==TRUE){ ticks1=1.07*(RR+cir.chr.h)*sin(2*pi*(ticks-round(band/2))/TotalN) 
ticks2=1.07*(RR+cir.chr.h)*cos(2*pi*(ticks-round(band/2))/TotalN) if(is.null(chr.labels)){ #print(length(ticks)) for(i in 1:(length(ticks)-1)){ angle=360*(1-(ticks-round(band/2))[i]/TotalN) text(ticks1[i],ticks2[i],chr.ori[i],srt=angle,font=2,cex=cex.axis) } }else{ for(i in 1:length(ticks)){ angle=360*(1-(ticks-round(band/2))[i]/TotalN) text(ticks1[i],ticks2[i],chr.labels[i],srt=angle,font=2,cex=cex.axis) } } }else{ ticks1=(0.9*r)*sin(2*pi*(ticks-round(band/2))/TotalN) ticks2=(0.9*r)*cos(2*pi*(ticks-round(band/2))/TotalN) if(is.null(chr.labels)){ for(i in 1:length(ticks)){ angle=360*(1-(ticks-round(band/2))[i]/TotalN) text(ticks1[i],ticks2[i],chr.ori[i],srt=angle,font=2,cex=cex.axis) } }else{ for(i in 1:length(ticks)){ angle=360*(1-(ticks-round(band/2))[i]/TotalN) text(ticks1[i],ticks2[i],chr.labels[i],srt=angle,font=2,cex=cex.axis) } } } } if(outward==FALSE){ if(cir.chr==TRUE){ # XLine=(2*cir.band+RR+cir.chr.h)*sin(2*pi*(1:TotalN)/TotalN) # YLine=(2*cir.band+RR+cir.chr.h)*cos(2*pi*(1:TotalN)/TotalN) # lines(XLine,YLine,lwd=1.5) polygon.num <- 1000 for(k in 1:length(chr)){ if(k==1){ polygon.index <- seq(round(band/2)+1,-round(band/2)+max(pvalue.posN.list[[1]]), length=polygon.num) X1chr=(2*cir.band+RR)*sin(2*pi*(polygon.index)/TotalN) Y1chr=(2*cir.band+RR)*cos(2*pi*(polygon.index)/TotalN) X2chr=(2*cir.band+RR+cir.chr.h)*sin(2*pi*(polygon.index)/TotalN) Y2chr=(2*cir.band+RR+cir.chr.h)*cos(2*pi*(polygon.index)/TotalN) if(is.null(chr.den.col)){ polygon(c(rev(X1chr),X2chr),c(rev(Y1chr),Y2chr),col=rep(colx,ceiling(length(chr)/length(colx)))[k],border=rep(colx,ceiling(length(chr)/length(colx)))[k]) }else{ if(cir.density){ polygon(c(rev(X1chr),X2chr),c(rev(Y1chr),Y2chr),col="grey",border="grey") }else{ polygon(c(rev(X1chr),X2chr),c(rev(Y1chr),Y2chr),col=chr.den.col,border=chr.den.col) } } }else{ polygon.index <- seq(1+round(band/2)+max(pvalue.posN.list[[k-1]]),-round(band/2)+max(pvalue.posN.list[[k]]), length=polygon.num) X1chr=(2*cir.band+RR)*sin(2*pi*(polygon.index)/TotalN) Y1chr=(2*cir.band+RR)*cos(2*pi*(polygon.index)/TotalN) X2chr=(2*cir.band+RR+cir.chr.h)*sin(2*pi*(polygon.index)/TotalN) Y2chr=(2*cir.band+RR+cir.chr.h)*cos(2*pi*(polygon.index)/TotalN) if(is.null(chr.den.col)){ polygon(c(rev(X1chr),X2chr),c(rev(Y1chr),Y2chr),col=rep(colx,ceiling(length(chr)/length(colx)))[k],border=rep(colx,ceiling(length(chr)/length(colx)))[k]) }else{ if(cir.density){ polygon(c(rev(X1chr),X2chr),c(rev(Y1chr),Y2chr),col="grey",border="grey") }else{ polygon(c(rev(X1chr),X2chr),c(rev(Y1chr),Y2chr),col=chr.den.col,border=chr.den.col) } } } } if(cir.density){ segments( (2*cir.band+RR)*sin(2*pi*(pvalue.posN-round(band/2))/TotalN), (2*cir.band+RR)*cos(2*pi*(pvalue.posN-round(band/2))/TotalN), (2*cir.band+RR+cir.chr.h)*sin(2*pi*(pvalue.posN-round(band/2))/TotalN), (2*cir.band+RR+cir.chr.h)*cos(2*pi*(pvalue.posN-round(band/2))/TotalN), col=density.list$den.col, lwd=0.1 ) legend( x=RR+4*cir.chr.h, y=(RR+4*cir.chr.h)/2, title="Density", legend=density.list$legend.y, pch=15, pt.cex = 3, col=density.list$legend.col, cex=1, bty="n", y.intersp=1, x.intersp=1, yjust=0.5, xjust=0, xpd=TRUE ) } if(cir.density){ circle.plot(myr=2*cir.band+RR+cir.chr.h,lwd=1.5,add=TRUE,col='grey') circle.plot(myr=2*cir.band+RR,lwd=1.5,add=TRUE,col='grey') }else{ circle.plot(myr=2*cir.band+RR+cir.chr.h,lwd=1.5,add=TRUE) circle.plot(myr=2*cir.band+RR,lwd=1.5,add=TRUE) } } if(cir.legend==TRUE){ #try to get the number after radix point if(Max<=1) { round.n=nchar(as.character(10^(-ceiling(-log10(Max)))))-1 }else{ round.n=2 } 
segments(0,r+H*(i-1)+cir.band*(i-1),0,r+H*i+cir.band*(i-1),col=cir.legend.col,lwd=1.5) segments(0,r+H*(i-1)+cir.band*(i-1),H/20,r+H*(i-1)+cir.band*(i-1),col=cir.legend.col,lwd=1.5) circle.plot(myr=r+H*(i-1)+cir.band*(i-1),lwd=0.5,add=TRUE,col='grey') segments(0,r+H*(i-0.75)+cir.band*(i-1),H/20,r+H*(i-0.75)+cir.band*(i-1),col=cir.legend.col,lwd=1.5) circle.plot(myr=r+H*(i-0.75)+cir.band*(i-1),lwd=0.5,add=TRUE,col='grey') segments(0,r+H*(i-0.5)+cir.band*(i-1),H/20,r+H*(i-0.5)+cir.band*(i-1),col=cir.legend.col,lwd=1.5) circle.plot(myr=r+H*(i-0.5)+cir.band*(i-1),lwd=0.5,add=TRUE,col='grey') segments(0,r+H*(i-0.25)+cir.band*(i-1),H/20,r+H*(i-0.25)+cir.band*(i-1),col=cir.legend.col,lwd=1.5) circle.plot(myr=r+H*(i-0.25)+cir.band*(i-1),lwd=0.5,add=TRUE,col='grey') segments(0,r+H*(i-0)+cir.band*(i-1),H/20,r+H*(i-0)+cir.band*(i-1),col=cir.legend.col,lwd=1.5) circle.plot(myr=r+H*(i-0)+cir.band*(i-1),lwd=0.5,add=TRUE,col='grey') text(-r/15,r+H*(i-0.25)+cir.band*(i-1),round(Max*0.25,round.n),adj=1,col=cir.legend.col,cex=cir.legend.cex,font=2) #text(-r/15,r+H*(i-0.5)+cir.band*(i-1),round(Max*0.5,round.n),adj=1,col=cir.legend.col,cex=cir.legend.cex,font=2) text(-r/15,r+H*(i-0.75)+cir.band*(i-1),round(Max*0.75,round.n),adj=1,col=cir.legend.col,cex=cir.legend.cex,font=2) #text(-r/15,r+H*(i-1)+cir.band*(i-1),round(Max*1,round.n),adj=1,col=cir.legend.col,cex=cir.legend.cex,font=2) #text(r,0.4*(i-1),taxa[i],adj=1,col=cir.legend.col,cex=cir.legend.cex,font=2) } X=(-Cpvalue+r+H*i+cir.band*(i-1))*sin(2*pi*(pvalue.posN-round(band/2))/TotalN) Y=(-Cpvalue+r+H*i+cir.band*(i-1))*cos(2*pi*(pvalue.posN-round(band/2))/TotalN) #points(X,Y,pch=19,cex=cex[1],col=rep(rep(colx,N[i]),add[[i]])) points(X[1:(length(X)-legend.bit)],Y[1:(length(Y)-legend.bit)],pch=19,cex=cex[1],col=rep(rep(colx,N[i]),add[[i]])) if(!is.null(threshold)){ if(sum(threshold!=0)==length(threshold)){ for(thr in 1:length(threshold)){ significantline1=ifelse(LOG10, H*(-log10(threshold[thr]))/Max, H*(threshold[thr])/Max) #s1X=(significantline1+r+H*(i-1)+cir.band*(i-1))*sin(2*pi*(0:TotalN)/TotalN) #s1Y=(significantline1+r+H*(i-1)+cir.band*(i-1))*cos(2*pi*(0:TotalN)/TotalN) if(significantline1=significantline1) HX1=(-Cpvalue[p_amp.index]+r+H*i+cir.band*(i-1))*sin(2*pi*(pvalue.posN[p_amp.index]-round(band/2))/TotalN) HY1=(-Cpvalue[p_amp.index]+r+H*i+cir.band*(i-1))*cos(2*pi*(pvalue.posN[p_amp.index]-round(band/2))/TotalN) #cover the points that exceed the threshold with the color "white" points(HX1,HY1,pch=19,cex=cex[1],col="white") for(ll in 1:length(threshold)){ if(ll == 1){ if(LOG10){ significantline1=H*(-log10(threshold[ll]))/Max }else{ significantline1=H*(threshold[ll])/Max } p_amp.index <- which(Cpvalue>=significantline1) HX1=(-Cpvalue[p_amp.index]+r+H*i+cir.band*(i-1))*sin(2*pi*(pvalue.posN[p_amp.index]-round(band/2))/TotalN) HY1=(-Cpvalue[p_amp.index]+r+H*i+cir.band*(i-1))*cos(2*pi*(pvalue.posN[p_amp.index]-round(band/2))/TotalN) }else{ if(LOG10){ significantline0=H*(-log10(threshold[ll-1]))/Max significantline1=H*(-log10(threshold[ll]))/Max }else{ significantline0=H*(threshold[ll-1])/Max significantline1=H*(threshold[ll])/Max } p_amp.index <- which(Cpvalue>=significantline1 & Cpvalue < significantline0) HX1=(-Cpvalue[p_amp.index]+r+H*i+cir.band*(i-1))*sin(2*pi*(pvalue.posN[p_amp.index]-round(band/2))/TotalN) HY1=(-Cpvalue[p_amp.index]+r+H*i+cir.band*(i-1))*cos(2*pi*(pvalue.posN[p_amp.index]-round(band/2))/TotalN) } if(is.null(signal.col)){ points(HX1,HY1,pch=signal.pch,cex=signal.cex[ll]*cex[1],col=rep(rep(colx,N[i]),add[[i]])[p_amp.index]) }else{ 
points(HX1,HY1,pch=signal.pch,cex=signal.cex[ll]*cex[1],col=signal.col[ll]) } } } } } if(cir.chr==TRUE){ ticks1=1.1*(2*cir.band+RR)*sin(2*pi*(ticks-round(band/2))/TotalN) ticks2=1.1*(2*cir.band+RR)*cos(2*pi*(ticks-round(band/2))/TotalN) if(is.null(chr.labels)){ for(i in 1:(length(ticks)-1)){ angle=360*(1-(ticks-round(band/2))[i]/TotalN) text(ticks1[i],ticks2[i],chr.ori[i],srt=angle,font=2,cex=cex.axis) } }else{ for(i in 1:length(ticks)){ angle=360*(1-(ticks-round(band/2))[i]/TotalN) text(ticks1[i],ticks2[i],chr.labels[i],srt=angle,font=2,cex=cex.axis) } } }else{ ticks1=1.0*(RR+cir.band)*sin(2*pi*(ticks-round(band/2))/TotalN) ticks2=1.0*(RR+cir.band)*cos(2*pi*(ticks-round(band/2))/TotalN) if(is.null(chr.labels)){ for(i in 1:length(ticks)){ #adjust the angle of labels of circle plot angle=360*(1-(ticks-round(band/2))[i]/TotalN) text(ticks1[i],ticks2[i],chr.ori[i],srt=angle,font=2,cex=cex.axis) } }else{ for(i in 1:length(ticks)){ angle=360*(1-(ticks-round(band/2))[i]/TotalN) text(ticks1[i],ticks2[i],chr.labels[i],srt=angle,font=2,cex=cex.axis) } } } } } taxa=append("Centre",taxa,) taxa_col=rep("black",R) taxa_col=append("red",taxa_col) for(j in 1:(R+1)){ text(r/5,0.4*(j-1),taxa[j],adj=1,col=taxa_col[j],cex=cir.legend.cex,font=2) } taxa=taxa[-1] if(file.output) dev.off() } if("q" %in% plot.type){ #print("Starting QQ-plot!",quote=F) amplify=FALSE if(multracks){ if(file.output){ if(file=="jpg") jpeg(paste("Multracks.QQ_plot.",paste(taxa,collapse="."),".jpg",sep=""), width = R*2.5*dpi,height=5.5*dpi,res=dpi,quality = 100) if(file=="pdf") pdf(paste("Multracks.QQ_plot.",paste(taxa,collapse="."),".pdf",sep=""), width = R*2.5,height=5.5) if(file=="tiff") tiff(paste("Multracks.QQ_plot.",paste(taxa,collapse="."),".tiff",sep=""), width = R*2.5*dpi,height=5.5*dpi,res=dpi) par(mfcol=c(1,R),mar = c(0,1,4,1.5),oma=c(3,5,0,0),xpd=TRUE) }else{ if(is.null(dev.list())) dev.new(width = 2.5*R, height = 5.5) par(xpd=TRUE) } for(i in 1:R){ print(paste("Multracks_QQ Plotting ",taxa[i],"...",sep="")) P.values=as.numeric(Pmap[,i+2]) P.values=P.values[!is.na(P.values)] if(LOG10){ P.values=P.values[P.values>0] P.values=P.values[P.values<=1] N=length(P.values) P.values=P.values[order(P.values)] }else{ N=length(P.values) P.values=P.values[order(P.values,decreasing=TRUE)] } p_value_quantiles=(1:length(P.values))/(length(P.values)+1) log.Quantiles <- -log10(p_value_quantiles) if(LOG10){ log.P.values <- -log10(P.values) }else{ log.P.values <- P.values } #calculate the confidence interval of QQ-plot if(conf.int){ N1=length(log.Quantiles) c95 <- rep(NA,N1) c05 <- rep(NA,N1) for(j in 1:N1){ xi=ceiling((10^-log.Quantiles[j])*N) if(xi==0)xi=1 c95[j] <- qbeta(0.95,xi,N-xi+1) c05[j] <- qbeta(0.05,xi,N-xi+1) } index=length(c95):1 }else{ c05 <- 1 c95 <- 1 } YlimMax <- max(floor(max(max(-log10(c05)), max(-log10(c95)))+1), floor(max(log.P.values)+1)) plot(NULL, xlim = c(0,floor(max(log.Quantiles)+1)), axes=FALSE, cex.axis=cex.axis, cex.lab=1.2,ylim=c(0,YlimMax),xlab ="", ylab="", main = taxa[i]) axis(1, at=seq(0,floor(max(log.Quantiles)+1),ceiling((max(log.Quantiles)+1)/10)), labels=seq(0,floor(max(log.Quantiles)+1),ceiling((max(log.Quantiles)+1)/10)), cex.axis=cex.axis) axis(2, at=seq(0,YlimMax,ceiling(YlimMax/10)), labels=seq(0,YlimMax,ceiling(YlimMax/10)), cex.axis=cex.axis) #plot the confidence interval of QQ-plot if(conf.int) polygon(c(log.Quantiles[index],log.Quantiles),c(-log10(c05)[index],-log10(c95)),col=conf.int.col,border=conf.int.col) if(!is.null(threshold.col)){par(xpd=FALSE); abline(a = 0, b = 1, col = 
threshold.col[1],lwd=2); par(xpd=TRUE)} points(log.Quantiles, log.P.values, col = col[1],pch=1,cex=cex[3]) #print(max(log.Quantiles)) # print(length(log.Quantiles)) # print(length(log.P.values)) if(!is.null(threshold)){ if(sum(threshold!=0)==length(threshold)){ thre.line=-log10(min(threshold)) if(amplify==TRUE){ thre.index=which(log.P.values>=thre.line) if(length(thre.index)!=0){ #cover the points that exceed the threshold with the color "white" points(log.Quantiles[thre.index],log.P.values[thre.index], col = "white",pch=19,cex=cex[3]) if(is.null(signal.col)){ points(log.Quantiles[thre.index],log.P.values[thre.index],col = col[1],pch=signal.pch[1],cex=signal.cex[1]) }else{ points(log.Quantiles[thre.index],log.P.values[thre.index],col = signal.col[1],pch=signal.pch[1],cex=signal.cex[1]) } } } } } } if(box) box() if(file.output) dev.off() if(R > 1){ #qq_col=rainbow(R) qq_col=rep(c( '#FF6A6A', '#FAC863', '#99C794', '#6699CC', '#C594C5'),ceiling(R/5)) signal.col <- NULL if(file.output){ if(file=="jpg") jpeg(paste("Multiple.QQ_plot.",paste(taxa,collapse="."),".jpg",sep=""), width = 5.5*dpi,height=5.5*dpi,res=dpi,quality = 100) if(file=="pdf") pdf(paste("Multiple.QQ_plot.",paste(taxa,collapse="."),".pdf",sep=""), width = 5.5,height=5.5) if(file=="tiff") tiff(paste("Multiple.QQ_plot.",paste(taxa,collapse="."),".tiff",sep=""), width = 5.5*dpi,height=5.5*dpi,res=dpi) par(mar = c(5,5,4,2),xpd=TRUE) }else{ dev.new(width = 5.5, height = 5.5) par(xpd=TRUE) } P.values=as.numeric(Pmap[,i+2]) P.values=P.values[!is.na(P.values)] if(LOG10){ P.values=P.values[P.values>0] P.values=P.values[P.values<=1] N=length(P.values) P.values=P.values[order(P.values)] }else{ N=length(P.values) P.values=P.values[order(P.values,decreasing=TRUE)] } p_value_quantiles=(1:length(P.values))/(length(P.values)+1) log.Quantiles <- -log10(p_value_quantiles) # calculate the confidence interval of QQ-plot if(conf.int){ N1=length(log.Quantiles) c95 <- rep(NA,N1) c05 <- rep(NA,N1) for(j in 1:N1){ xi=ceiling((10^-log.Quantiles[j])*N) if(xi==0)xi=1 c95[j] <- qbeta(0.95,xi,N-xi+1) c05[j] <- qbeta(0.05,xi,N-xi+1) } index=length(c95):1 } if(!conf.int){c05 <- 1; c95 <- 1} Pmap.min <- Pmap[,3:(R+2)] YlimMax <- max(floor(max(max(-log10(c05)), max(-log10(c95)))+1), -log10(min(Pmap.min[Pmap.min > 0]))) plot(NULL, xlim = c(0,floor(max(log.Quantiles)+1)), axes=FALSE, cex.axis=cex.axis, cex.lab=1.2,ylim=c(0, floor(YlimMax+1)),xlab =expression(Expected~~-log[10](italic(p))), ylab = expression(Observed~~-log[10](italic(p))), main = "QQ plot") #legend("topleft",taxa,col=t(col)[1:R],pch=1,pt.lwd=2,text.font=6,box.col=NA) legend("topleft",taxa,col=qq_col[1:R],pch=1,pt.lwd=3,text.font=6,box.col=NA) axis(1, at=seq(0,floor(max(log.Quantiles)+1),ceiling((max(log.Quantiles)+1)/10)), labels=seq(0,floor(max(log.Quantiles)+1),ceiling((max(log.Quantiles)+1)/10)), cex.axis=cex.axis) axis(2, at=seq(0,floor(YlimMax+1),ceiling((YlimMax+1)/10)), labels=seq(0,floor((YlimMax+1)),ceiling((YlimMax+1)/10)), cex.axis=cex.axis) #print(log.Quantiles[index]) #print(index) #print(length(log.Quantiles)) # plot the confidence interval of QQ-plot if(conf.int) polygon(c(log.Quantiles[index],log.Quantiles),c(-log10(c05)[index],-log10(c95)),col=conf.int.col,border=conf.int.col) for(i in 1:R){ #print(paste("Multraits_QQ Plotting ",taxa[i],"...",sep="")) P.values=as.numeric(Pmap[,i+2]) P.values=P.values[!is.na(P.values)] if(LOG10){ P.values=P.values[P.values>0] P.values=P.values[P.values<=1] N=length(P.values) P.values=P.values[order(P.values)] }else{ N=length(P.values) 
P.values=P.values[order(P.values,decreasing=TRUE)] } p_value_quantiles=(1:length(P.values))/(length(P.values)+1) log.Quantiles <- -log10(p_value_quantiles) if(LOG10){ log.P.values <- -log10(P.values) }else{ log.P.values <- P.values } if((i == 1) & !is.null(threshold.col)){par(xpd=FALSE); abline(a = 0, b = 1, col = threshold.col[1],lwd=2); par(xpd=TRUE)} #print(length(log.Quantiles)) #print("!!!!!") #points(log.Quantiles, log.P.values, col = t(col)[i],pch=1,lwd=3,cex=cex[3]) points(log.Quantiles, log.P.values, col = qq_col[i],pch=1,lwd=3,cex=cex[3]) #print(max(log.Quantiles)) # if(!is.null(threshold)){ if(sum(threshold!=0)==length(threshold)){ thre.line=-log10(min(threshold)) if(amplify==TRUE){ thre.index=which(log.P.values>=thre.line) if(length(thre.index)!=0){ # cover the points that exceed the threshold with the color "white" points(log.Quantiles[thre.index],log.P.values[thre.index], col = "white",pch=19,lwd=3,cex=cex[3]) if(is.null(signal.col)){ points(log.Quantiles[thre.index],log.P.values[thre.index],col = t(col)[i],pch=signal.pch[1],cex=signal.cex[1]) }else{ points(log.Quantiles[thre.index],log.P.values[thre.index],col = signal.col[1],pch=signal.pch[1],cex=signal.cex[1]) } } } } } } box() if(file.output) dev.off() } }else{ for(i in 1:R){ print(paste("Q_Q Plotting ",taxa[i],"...",sep="")) if(file.output){ if(file=="jpg") jpeg(paste("QQplot.",taxa[i],".jpg",sep=""), width = 5.5*dpi,height=5.5*dpi,res=dpi,quality = 100) if(file=="pdf") pdf(paste("QQplot.",taxa[i],".pdf",sep=""), width = 5.5,height=5.5) if(file=="tiff") tiff(paste("QQplot.",taxa[i],".tiff",sep=""), width = 5.5*dpi,height=5.5*dpi,res=dpi) par(mar = c(5,5,4,2),xpd=TRUE) }else{ if(is.null(dev.list())) dev.new(width = 5.5, height = 5.5) par(xpd=TRUE) } P.values=as.numeric(Pmap[,i+2]) P.values=P.values[!is.na(P.values)] if(LOG10){ P.values=P.values[P.values>0] P.values=P.values[P.values<=1] N=length(P.values) P.values=P.values[order(P.values)] }else{ N=length(P.values) P.values=P.values[order(P.values,decreasing=TRUE)] } p_value_quantiles=(1:length(P.values))/(length(P.values)+1) log.Quantiles <- -log10(p_value_quantiles) if(LOG10){ log.P.values <- -log10(P.values) }else{ log.P.values <- P.values } #calculate the confidence interval of QQ-plot if(conf.int){ N1=length(log.Quantiles) c95 <- rep(NA,N1) c05 <- rep(NA,N1) for(j in 1:N1){ xi=ceiling((10^-log.Quantiles[j])*N) if(xi==0)xi=1 c95[j] <- qbeta(0.95,xi,N-xi+1) c05[j] <- qbeta(0.05,xi,N-xi+1) } index=length(c95):1 }else{ c05 <- 1 c95 <- 1 } #print(max(log.Quantiles)) #print("@@@@@") YlimMax <- max(floor(max(max(-log10(c05)), max(-log10(c95)))+1), floor(max(log.P.values)+1)) plot(NULL, xlim = c(0,floor(max(log.Quantiles)+1)), axes=FALSE, cex.axis=cex.axis, cex.lab=1.2,ylim=c(0,YlimMax),xlab =expression(Expected~~-log[10](italic(p))), ylab = expression(Observed~~-log[10](italic(p))), main = paste("QQplot of",taxa[i])) axis(1, at=seq(0,floor(max(log.Quantiles)+1),ceiling((max(log.Quantiles)+1)/10)), labels=seq(0,floor(max(log.Quantiles)+1),ceiling((max(log.Quantiles)+1)/10)), cex.axis=cex.axis) axis(2, at=seq(0,YlimMax,ceiling(YlimMax/10)), labels=seq(0,YlimMax,ceiling(YlimMax/10)), cex.axis=cex.axis) #plot the confidence interval of QQ-plot #print(log.Quantiles[index]) qq_col=rainbow(R) #if(conf.int) polygon(c(log.Quantiles[index],log.Quantiles),c(-log10(c05)[index],-log10(c95)),col=conf.int.col,border=conf.int.col) if(conf.int) polygon(c(log.Quantiles[index],log.Quantiles),c(-log10(c05)[index],-log10(c95)),col=qq_col[i],border=conf.int.col) 
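#Note on the confidence band drawn by the polygon above: under the null hypothesis the j-th
#smallest of N p-values is the j-th order statistic of N uniform(0,1) draws and follows a
#Beta(j, N-j+1) distribution, so the band runs from qbeta(0.05, xi, N-xi+1) to
#qbeta(0.95, xi, N-xi+1), with xi recovered from the expected quantile.
#A minimal stand-alone sketch of the same envelope (toy values, not part of GAPIT):
# N <- 1000
# j <- 1:N
# expected <- -log10(j/(N+1))
# lower <- -log10(qbeta(0.95, j, N-j+1))
# upper <- -log10(qbeta(0.05, j, N-j+1))
# plot(expected, expected, type="l", xlab="Expected", ylab="Observed")
# lines(expected, lower, lty=2); lines(expected, upper, lty=2)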
if(!is.null(threshold.col)){par(xpd=FALSE); abline(a = 0, b = 1, col = threshold.col[1],lwd=2); par(xpd=TRUE)} points(log.Quantiles, log.P.values, col = col[1],pch=19,cex=2) if(!is.null(threshold)){ if(sum(threshold!=0)==length(threshold)){ thre.line=-log10(min(threshold)) if(amplify==TRUE){ thre.index=which(log.P.values>=thre.line) if(length(thre.index)!=0){ #print("!!!!") #cover the points that exceed the threshold with the color "white" points(log.Quantiles[thre.index],log.P.values[thre.index], col = "white",pch=19,lwd=3,cex=cex[3]) if(is.null(signal.col)){ points(log.Quantiles[thre.index],log.P.values[thre.index],col = col[1],pch=signal.pch[1],cex=signal.cex[1]) }else{ points(log.Quantiles[thre.index],log.P.values[thre.index],col = signal.col[1],pch=signal.pch[1],cex=signal.cex[1]) } } } } } box() if(file.output) dev.off() } } print("Multiple QQ plot has been finished!",quote=F) } }#End of Whole function #} `GAPIT.Compress` <- function(KI,kinship.cluster = "average",kinship.group = "Mean",GN=nrow(KI),Timmer,Memory){ #Object: To cluster individuals into groups based on kinship #Output: GA, KG #Authors: Alex Lipka and Zhiwu Zhang # Last update: April 14, 2011 ############################################################################################## Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP start") Memory=GAPIT.Memory(Memory=Memory,Infor="cp start") # Extract the line names line.names <- KI[,1] Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Does this change memory0") Memory=GAPIT.Memory(Memory=Memory,Infor="Does this change memory0") # Remove the first column of the kinship matrix, which is the line names KI <- KI[ ,-1] # Convert kinship to distance #distance.matrix <- 2 - KI #distance.matrix.as.dist <- as.dist(distance.matrix) #distance.matrix.as.dist <- as.dist(2 - KI) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP distance") Memory=GAPIT.Memory(Memory=Memory,Infor="cp distance") #print(paste("The value of kinship.cluster is ", kinship.cluster, sep = "")) # hclust() will perform the hiearchical cluster analysis #cluster.distance.matrix <- hclust(distance.matrix.as.dist, method = kinship.cluster) #cluster.distance.matrix <- hclust(as.dist(2 - KI), method = kinship.cluster) distance.matrix=dist(KI,upper=TRUE) #Jiabo Wang modified ,the dist is right function for cluster cluster.distance.matrix=hclust(distance.matrix,method=kinship.cluster) #cutree(out_hclust,k=3) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP cluster") Memory=GAPIT.Memory(Memory=Memory,Infor="cp cluster") # Cutree will assign lines into k clusters group.membership <- cutree(cluster.distance.matrix, k = GN) compress_z=table(group.membership,paste(line.names)) #build compress z with group.membership Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP cutree") Memory=GAPIT.Memory(Memory=Memory,Infor="cp cutree") #calculate group kinship if(kinship.group == "Mean"){ #This matrix ooperation is much faster than tapply function for "Mean" x=as.factor(group.membership) #b = model.matrix(~x-1) n=max(as.numeric(as.vector(x))) b=diag(n)[x,] KG=t(b)%*%as.matrix(KI)%*%b CT=t(b)%*%(0*as.matrix(KI)+1)%*%b KG=as.matrix(KG/CT) rownames(KG)=c(1:nrow(KG)) colnames(KG)=c(1:ncol(KG)) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP calculation original") Memory=GAPIT.Memory(Memory=Memory,Infor="cp calculation original") }else{ gm=as.factor(group.membership) kv=as.numeric(as.matrix(KI)) kvr=rep(gm,ncol(KI)) kvc=as.numeric(t(matrix(kvr,nrow(KI),ncol(KI)))) kInCol=t(rbind(kv,kvr,kvc)) rm(gm) rm(kv) rm(kvr) rm(kvc) rm(KI) gc() #This part does not work yet 
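#kInCol, built above, lays the individual kinship matrix out as three columns: column 1 holds the
#kinship value, and columns 2 and 3 hold the group memberships of the corresponding row and column
#individuals. The tapply() calls below summarize those values within every pair of groups
#(Max, Min or Median) to form the group kinship matrix KG; for example,
#tapply(kInCol[,1], list(kInCol[,2], kInCol[,3]), max) returns a group-by-group matrix of maxima.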
#if(kinship.group == "Mean")
# KG<- tapply(kInCol[,1], list(kInCol[,2], kInCol[,3]), mean)
if(kinship.group == "Max") KG <- tapply(kInCol[,1], list(kInCol[,2], kInCol[,3]), max)
if(kinship.group == "Min") KG <- tapply(kInCol[,1], list(kInCol[,2], kInCol[,3]), min)
if(kinship.group == "Median") KG <- tapply(kInCol[,1], list(kInCol[,2], kInCol[,3]), median)
} #This is the end of branching between "Mean" and the other group methods
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP calculation")
Memory=GAPIT.Memory(Memory=Memory,Infor="cp calculation")
# add line names
#GA <- data.frame(group.membership)
GA <- data.frame(cbind(as.character(line.names),as.numeric(group.membership) ))
#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP Final")
#Memory=GAPIT.Memory(Memory=Memory,Infor="CP Final")
#write.table(KG, paste("KG_from_", kinship.group, "_Method.txt"), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE)
#print("GAPIT.Compress accomplished successfully!")
return(list(GA=GA, KG=KG,Timmer=Timmer,Memory=Memory))
}#The function GAPIT.Compress ends here
#=============================================================================================
`GAPIT.Compression.Visualization` <- function(Compression = Compression, name.of.trait = name.of.trait){
#Object: To visualize compression results (-2 log likelihood, variance components and heritability) across cluster methods, group methods and group numbers
#Output: Three pdfs: One of the log likelihood function, one of the genetic and error variance component,
# and one of the heritabilities
#Authors: Alex Lipka and Zhiwu Zhang
# Last update: May 10, 2011
##############################################################################################
#Graph the optimum compression
print("GAPIT.Compression.Visualization")
#print(Compression)
if(length(Compression)<=6) Compression=t(as.matrix(Compression[which(Compression[,4]!="NULL" & Compression[,4]!="NaN"),]))
if(length(Compression)==6) Compression=matrix(Compression,1,6)
#print("Compression matrix")
#print(Compression)
#print(length(Compression) )
if(length(Compression)>6) Compression=Compression[which(Compression[,4]!="NULL" & Compression[,4]!="NaN"),]
if(length(Compression)<1) return() #no result
#Pie chart for the optimum setting
#-------------------------------------------------------------------------------
print("Pie chart")
LL=as.numeric(Compression[,4])
Compression.best=Compression[1,]
variance=as.numeric(Compression.best[5:6])
#colors <- c("grey50","grey70")
colors <- c("#990000","dimgray")
varp=variance/sum(variance)
h2.opt= varp[1]
labels0 <- round(varp * 100, 1)
labels <- paste(labels0, "%", sep="")
legend0=c("Genetic: ","Residual: ")
legend <- paste(legend0, round(variance*100)/100, sep="")
LL.best0=as.numeric(Compression.best[4] )
LL.best=paste("-2LL: ",floor(LL.best0*100)/100,sep="")
label.comp=paste(c("Cluster method: ","Group method: ","Group number: "), Compression.best[c(1:3)], sep="")
theOptimum=c(label.comp,LL.best)
#print(variance)
pdf(paste("GAPIT.", name.of.trait,".Optimum.pdf", sep = ""), width = 14)
par(mfrow = c(1,1), mar = c(1,1,5,5), lab = c(5,5,7))
pie(variance, col=colors, labels=labels,angle=45,border=NA)
legend(1.0, 0.5, legend, cex=1.5, bty="n", fill=colors)
#Display the optimum compression
text(1.5,.0, "The optimum compression", col= "gray10")
for(i in 1:4){ text(1.5,-.1*i, theOptimum[i], col= "gray10") }
dev.off()
#sort Compression by group number for plot order
Compression=Compression[order(as.numeric(Compression[,3])),]
#Graph compression with multiple groups
#print("Graph compression with multiple groups")
if(length(Compression)==6) return() #Exit here if only one row of results remains
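#Column layout of Compression assumed by all of the plots below: 1 = kinship cluster method,
#2 = kinship group method, 3 = number of groups, 4 = -2 log likelihood, 5 = genetic variance,
#6 = residual variance; heritability is later derived as column 5/(column 5 + column 6).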
#print("It should not go here") if(length(unique(Compression[,3]))>1) { #Create a vector of colors #print("Setting colors") color.vector.basic <- c("red","blue","black", "blueviolet","indianred","cadetblue","orange") color.vector.addition <- setdiff(c(colors()[grep("red",colors())], colors()[grep("blue",colors())]),color.vector.basic ) color.vector.addition.mixed <- sample(color.vector.addition,max(0,((length(unique(Compression[,1])) * length(unique(Compression[,2])))-length(color.vector.basic)))) color.vector <- c(color.vector.basic,color.vector.addition.mixed ) #Create a vector of numbers for the line dot types line.vector <- rep(1:(length(unique(Compression[,1])) * length(unique(Compression[,2])))) #We want to have a total of three plots, one displaying the likelihood function, one displaying the variance components, and one displaying the # heritability pdf(paste("GAPIT.", name.of.trait,".Compression.multiple.group", ".pdf", sep = ""), width = 14) par(mfrow = c(2,3), mar = c(5,5,1,1), lab = c(5,5,7)) # Make the likelihood function plot #print("Likelihood") k <- 1 for(i in 1:length(unique(Compression[,1]))){ for(j in 1:length(unique(Compression[,2]))){ if((i == 1)&(j == 1)) { Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j])) ), ] x <- as.numeric(Compression.subset[,3]) y <- as.numeric(Compression.subset[,4]) plot(y~x,type="l", pch = 30, lty = line.vector[i], ylim=c(min(as.numeric(Compression[,4])),max(as.numeric(Compression[,4]))), xlim = c(min(as.numeric(Compression[,3])),max(as.numeric(Compression[,3]))), col = color.vector[j], xlab = "Number of Groups", ylab = "-2Log Likelihoood",lwd=1 ) label = paste(c(as.character(unique(Compression[,1]))[k]," ",as.character(unique(Compression[,2]))[j]), collapse = "") } if((i != 1)|(j != 1)) { k <- k+1 Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j])) ), ] x <- as.numeric(Compression.subset[,3]) y <- as.numeric(Compression.subset[,4]) lines(y~x,type="l", pch = 30, lty = line.vector[i], col = color.vector[j]) label = c(label, paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "")) } } } #Make a legend #legend("topright", label, fill = color.vector) legend.col= 1+floor(length(unique(Compression[,1])) * length(unique(Compression[,2]))/20) line.style=rep(1:length(unique(Compression[,1])), each = length(unique(Compression[,2]))) line.color=rep(1:length(unique(Compression[,2])), length(unique(Compression[,1]))) legend("topright", label, col = color.vector[line.color], lty = line.style, ncol=legend.col,horiz=FALSE,bty="n") # Make the genetic variance component plots #print("genetic variance") k <- 1 for(i in 1:length(unique(Compression[,1]))){ for(j in 1:length(unique(Compression[,2]))){ if((i == 1)&(j == 1)) { Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j])) ), ] x <- as.numeric(Compression.subset[,3]) y <- as.numeric(Compression.subset[,5]) plot(y~x,type="l", pch = 17, lty = line.vector[i], ylim=c(min(as.numeric(Compression[,5])),max(as.numeric(Compression[,5]))), xlim = c(min(as.numeric(Compression[,3])),max(as.numeric(Compression[,3]))), col = color.vector[j], xlab = "Number of Groups", ylab = "Genetic Variance", ) #label = 
paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "") } if((i != 1)|(j != 1)) { k <- k+1 Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j])) ), ] x <- as.numeric(Compression.subset[,3]) y <- as.numeric(Compression.subset[,5]) lines(y~x,type="l", pch = 17, lty = line.vector[i], col = color.vector[j]) #label = c(label, paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "")) } } } #Make a legend #legend("topleft", label, fill = color.vector) # Make the residual variance component plots k <- 1 for(i in 1:length(unique(Compression[,1]))){ for(j in 1:length(unique(Compression[,2]))){ if((i == 1)&(j == 1)) { Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j])) ), ] x <- as.numeric(Compression.subset[,3]) y <- as.numeric(Compression.subset[,6]) plot(y~x,type="l", pch = 17, ylim=c(min(as.numeric(Compression[,6])),max(as.numeric(Compression[,6]))), xlim = c(min(as.numeric(Compression[,3])),max(as.numeric(Compression[,3]))), col = color.vector[j], xlab = "Number of Groups", ylab = "Residual Variance", ) #label = paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "") } if((i != 1)|(j != 1)) { k <- k+1 Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j])) ), ] x <- as.numeric(Compression.subset[,3]) y <- as.numeric(Compression.subset[,6]) lines(y~x,type="l", pch = 17, lty = line.vector[i], col = color.vector[j]) #label = c(label, paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "")) } } } #Make a legend #legend("topright", label, fill = color.vector) #calculate total variance and h2 #print("h2") heritablilty.vector <- as.numeric(Compression[,5])/(as.numeric(Compression[,5]) + as.numeric(Compression[,6])) totalVariance.vector <- as.numeric(as.numeric(Compression[,5]) + as.numeric(Compression[,6])) Compression.h2 <- cbind(Compression, heritablilty.vector,totalVariance.vector) # Make the total variance component plots #print("Total variance") k <- 1 for(i in 1:length(unique(Compression.h2[,1]))){ for(j in 1:length(unique(Compression.h2[,2]))){ if((i == 1)&(j == 1)) { Compression.subset <- Compression.h2[which( (Compression.h2[,1] == as.character(unique(Compression.h2[,1])[i])) & (Compression.h2[,2] == as.character(unique(Compression.h2[,2])[j])) ), ] x <- as.numeric(Compression.subset[,3]) y <- as.numeric(Compression.subset[,8]) plot(y~x,type="l", pch = 17, lty = line.vector[k], ylim=c(min(as.numeric(Compression.h2[,8])),max(as.numeric(Compression.h2[,8]))), xlim = c(min(as.numeric(Compression.h2[,3])),max(as.numeric(Compression.h2[,3]))), col = color.vector[1], xlab = "Number of Groups", ylab = "Total Variance", ) #label = paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "") } if((i != 1)|(j != 1)) { k <- k+1 Compression.subset <- Compression.h2[which( (Compression.h2[,1] == as.character(unique(Compression.h2[,1])[i])) & (Compression.h2[,2] == as.character(unique(Compression.h2[,2])[j])) ), ] x <- as.numeric(Compression.subset[,3]) y <- as.numeric(Compression.subset[,8]) 
lines(y~x,type="l", pch = 17, lty = line.vector[i], col = color.vector[j]) #label = c(label, paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "")) } } } #Make a legend #legend("topright", label, fill = color.vector) # Make the heritability plots #print("h2 plot") k <- 1 for(i in 1:length(unique(Compression[,1]))){ for(j in 1:length(unique(Compression[,2]))){ if((i == 1)&(j == 1)) { Compression.subset <- Compression.h2[which( (Compression.h2[,1] == as.character(unique(Compression.h2[,1])[i])) & (Compression.h2[,2] == as.character(unique(Compression.h2[,2])[j])) ), ] x <- as.numeric(Compression.subset[,3]) y <- as.numeric(Compression.subset[,7]) plot(y~x,type="l", pch = 17, lty = line.vector[k], ylim=c(min(as.numeric(Compression.h2[,7])),max(as.numeric(Compression.h2[,7]))), xlim = c(min(as.numeric(Compression.h2[,3])),max(as.numeric(Compression.h2[,3]))), col = color.vector[1], xlab = "Number of Groups", ylab = "Heritability", ) #label = paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "") } if((i != 1)|(j != 1)) { k <- k+1 Compression.subset <- Compression.h2[which( (Compression.h2[,1] == as.character(unique(Compression.h2[,1])[i])) & (Compression.h2[,2] == as.character(unique(Compression.h2[,2])[j])) ), ] x <- as.numeric(Compression.subset[,3]) y <- as.numeric(Compression.subset[,7]) lines(y~x,type="l", lty = line.vector[i], pch = 17, col = color.vector[j]) #label = c(label, paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "")) } } } #Make a legend #legend("topleft", label, fill = color.vector) legend.col= 1+floor(length(unique(Compression[,1])) * length(unique(Compression[,2]))/20) line.style=rep(1:length(unique(Compression[,1])), each = length(unique(Compression[,2]))) line.color=rep(1:length(unique(Compression[,2])), length(unique(Compression[,1]))) # Make labels plot(0~0,axes=FALSE,type="l",ylab = "",xlab = "",frame.plot=FALSE) legend("topleft", label, col = color.vector[line.color], lty = line.style, ncol=legend.col,horiz=FALSE) dev.off() }#end of Graph compression with multiple groups #Graph compression with single groups #print("Graph compression with single groups") if(length(unique(Compression[,3]))==1& length(unique(Compression[,1]))*length(unique(Compression[,2]))>1) { #Graph the compression with only one group pdf(paste("GAPIT.Compression.single.group.", name.of.trait, ".pdf", sep = ""), width = 14) par(mfrow = c(2,2), mar = c(5,5,1,1), lab = c(5,5,7)) nkt=length(unique(Compression[,1])) nca=length(unique(Compression[,2])) kvr=rep(c(1:nkt),nca) kvc0=rep(c(1:nca),nkt) kvc=as.numeric(t(matrix(kvc0,nca,nkt))) kt.name=Compression[1:nkt,1] ca.index=((1:nca)-1)*nkt+1 ca.name=Compression[ca.index,2] KG<- t(tapply(as.numeric(Compression[,4]), list(kvr, kvc), mean)) colnames(KG)=kt.name barplot(as.matrix(KG), ylab= "-2 Log Likelihood",beside=TRUE, col=rainbow(length(unique(Compression[,2])))) KG<- t(tapply(as.numeric(Compression[,5]), list(kvr, kvc), mean)) colnames(KG)=kt.name barplot(as.matrix(KG), ylab= "Genetic varaince", beside=TRUE, col=rainbow(length(unique(Compression[,2])))) KG<- t(tapply(as.numeric(Compression[,6]), list(kvr, kvc), mean)) colnames(KG)=kt.name barplot(as.matrix(KG), ylab= "Residual varaince", beside=TRUE, col=rainbow(length(unique(Compression[,2])))) KG<- t(tapply(as.numeric(Compression[,5])/(as.numeric(Compression[,5])+as.numeric(Compression[,6])), list(kvr, kvc), mean)) 
colnames(KG)=kt.name
barplot(as.matrix(KG), ylab= "Heritability", beside=TRUE, col=rainbow(length(unique(Compression[,2]))),ylim=c(0,1))
legend("topleft", paste(t(ca.name)), cex=0.8,bty="n", fill=rainbow(length(unique(Compression[,2]))),horiz=TRUE)
dev.off()
} #end of Graph compression with single groups
print("GAPIT.Compression.Visualization accomplished successfully!")
#return(list(compression=Compression.h2,h2=h2.opt))
return
}#GAPIT.Compression.Plots ends here
#=============================================================================================
`GAPIT.Create.Indicator` <- function(xs, SNP.impute = "Major" ){
#Object: To create indicator (dummy) variables for the genotype classes of a single SNP
#Output: x.ind (0/1 indicator matrix, one column per genotype class) and unique.SNPs (the genotype classes, sorted by frequency)
#Authors: Alex Lipka and Zhiwu Zhang
# Last update: April 30, 2012
##############################################################################################
#Determine the number of bits of the genotype
bit=nchar(as.character(xs[1]))
#Identify the SNPs classified as missing
if(bit==1) {
xs[xs=="X"]="N"
xs[xs=="-"]="N"
xs[xs=="+"]="N"
xs[xs=="/"]="N"
xs[xs=="K"]="Z" #K (for GT genotype) is replaced by Z to ensure the heterozygote has the largest value
}
if(bit==2) {
xs[xs=="XX"]="N"
xs[xs=="--"]="N"
xs[xs=="++"]="N"
xs[xs=="//"]="N"
xs[xs=="NN"]="N"
}
#Create the indicators
#Sort the SNPs by genotype frequency
xs.temp <- xs[-which(xs == "N")]
frequ<- NULL
for(i in 1:length(unique(xs.temp))) frequ <- c(frequ, length(which(xs == unique(xs.temp)[i])))
unique.sorted <- cbind(unique(xs.temp), frequ)
print("unique.sorted is")
print(unique.sorted)
unique.sorted <- unique.sorted[order(unique.sorted[,2]),]
unique.sorted <- unique.sorted[,-2]
#Impute based on the major and minor allele frequencies
if(SNP.impute == "Major") xs[which(is.na(xs))] = unique.sorted[1]
if(SNP.impute == "Minor") xs[which(is.na(xs))] = unique.sorted[length(unique.sorted)]
if(SNP.impute == "Middle") xs[which(is.na(xs))] = unique.sorted[2]
x.ind <- NULL
for(i in unique.sorted){
x.col <- rep(NA, length(xs))
x.col[which(xs==i)] <- 1
x.col[which(xs!=i)] <- 0
x.ind <- cbind(x.ind,x.col)
}
#Return both pieces as a list; GAPIT.EMMAxP3D accesses them as indicator$x.ind and indicator$unique.SNPs
return(list(x.ind = x.ind, unique.SNPs = unique.sorted))
#print("GAPIT.Create.Indicator accomplished successfully!")
}#end of GAPIT.Create.Indicator function
#=============================================================================================
`GAPIT.DP` <- function(G=NULL,GD=NULL,GM=NULL,KI=NULL,Z=NULL,CV=NULL,CV.Inheritance=NULL,GP=NULL,GK=NULL,
group.from=30 ,group.to=1000000,group.by=10,DPP=100000,
kinship.cluster="average", kinship.group='Mean',kinship.algorithm="VanRaden",
bin.from=10000,bin.to=10000,bin.by=10000,inclosure.from=10,inclosure.to=10,inclosure.by=10,
SNP.P3D=TRUE,SNP.effect="Add",SNP.impute="Middle",PCA.total=0, SNP.fraction = 1, seed = 123, BINS = 20,SNP.test=TRUE,
SNP.MAF=0,FDR.Rate = 1, SNP.FDR=1,SNP.permutation=FALSE,SNP.CV=NULL,SNP.robust="GLM",
NJtree.group=NULL,NJtree.type=c("fan","unrooted"),plot.bin=10^6,PCA.col=NULL,PCA.3d=FALSE,
file.from=1, file.to=1, file.total=NULL, file.fragment = 99999,file.path=NULL,Inter.Plot=FALSE,Inter.type=c("m","q"),
file.G=NULL, file.Ext.G=NULL,file.GD=NULL, file.GM=NULL, file.Ext.GD=NULL,file.Ext.GM=NULL,
ngrid = 100, llim = -10, ulim = 10, esp = 1e-10, Multi_iter=FALSE,num_regwas=10,
LD.chromosome=NULL,LD.location=NULL,LD.range=NULL,
p.threshold=NA,QTN.threshold=0.01,maf.threshold=0.03,
sangwich.top=NULL,sangwich.bottom=NULL,QC=TRUE,GTindex=NULL,LD=0.1,
file.output=TRUE,cutOff=0.01, Model.selection = FALSE,output.numerical =
FALSE,Random.model=FALSE, output.hapmap = FALSE, Create.indicator = FALSE,QTN=NULL, QTN.round=1,QTN.limit=0, QTN.update=TRUE, QTN.method="Penalty", Major.allele.zero = FALSE, method.GLM="fast.lm",method.sub="reward",method.sub.final="reward",method.bin="static",bin.size=c(1000000),bin.selection=c(10,20,50,100,200,500,1000), memo="",Prior=NULL,ncpus=1,maxLoop=3,threshold.output=.01, WS=c(1e0,1e3,1e4,1e5,1e6,1e7),alpha=c(.01,.05,.1,.2,.3,.4,.5,.6,.7,.8,.9,1),maxOut=100,QTN.position=NULL, converge=1,iteration.output=FALSE,acceleration=0,iteration.method="accum",PCA.View.output=TRUE,Geno.View.output=TRUE,plot.style="Oceanic",SUPER_GD=NULL,SUPER_GS=FALSE,CG=NULL,model="MLM"){ #Object: To Data and Parameters #Designed by Zhiwu Zhang #Writen by Jiabo Wang #Last update: Novenber 3, 2016 ############################################################################################## print("GAPIT.DP in process...") #Judge phenotype genotype and GAPIT logical #print(file.from) #print(kinship.algorithm) #print(NJtree.group) myGenotype<-GAPIT.Genotype(G=G,GD=GD,GM=GM,KI=KI,PCA.total=PCA.total,kinship.algorithm=kinship.algorithm,SNP.fraction=SNP.fraction,SNP.test=FALSE, file.path=file.path,file.from=file.from, file.to=file.to, file.total=file.total, file.fragment = file.fragment, file.G=file.G, file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM, SNP.MAF=SNP.MAF,FDR.Rate = FDR.Rate,SNP.FDR=SNP.FDR,SNP.effect=SNP.effect,SNP.impute=SNP.impute,NJtree.group=NJtree.group,NJtree.type=NJtree.type, LD.chromosome=LD.chromosome,LD.location=LD.location,LD.range=LD.range, GP=GP,GK=GK,bin.size=NULL,inclosure.size=NULL, sangwich.top=sangwich.top,sangwich.bottom=sangwich.bottom,GTindex=NULL,file.output=file.output, Create.indicator = Create.indicator, Major.allele.zero = Major.allele.zero,Geno.View.output=Geno.View.output,PCA.col=PCA.col,PCA.3d=PCA.3d) # } KI=myGenotype$KI PC=myGenotype$PC print(dim(PC)) genoFormat=myGenotype$genoFormat hasGenotype=myGenotype$hasGenotype byFile=myGenotype$byFile fullGD=myGenotype$fullGD GD=myGenotype$GD GI=myGenotype$GI GT=myGenotype$GT G=myGenotype$G chor_taxa=myGenotype$chor_taxa #if G exist turn to GD and GM if(output.numerical) write.table(GD, "GAPIT.Genotype.Numerical.txt", quote = FALSE, sep = "\t", row.names = TRUE,col.names = NA) if(output.hapmap) write.table(myGenotype$G, "GAPIT.Genotype.hmp.txt", quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE) rownames(GD)=GT colnames(GD)=GI[,1] GD=cbind(as.data.frame(GT),GD) print("GAPIT.DP accomplished successfully for multiple traits. 
Results are saved") return (list(Y=NULL,G=G,GD=GD,GM=GI,KI=KI,Z=Z,CV=CV,CV.Inheritance= CV.Inheritance,GP=GP,GK=GK,PC=PC,GI=GI, group.from= group.from ,group.to= group.to,group.by= group.by,DPP= DPP, name.of.trait=NULL, kinship.cluster= kinship.cluster, kinship.group= kinship.group,kinship.algorithm= kinship.algorithm,NJtree.group=NJtree.group,NJtree.type=NJtree.type,PCA.col=PCA.col,PCA.3d=PCA.3d, bin.from= bin.from,bin.to= bin.to,bin.by= bin.by,inclosure.from= inclosure.from,inclosure.to= inclosure.to,inclosure.by= inclosure.by, SNP.P3D= SNP.P3D,SNP.effect= SNP.effect,SNP.impute= SNP.impute,PCA.total= PCA.total, SNP.fraction = SNP.fraction, seed = seed, BINS = BINS,SNP.test=SNP.test, SNP.MAF= SNP.MAF,FDR.Rate = FDR.Rate, SNP.FDR= SNP.FDR,SNP.permutation= SNP.permutation, SNP.CV= SNP.CV,SNP.robust= SNP.robust, file.from= file.from, file.to=file.to, file.total= file.total, file.fragment = file.fragment,file.path= file.path, file.G= file.G, file.Ext.G= file.Ext.G,file.GD= file.GD, file.GM= file.GM, file.Ext.GD= file.Ext.GD,file.Ext.GM= file.Ext.GM, ngrid = ngrid, llim = llim, ulim = ulim, esp = esp,Inter.Plot=Inter.Plot,Inter.type=Inter.type, LD.chromosome= LD.chromosome,LD.location= LD.location,LD.range= LD.range,Multi_iter=Multi_iter, sangwich.top= sangwich.top,sangwich.bottom= sangwich.bottom,QC= QC,GTindex= GTindex,LD= LD,GT=GT, file.output= file.output,cutOff=cutOff, Model.selection = Model.selection,output.numerical = output.numerical, output.hapmap = output.hapmap, Create.indicator = Create.indicator,Random.model=Random.model, QTN= QTN, QTN.round= QTN.round,QTN.limit= QTN.limit, QTN.update= QTN.update, QTN.method= QTN.method, Major.allele.zero = Major.allele.zero, method.GLM= method.GLM,method.sub= method.sub,method.sub.final= method.sub.final, method.bin= method.bin,bin.size= bin.size,bin.selection= bin.selection, memo= memo,Prior= Prior,ncpus=1,maxLoop= maxLoop,threshold.output= threshold.output, WS= WS,alpha= alpha,maxOut= maxOut,QTN.position= QTN.position, converge=1,iteration.output= iteration.output,acceleration=0, iteration.method= iteration.method,PCA.View.output= PCA.View.output, p.threshold=p.threshold,QTN.threshold=QTN.threshold, maf.threshold=maf.threshold,chor_taxa=chor_taxa,num_regwas=num_regwas, Geno.View.output= Geno.View.output,plot.style= plot.style,SUPER_GD= SUPER_GD,SUPER_GS= SUPER_GS,CG=CG,plot.bin=plot.bin)) } #end of GAPIT DP function #============================================================================================= `GAPIT.EMMAxP3D` <- function(ys,xs,K=NULL,Z=NULL,X0=NULL,CVI=NULL,CV.Inheritance=NULL,GI=NULL,GP=NULL, file.path=NULL,file.from=NULL,file.to=NULL,file.total=1, genoFormat="Hapmap", file.fragment=NULL,byFile=FALSE,fullGD=TRUE,SNP.fraction=1, file.G=NULL,file.Ext.G=NULL,GTindex=NULL,file.GD=NULL, file.GM=NULL, file.Ext.GD=NULL,file.Ext.GM=NULL, SNP.P3D=TRUE,Timmer,Memory,optOnly=TRUE,SNP.effect="Add",SNP.impute="Middle", SNP.permutation=FALSE, ngrids=100,llim=-10,ulim=10,esp=1e-10,name.of.trait=NULL, Create.indicator = FALSE, Major.allele.zero = FALSE){ #Object: To esimate variance component by using EMMA algorithm and perform GWAS with P3D/EMMAx #Output: ps, REMLs, stats, dfs, vgs, ves, BLUP, BLUP_Plus_Mean, PEV #Authors: Feng Tian, Alex Lipka and Zhiwu Zhang # Last update: April 6, 2016 # Library used: EMMA (Kang et al, Genetics, Vol. 
178, 1709-1723, March 2008) # Note: This function was modified from the function of emma.REML.t from the library ############################################################################################## #print("EMMAxP3D started...") Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="P3D Start") Memory=GAPIT.Memory(Memory=Memory,Infor="P3D Start") #When numeric genotypes are selected, impute the missing SNPs with the allele indicated by the "SNP.impute" value if(!optOnly){ if(SNP.impute == "Major") xs[which(is.na(xs))] = 2 if(SNP.impute == "Minor") xs[which(is.na(xs))] = 0 if(SNP.impute == "Middle") xs[which(is.na(xs))] = 1 } #--------------------------------------------------------------------------------------------------------------------< #Change data to matrix format if they are not if(is.null(dim(ys)) || ncol(ys) == 1) ys <- matrix(ys, 1, length(ys)) if(is.null(X0)) X0 <- matrix(1, ncol(ys), 1) #handler of special Z and K if(!is.null(Z)){ if(ncol(Z) == nrow(Z)) Z = NULL } if(!is.null(K)) {if(length(K)<= 1) K = NULL} #Extract dimension information g <- nrow(ys) #number of traits n <- ncol(ys) #number of observation q0 <- ncol(X0)#number of fixed effects q1 <- q0 + 1 #Nuber of fixed effect including SNP nr=n if(!is.null(K)) tv=ncol(K) #decomposation without fixed effect #print("Caling emma.eigen.L...") if(!is.null(K)) eig.L <- emma.eigen.L(Z, K) #this function handle both NULL Z and non-NULL Z matrix #eig.L$values[eig.L$values<0]=0 Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="eig.L") Memory=GAPIT.Memory(Memory=Memory,Infor="eig.L") #decomposation with fixed effect (SNP not included) #print("Calling emma.eigen.R.w.Z...") X <- X0 #covariate variables such as population structure if(!is.null(Z) & !is.null(K)) eig.R <- try(emma.eigen.R.w.Z(Z, K, X),silent=TRUE) #This will be used to get REstricted ML (REML) if(is.null(Z) & !is.null(K)) eig.R <- try(emma.eigen.R.wo.Z( K, X),silent=TRUE) #This will be used to get REstricted ML (REML) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="eig.R") Memory=GAPIT.Memory(Memory=Memory,Infor="eig.R") #eig.R$values[eig.R$values<0]=0 #print(labels(eig.R)) #print(length(eig.R$values)) #print(dim(eig.R$vectors)) #print("emma.eigen.R.w.Z called!!!") #Handler of error in emma #print("!!!!!!") if(!is.null(K)){ if(inherits(eig.R, "try-error")) return(list(ps = NULL, REMLs = NA, stats = NULL, effect.est = NULL, dfs = NULL,maf=NULL,nobs = NULL,Timmer=Timmer,Memory=Memory, vgs = NA, ves = NA, BLUP = NULL, BLUP_Plus_Mean = NULL, PEV = NULL, BLUE=NULL)) #print("@@@@@") } #--------------------------------------------------------------------------------------------------------------------> #print("Looping through traits...") #Loop on Traits for (j in 1:g) { if(optOnly){ #REMLE <- GAPIT.emma.REMLE(ys[j,], X, K, Z, ngrids, llim, ulim, esp, eig.R) #vgs <- REMLE$vg #ves <- REMLE$ve #REMLs <- REMLE$REML #REMLE_delta=REMLE$delta if(!is.null(K)){ REMLE <- GAPIT.emma.REMLE(ys[j,], X, K, Z, ngrids, llim, ulim, esp, eig.R) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="REML") Memory=GAPIT.Memory(Memory=Memory,Infor="REML") rm(eig.R) gc() Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="eig.R removed") Memory=GAPIT.Memory(Memory=Memory,Infor="eig.R removed") vgs <- REMLE$vg ves <- REMLE$ve REMLs <- REMLE$REML REMLE_delta=REMLE$delta rm(REMLE) gc() } vids <- !is.na(ys[j,]) yv <- ys[j, vids] if(!is.null(Z) & !is.null(K)) U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values + REMLE_delta)),rep(sqrt(1/REMLE_delta),nr - tv)),nr,((nr-tv)+length(eig.L$values)),byrow=TRUE) if( is.null(Z) & !is.null(K)) U <- eig.L$vectors 
* matrix( sqrt(1/(eig.L$values + REMLE_delta)),nr,length(eig.L$values),byrow=TRUE) if( !is.null(Z) & !is.null(K)) eig.full.plus.delta <- as.matrix(c((eig.L$values + REMLE_delta), rep(REMLE_delta,(nr - tv)))) if( is.null(Z) & !is.null(K)) eig.full.plus.delta <- as.matrix((eig.L$values + REMLE_delta)) if(!is.null(K)){ if(length(which(eig.L$values < 0)) > 0 ){ #print("---------------------------------------------------The group kinship matrix at this compression level is not positive semidefinite. Please select another compression level.---------------------------------------------------") #return(list(ps = NULL, REMLs = 999999, stats = NULL, effect.est = NULL, dfs = NULL,maf=NULL,nobs = NULL,Timmer=Timmer,Memory=Memory, #vgs = 1.000, ves = 1.000, BLUP = NULL, BLUP_Plus_Mean = NULL, #PEV = NULL, BLUE=NULL)) } } #Calculate the log likelihood function for the intercept only model X.int <- matrix(1,nrow(as.matrix(yv)),ncol(as.matrix(yv))) iX.intX.int <- solve(crossprod(X.int, X.int)) iX.intY <- crossprod(X.int, as.matrix(as.matrix(yv))) beta.int <- crossprod(iX.intX.int, iX.intY) #Note: we can use crossprod here becase iXX is symmetric X.int.beta.int <- X.int%*%beta.int logL0 <- 0.5*((-length(yv))*log(((2*pi)/length(yv)) *crossprod((yv-X.int.beta.int),(yv-X.int.beta.int))) -length(yv)) #print(paste("The value of logL0 inside of the optonly template is is",logL0, sep = "")) #print(paste("The value of nrow(as.matrix(ys[!is.na(ys)])) is ",nrow(as.matrix(ys[!is.na(ys)])), sep = "")) if(!is.null(K)){ yt <- yt <- crossprod(U, yv) X0t <- crossprod(U, X0) X0X0 <- crossprod(X0t, X0t) X0Y <- crossprod(X0t,yt) XY <- X0Y iX0X0 <- try(solve(X0X0),silent=TRUE) if(inherits(iX0X0, "try-error")){ iX0X0 <- ginv(X0X0) print("At least two of your covariates are linearly dependent. 
Please reconsider the covariates you are using for GWAS and GPS") } iXX <- iX0X0 } if(is.null(K)){ iXX <- try(solve(crossprod(X,X)),silent=TRUE) if(inherits(iXX, "try-error"))iXX <- ginv(crossprod(X,X)) XY = crossprod(X,yv) } beta <- crossprod(iXX,XY) #Note: we can use crossprod here because iXX is symmetric X.beta <- X%*%beta beta.cv=beta BLUE=X.beta if(!is.null(K)){ U.times.yv.minus.X.beta <- crossprod(U,(yv-X.beta)) logLM <- 0.5*(-length(yv)*log(((2*pi)/length(yv))*crossprod(U.times.yv.minus.X.beta,U.times.yv.minus.X.beta)) - sum(log(eig.full.plus.delta)) - length(yv)) } if(is.null(K)){ U.times.yv.minus.X.beta <- yv-X.beta logLM <- 0.5*(-length(yv)*log(((2*pi)/length(yv))*crossprod(U.times.yv.minus.X.beta,U.times.yv.minus.X.beta)) - length(yv)) } }#End if(optOnly) #--------------------------------------------------------------------------------------------------------------------< Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Trait") Memory=GAPIT.Memory(Memory=Memory,Infor="Trait") if(!is.null(K)){ REMLE <- GAPIT.emma.REMLE(ys[j,], X, K, Z, ngrids, llim, ulim, esp, eig.R) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="REML") Memory=GAPIT.Memory(Memory=Memory,Infor="REML") rm(eig.R) gc() Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="eig.R removed") Memory=GAPIT.Memory(Memory=Memory,Infor="eig.R removed") vgs <- REMLE$vg ves <- REMLE$ve REMLs <- REMLE$REML REMLE_delta=REMLE$delta rm(REMLE) gc() } Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="REMLE removed") Memory=GAPIT.Memory(Memory=Memory,Infor="REMLE removed") if(!is.null(Z) & !is.null(K)) U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values + REMLE_delta)),rep(sqrt(1/REMLE_delta),nr - tv)),nr,((nr-tv)+length(eig.L$values)),byrow=TRUE) if( is.null(Z) & !is.null(K)) U <- eig.L$vectors * matrix( sqrt(1/(eig.L$values + REMLE_delta)),nr,length(eig.L$values),byrow=TRUE) if( !is.null(Z) & !is.null(K)) eig.full.plus.delta <- as.matrix(c((eig.L$values + REMLE_delta), rep(REMLE_delta,(nr - tv)))) if( is.null(Z) & !is.null(K)) eig.full.plus.delta <- as.matrix((eig.L$values + REMLE_delta)) if(!is.null(K)){ if(length(which(eig.L$values < 0)) > 0 ){ #print("---------------------------------------------------The group kinship matrix at this compression level is not positive semidefinite. 
Please select another compression level.---------------------------------------------------") #return(list(ps = NULL, REMLs = 999999, stats = NULL, effect.est = NULL, dfs = NULL,maf=NULL,nobs = NULL,Timmer=Timmer,Memory=Memory, #vgs = 1.000, ves = 1.000, BLUP = NULL, BLUP_Plus_Mean = NULL, #PEV = NULL, BLUE=NULL)) } } Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="U Matrix") Memory=GAPIT.Memory(Memory=Memory,Infor="U Matrix") if(SNP.P3D == TRUE)rm(eig.L) gc() Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="eig.L removed") Memory=GAPIT.Memory(Memory=Memory,Infor="eig.L removed") #--------------------------------------------------------------------------------------------------------------------> #The cases that go though multiple file once file.stop=file.to if(optOnly) file.stop=file.from if(fullGD) file.stop=file.from if(!fullGD & !optOnly) {print("Screening SNPs from file...")} #Add loop for genotype data files for (file in file.from:file.stop) { Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="New Genotype file") Memory=GAPIT.Memory(Memory=Memory,Infor="New Genotype file") frag=1 numSNP=file.fragment myFRG=NULL while(numSNP==file.fragment) { #this is problematic if the read end at the last line #initial previous SNP storage x.prev <- vector(length = 0) #force to skip the while loop if optOnly if(optOnly) numSNP=0 #Determine the case of first file and first fragment: skip read file if(file==file.from & frag==1& SNP.fraction<1){ firstFileFirstFrag=TRUE }else{ firstFileFirstFrag=FALSE } #In case of xs is not full GD, replace xs from file if(!fullGD & !optOnly & !firstFileFirstFrag ) { Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Clean myFRG") Memory=GAPIT.Memory(Memory=Memory,Infor="Clean myFRG") #update xs for each file rm(xs) rm(myFRG) gc() print(paste("Current file: ",file," , Fragment: ",frag,sep="")) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Read file fragment") Memory=GAPIT.Memory(Memory=Memory,Infor="Read file fragment") myFRG=GAPIT.Fragment( file.path=file.path, file.total=file.total,file.G=file.G,file.Ext.G=file.Ext.G, seed=seed,SNP.fraction=SNP.fraction,SNP.effect=SNP.effect,SNP.impute=SNP.impute,genoFormat=genoFormat, file.GD=file.GD,file.Ext.GD=file.Ext.GD,file.GM=file.GM,file.Ext.GM=file.Ext.GM,file.fragment=file.fragment,file=file,frag=frag, Create.indicator = Create.indicator, Major.allele.zero = Major.allele.zero) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Genotype file converted") Memory=GAPIT.Memory(Memory=Memory,Infor="Genotype file converted") #print("-----------------------------------------------------------------") if(is.null(myFRG$GD)){ xs=NULL }else{ xs=myFRG$GD[GTindex,] } if(!is.null(myFRG$GI)) { colnames(myFRG$GI)=c("SNP","Chromosome","Position") GI=as.matrix(myFRG$GI) } if(!is.null(myFRG$GI)) { numSNP=ncol(myFRG$GD) } else{ numSNP=0 } if(is.null(myFRG))numSNP=0 #force to end the while loop } # end of if(!fullGD) if(fullGD)numSNP=0 #force to end the while loop #Skip REML if xs is from a empty fragment file if(!is.null(xs)) { if(is.null(dim(xs)) || nrow(xs) == 1) xs <- matrix(xs, length(xs),1) xs <- as.matrix(xs) if(length(which(is.na(xs)))>0){ #for the case where fragments are read in if(SNP.impute == "Major") xs[which(is.na(xs))] = 2 if(SNP.impute == "Minor") xs[which(is.na(xs))] = 0 if(SNP.impute == "Middle") xs[which(is.na(xs))] = 1 } m <- ncol(xs) #number of SNPs t <- nrow(xs) #number of individuals Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Before cleaning") Memory=GAPIT.Memory(Memory=Memory,Infor="Before cleaning") #allocate spaces for SNPs rm(dfs) rm(stats) rm(effect.est) rm(ps) 
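#The objects being removed here are the per-SNP result containers from the previous fragment;
#they are re-allocated below with one row per SNP and one column per trait: dfs = residual
#degrees of freedom, stats = t (or F) statistics, effect.est = allele effect estimates,
#ps = p-values, nobs = non-missing observations, maf = minor allele frequency,
#rsquare_base/rsquare = model R-square without/with the SNP, and tvalue/stderr = the statistic
#and standard error reported in the GWAS tables.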
rm(nobs) rm(maf) rm(rsquare_base) rm(rsquare) rm(df) rm(tvalue) rm(stderr) gc() Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="After cleaning") Memory=GAPIT.Memory(Memory=Memory,Infor="After cleaning") dfs <- matrix(nrow = m, ncol = g) stats <- matrix(nrow = m, ncol = g) if(!Create.indicator) effect.est <- matrix(nrow = m, ncol = g) if(Create.indicator) effect.est <- NULL ps <- matrix(nrow = m, ncol = g) nobs <- matrix(nrow = m, ncol = g) maf <- matrix(nrow = m, ncol = g) rsquare_base <- matrix(nrow = m, ncol = g) rsquare <- matrix(nrow = m, ncol = g) df <- matrix(nrow = m, ncol = g) tvalue <- matrix(nrow = m, ncol = g) stderr <- matrix(nrow = m, ncol = g) #print(paste("Memory allocated.",sep="")) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Memory allocation") Memory=GAPIT.Memory(Memory=Memory,Infor="Memory allocation") if(optOnly)mloop=0 if(!optOnly)mloop=m #Loop on SNPs #print(paste("Number of SNPs is ",mloop," in genotype file ",file, sep="")) #set starting point of loop if(file==file.from&frag==1){loopStart=0}else{loopStart=1} for (i in loopStart:mloop){ #print(i) #--------------------------------------------------------------------------------------------------------------------< normalCase=TRUE if((i >0)&(floor(i/1000)==i/1000)) {print(paste("Genotype file: ", file,", SNP: ",i," ",sep=""))} # To extract current snp. It save computation for next one in case they are identical if(i ==0&file==file.from&frag==1){ #For the model without fitting SNP vids <- !is.na(ys[j,]) #### Feng changed xv <- ys[j, vids]*0+1 #### Feng changed } if(i >0 | file>file.from | frag>1){ if(Create.indicator){ #I need create indicators and then calculate the minor allele frequency condition.temp <- unique(xs[vids,i]) #Define what a bit is bit=nchar(as.character(xs[vids[1],i])) #Expand on the "which" statement below to include all instances of missing data if(bit==1) condition <- condition.temp[-which(condition.temp == "N")] if(bit==2) condition <- condition.temp[-which(condition.temp == "NN")] #print("condition.temp is ") #print(condition.temp) #print("condition is") #print(condition) #print(paste("The value of i is ", i, sep = "")) if(length(condition) <= 1){ dfs[i, ] <- rep(NA, g) stats[i, ] <- rep(NA, g) effect.est <- rbind(effect.est, c(i,rep(NA, g), rep(NA, g))) ps[i, ] = rep(1, g) rsquare[i, ] <- rep(NA,g) rsquare_base[i, ]<-rep(NA,g) maf[i, ] <- rep(0, g) df[i, ] <- rep(NA,g) tvalue[i, ] <- rep(NA,g) stderr[i, ] <- rep(NA,g) normalCase=FALSE x.prev= vector(length = 0) } } if(normalCase){ #print("The head of xs[vids,i] is") #print(head(xs[vids,i])) if(Create.indicator){ #I need create indicators and then calculate the minor allele frequency indicator <- GAPIT.Create.Indicator(xs[vids,i], SNP.impute = SNP.impute) xv <- indicator$x.ind vids <- !is.na(xv[,1]) #### Feng changed vids.TRUE=which(vids==TRUE) vids.FALSE=which(vids==FALSE) ns=nrow(xv) ss=sum(xv[,ncol(xv)]) maf[i]=min(ss/ns,1-ss/ns) nobs[i]=ns q1 <- q0 + ncol(xv) # This is done so that parameter estimates for all indicator variables are included #These two matrices need to be reinitiated for each SNP. 
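#(Xt holds the transformed design matrix, intercept and covariates plus the indicator columns of
#the current SNP, and iXX holds the inverse of crossprod(Xt, Xt); both are sized with
#q1 = q0 + ncol(xv) because the number of indicator columns can differ from SNP to SNP.)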
Xt <- matrix(NA,nr, q1) iXX=matrix(NA,q1,q1) } } if(!Create.indicator){ #### Feng changed #print(xs[1:10,1:10]) xv <- xs[vids,i] vids <- !is.na(xs[,i]) #### Feng changed vids.TRUE=which(vids==TRUE) vids.FALSE=which(vids==FALSE) ns=length(xv) #print(xv)) ss=sum(xv) maf[i]=min(.5*ss/ns,1-.5*ss/ns) nobs[i]=ns } nr <- sum(vids) if(i ==1 & file==file.from&frag==1 & !Create.indicator) { Xt <- matrix(NA,nr, q1) iXX=matrix(NA,q1,q1) } } #Situation of no variation for SNP except the fisrt one(synthetic for EMMAx/P3D) if((min(xv) == max(xv)) & (i >0 | file>file.from |frag>1)) { dfs[i, ] <- rep(NA, g) stats[i, ] <- rep(NA, g) if(!Create.indicator) effect.est[i,] <- rep(NA, g) if(Create.indicator) effect.est <- rbind(effect.est, c(i,rep(NA, g),rep(NA, g))) ps[i, ] = rep(1, g) rsquare[i, ] <- rep(NA,g) rsquare_base[i, ]<-rep(NA,g) df[i, ] <- rep(NA,g) tvalue[i, ] <- rep(NA,g) stderr[i, ] <- rep(NA,g) normalCase=FALSE }else if(identical(x.prev, xv)) #Situation of the SNP is identical to previous { if(i >1 | file>file.from | frag>1){ dfs[i, ] <- dfs[i - 1, ] stats[i, ] <- stats[i - 1, ] if(!Create.indicator) effect.est[i, ] <- effect.est[i - 1, ] if(Create.indicator) effect.est <- rbind(effect.est, c(i, rep(NA, g), rep(NA, g))) #If the previous SNP is idnetical, indicate this by "NA" ps[i, ] <- ps[i - 1, ] rsquare[i, ] <- rsquare[i - 1, ] rsquare_base[i, ] <-rsquare_base[i - 1, ] df[i, ] <- df[i - 1, ] tvalue[i, ] <- tvalue[i - 1, ] stderr[i, ] <- stderr[i - 1, ] normalCase=FALSE } } #--------------------------------------------------------------------------------------------------------------------> if(i == 0 &file==file.from &frag==1){ #Calculate the log likelihood function for the intercept only model #vids <- !is.na(ys[j,]) yv <- ys[j, vids] X.int <- matrix(1,nrow(as.matrix(yv)),ncol(as.matrix(yv))) iX.intX.int <- solve(crossprod(X.int, X.int)) iX.intY <- crossprod(X.int, as.matrix(as.matrix(yv))) beta.int <- crossprod(iX.intX.int, iX.intY) #Note: we can use crossprod here becase iXX is symmetric X.int.beta.int <- X.int%*%beta.int #X.int <- matrix(1,nrow(as.matrix(ys[!is.na(ys)])),ncol(as.matrix(ys[!is.na(ys)]))) #iX.intX.int <- solve(crossprod(X.int, X.int)) #iX.intY <- crossprod(X.int, as.matrix(ys[!is.na(ys)])) #beta.int <- crossprod(iX.intX.int, iX.intY) #Note: we can use crossprod here becase iXX is symmetric #X.int.beta.int <- X.int%*%beta.int logL0 <- 0.5*((-length(yv))*log(((2*pi)/length(yv)) *crossprod((yv-X.int.beta.int),(yv-X.int.beta.int))) -length(yv)) #logL0 <- 0.5*((-nrow(as.matrix(ys[!is.na(ys)])))*log(((2*pi)/nrow(ys)) # *crossprod(((as.matrix(ys[!is.na(ys)]))-X.int.beta.int),((as.matrix(ys[!is.na(ys)]))-X.int.beta.int))) # -nrow(as.matrix(ys[!is.na(ys)]))) #print(paste("The value of logL0 inside of the calculating SNPs loop is", logL0, sep = "")) } #Normal case if(normalCase) { #--------------------------------------------------------------------------------------------------------------------< #nv <- sum(vids) yv <- ys[j, vids] #### Feng changed nr <- sum(vids) #### Feng changed if(!is.null(Z) & !is.null(K)) { r<- ncol(Z) ####Feng, add a variable to indicate the number of random effect vran <- vids[1:r] ###Feng, add a variable to indicate random effects with nonmissing genotype tv <- sum(vran) #### Feng changed } #--------------------------------------------------------------------------------------------------------------------> #--------------------------------------------------------------------------------------------------------------------< if(i >0 | file>file.from|frag>1) 
dfs[i, j] <- nr - q1 if(i >0 | file>file.from|frag>1){ if(!Create.indicator) X <- cbind(X0[vids, , drop = FALSE], xs[vids,i]) if(Create.indicator){ X <- cbind(X0[vids, , drop = FALSE], xv) #if(i == 1) {print("the head of X for running GWAS is")} #if(i == 1) {print(head(X))} } } #Recalculate eig and REML if not using P3D NOTE THIS USED TO BE BEFORE the two solid lines if(SNP.P3D==FALSE & !is.null(K)) { if(!is.null(Z)) eig.R <- emma.eigen.R.w.Z(Z, K, X) #This will be used to get REstricted ML (REML) if(is.null(Z)) eig.R <- emma.eigen.R.wo.Z( K, X) #This will be used to get REstricted ML (REML) if(!is.null(Z)) REMLE <- GAPIT.emma.REMLE(ys[j,], X, K, Z, ngrids, llim, ulim, esp, eig.R) if(is.null(Z)) REMLE <- GAPIT.emma.REMLE(ys[j,], X, K, Z = NULL, ngrids, llim, ulim, esp, eig.R) if(!is.null(Z) & !is.null(K)) U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values + REMLE$delta)),rep(sqrt(1/REMLE$delta),nr - tv)),nr,((nr-tv)+length(eig.L$values)),byrow=TRUE) if(is.null(Z) & !is.null(K)) U <- eig.L$vectors * matrix( sqrt(1/(eig.L$values + REMLE$delta)),nr,length(eig.L$values),byrow=TRUE) vgs <- REMLE$vg ves <- REMLE$ve REMLs <- REMLE$REML REMLE_delta=REMLE$delta } if(n==nr) { if(!is.null(K)) { yt <- crossprod(U, yv) if(i == 0 &file==file.from &frag==1){ X0t <- crossprod(U, X0) Xt <- X0t } if(i > 0 | file>file.from |frag>1){ #if(i ==1 & file==file.from&frag==1) Xt <- matrix(NA,nr, q1) if(Create.indicator){ xst <- crossprod(U, X[,(q0+1):q1]) Xt[1:nr,1:q0] <- X0t Xt[1:nr,(q0+1):q1] <- xst } #print(paste("i:",i,"q0:",q0,"q1:",q1,"nt:",nr,"XT row",nrow(Xt),"XT col",ncol(Xt),sep=" ")) if(!Create.indicator){ xst <- crossprod(U, X[,ncol(X)]) Xt[1:nr,1:q0] <- X0t Xt[1:nr,q1] <- xst } } }else{ yt=yv if(i == 0 &file==file.from &frag==1) X0t <- X0 if(i > 0 | file>file.from |frag>1) xst <- X[,ncol(X)] } if(i == 0 &file==file.from &frag==1){ X0X0 <- crossprod(X0t, X0t) #XX <- X0X0 } if(i > 0 | file>file.from |frag>1){ #if(i == 1)XX=matrix(NA,q1,q1) X0Xst <- crossprod(X0t,xst) XstX0 <- t(X0Xst) xstxst <- crossprod(xst, xst) # if(i == 1){ # Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Calculate_X0Xst_XstX0_xstxst") # Memory=GAPIT.Memory(Memory=Memory,Infor="Calculate_X0Xst_XstX0_xstxst") # } #XX <- rbind(cbind(X0X0, X0Xst), cbind(XstX0, xstxst)) #XX[1:q0,1:q0] <- X0X0 #XX[q1,1:q0] <- X0Xst #XX[1:q0,q1] <- X0Xst #XX[q1,q1] <- xstxst } if(X0X0[1,1] == "NaN") { Xt[which(Xt=="NaN")]=0 yt[which(yt=="NaN")]=0 XX=crossprod(Xt, Xt) } if(i == 0 &file==file.from & frag==1){ X0Y <- crossprod(X0t,yt) XY <- X0Y } if(i > 0 | file>file.from |frag>1){ xsY <- crossprod(xst,yt) XY <- c(X0Y,xsY) # if(i == 1){ # Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Calculate_xsY_X0Y") # Memory=GAPIT.Memory(Memory=Memory,Infor="Calculate_xsY_X0Y") # } } #XY = crossprod(Xt,yt) } #Missing SNP if(n>nr) { UU=crossprod(U,U) A11=UU[vids.TRUE,vids.TRUE] A12=UU[vids.TRUE,vids.FALSE] A21=UU[vids.FALSE,vids.TRUE] A22=UU[vids.FALSE,vids.FALSE] A22i =try(solve(A22),silent=TRUE ) if(inherits(A22i, "try-error")) A22i <- ginv(A22) F11=A11-A12%*%A22i%*%A21 XX=crossprod(X,F11)%*%X XY=crossprod(X,F11)%*%yv } if(i == 0 &file==file.from &frag==1){ iX0X0 <- try(solve(X0X0),silent=TRUE) if(inherits(iX0X0, "try-error")){ iX0X0 <- ginv(X0X0) print("At least two of your covariates are linearly dependent. 
Please reconsider the covariates you are using for GWAS and GPS") } iXX <- iX0X0 } if(i > 0 | file>file.from |frag>1){ #if(i ==1 &file==file.from &frag==1) iXX=matrix(NA,q1,q1) if(Create.indicator){ B22 <- xstxst - XstX0%*%iX0X0%*%X0Xst invB22 <- solve(B22) B21 <- tcrossprod(XstX0, iX0X0) NeginvB22B21 <- crossprod(-invB22,B21) B11 <- iX0X0 + as.numeric(invB22)*crossprod(B21,B21) iXX[1:q0,1:q0]=B11 iXX[(q0+1):q1,(q0+1):q1]=solve(B22) iXX[(q0+1):q1,1:q0]=NeginvB22B21 iXX[1:q0,(q0+1):q1]=t(NeginvB22B21) } if(!Create.indicator){ B22 <- xstxst - XstX0%*%iX0X0%*%X0Xst invB22 <- 1/B22 #B12 <- crossprod(iX0X0,X0Xst) B21 <- tcrossprod(XstX0, iX0X0) NeginvB22B21 <- crossprod(-invB22,B21) #B11 <- iX0X0 + B12%*%invB22%*%B21 B11 <- iX0X0 + as.numeric(invB22)*crossprod(B21,B21) #iXX <- rbind(cbind(B11,t(NeginvB22B21)), cbind(NeginvB22B21,invB22)) iXX[1:q0,1:q0]=B11 iXX[q1,q1]=1/B22 iXX[q1,1:q0]=NeginvB22B21 iXX[1:q0,q1]=NeginvB22B21 } #if(i == 1){ # Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Calculate_iXX") # Memory=GAPIT.Memory(Memory=Memory,Infor="Calculate_iXX") #} } if(is.null(K)){ iXX <- try(solve(crossprod(X,X)),silent=TRUE) if(inherits(iXX, "try-error"))iXX <- ginv(crossprod(X,X)) XY = crossprod(X,yv) } #iXX <- try(solve(XX)) #if(inherits(iXX, "try-error")) iXX <- ginv(crossprod(Xt, Xt)) #print("The dimension if iXX is") #print(dim(iXX)) #print("The length of XY is") #print(length(XY)) beta <- crossprod(iXX,XY) #Note: we can use crossprod here becase iXX is symmetric #print("beta was estimated") #--------------------------------------------------------------------------------------------------------------------> #--------------------------------------------------------------------------------------------------------------------< if(i ==0 &file==file.from &frag==1 & !is.null(K)) { Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="ReducedModel") Memory=GAPIT.Memory(Memory=Memory,Infor="ReducdModel") #beta.cv=beta XtimesBetaHat <- X%*%beta YminusXtimesBetaHat <- ys[j,]- XtimesBetaHat vgK <- vgs*K Dt <- crossprod(U, YminusXtimesBetaHat) if(!is.null(Z)) Zt <- crossprod(U, Z) if(is.null(Z)) Zt <- t(U) if(X0X0[1,1] == "NaN") { Dt[which(Dt=="NaN")]=0 Zt[which(Zt=="NaN")]=0 } BLUP <- K %*% crossprod(Zt, Dt) #Using K instead of vgK because using H=V/Vg #print("!!!!") #Clean up the BLUP starf to save memory Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="before Dt clean") Memory=GAPIT.Memory(Memory=Memory,Infor="before Dt clean") rm(Dt) gc() Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Dt clean") Memory=GAPIT.Memory(Memory=Memory,Infor="Dt clean") grand.mean.vector <- rep(beta[1], length(BLUP)) BLUP_Plus_Mean <- grand.mean.vector + BLUP Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="BLUP") Memory=GAPIT.Memory(Memory=Memory,Infor="BLUP") #PEV C11=try(vgs*solve(crossprod(Xt,Xt)),silent=TRUE) if(inherits(C11, "try-error")) C11=vgs*ginv(crossprod(Xt,Xt)) C21=-K%*%crossprod(Zt,Xt)%*%C11 Kinv=try(solve(K) ,silent=TRUE ) if(inherits(Kinv, "try-error")) Kinv=ginv(K) if(!is.null(Z)) term.0=crossprod(Z,Z)/ves if(is.null(Z)) term.0=diag(1/ves,nrow(K)) term.1=try(solve(term.0+Kinv/vgs ) ,silent=TRUE ) if(inherits(term.1, "try-error")) term.1=ginv(term.0+Kinv/vgs ) term.2=C21%*%crossprod(Xt,Zt)%*%K C22=(term.1-term.2 ) PEV=as.matrix(diag(C22)) #print(paste("The value of is.na(CVI) is", is.na(CVI), sep = "")) if(!is.na(CVI)){ XCV=as.matrix(cbind(1,data.frame(CVI[,-1]))) #CV.Inheritance specified beta.Inheritance=beta if(!is.null(CV.Inheritance)){ XCV=XCV[,1:(1+CV.Inheritance)] beta.Inheritance=beta[1:(1+CV.Inheritance)] } #Interception only 
if(length(beta)==1)XCV=X BLUE=try(XCV%*%beta.Inheritance,silent=TRUE) if(inherits(BLUE, "try-error")) BLUE = NA #print("GAPIT just after BLUE") Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PEV") Memory=GAPIT.Memory(Memory=Memory,Infor="PEV") }#end of if(i ==0&file==file.from & !is.null(K)) if(is.na(CVI)) BLUE = NA }#end if(!is.na(CVI)) #--------------------------------------------------------------------------------------------------------------------> #--------------------------------------------------------------------------------------------------------------------< if(i ==0 &file==file.from &frag==1 & is.null(K)) { YY=crossprod(yt, yt) ves=(YY-crossprod(beta,XY))/(n-q0) r=yt-X%*%iXX%*%XY REMLs=-.5*(n-q0)*log(det(ves)) -.5*n -.5*(n-q0)*log(2*pi) # REMLs=-.5*n*log(det(ves)) -.5*log(det(iXX)/ves) -.5*crossprod(r,r)/ves -.5*(n-q0)*log(2*pi) vgs = 0 BLUP = 0 BLUP_Plus_Mean = NaN PEV = ves #print(paste("X row:",nrow(X)," col:",ncol(X)," beta:",length(beta),sep="")) XCV=as.matrix(cbind(1,data.frame(CVI[,-1]))) #CV.Inheritance specified beta.Inheritance=beta if(!is.null(CV.Inheritance)){ XCV=XCV[,1:(1+CV.Inheritance)] beta.Inheritance=beta[1:(1+CV.Inheritance)] } #Interception only if(length(beta)==1)XCV=X #BLUE=XCV%*%beta.Inheritance modified by jiabo wang 2016.11.21 BLUE=try(XCV%*%beta.Inheritance,silent=TRUE) if(inherits(BLUE, "try-error")) BLUE = NA } #Clean up the BLUP stuff to save memory if(i ==0 &file==file.from &frag==1 & !is.null(K)) { Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="K normal") Memory=GAPIT.Memory(Memory=Memory,Infor="K normal") if(SNP.P3D == TRUE) K=1 #NOTE: When SNP.P3D == FALSE, this line will mess up the spectral decomposition of the kinship matrix at each SNP. rm(Dt) rm(Zt) rm(Kinv) rm(C11) rm(C21) rm(C22) gc() Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="K set to 1") Memory=GAPIT.Memory(Memory=Memory,Infor="K set to 1") } if(i == 0 &file==file.from & frag==1){ beta.cv=beta X.beta <- X%*%beta if(!is.null(K)){ U.times.yv.minus.X.beta <- crossprod(U,(yv-X.beta)) logLM_Base <- 0.5*(-length(yv)*log(((2*pi)/length(yv))*crossprod(U.times.yv.minus.X.beta,U.times.yv.minus.X.beta)) - sum(log(eig.full.plus.delta)) - length(yv)) } if(is.null(K)){ U.times.yv.minus.X.beta <- yv-X.beta logLM_Base <- 0.5*(-length(yv)*log(((2*pi)/length(yv))*crossprod(U.times.yv.minus.X.beta,U.times.yv.minus.X.beta)) - length(yv)) } rsquare_base_intitialized <- 1-exp(-(2/length(yv))*(logLM_Base-logL0)) } #print(Create.indicator) #calculate t statistics and P-values if(i > 0 | file>file.from |frag>1) { if(!Create.indicator){ #if(i<5)print(beta[q1]) #if(i<5)print(iXX[q1, q1]) if(!is.null(K)) stats[i, j] <- beta[q1]/sqrt(iXX[q1, q1] *vgs) if(is.null(K)) stats[i, j] <- beta[q1]/sqrt(iXX[q1, q1] *ves) effect.est[i, ] <- beta[q1] ps[i, ] <- 2 * pt(abs(stats[i, ]), dfs[i, ],lower.tail = FALSE) if(is.na(ps[i,]))ps[i,]=1 #print(c(i,ps[i,],stats[i,],beta[q1],iXX[q1, q1])) } if(Create.indicator){ F.num.first.two <- crossprod(beta[(q0+1):q1], solve(iXX[(q0+1):q1,(q0+1):q1])) if(!is.null(K)) stats[i, j] <- (F.num.first.two %*% beta[(q0+1):q1])/(length((q0+1):q1)*vgs) if(is.null(K)) stats[i, j] <- (F.num.first.two %*% beta[(q0+1):q1])/(length((q0+1):q1)*ves) effect.est <- rbind(effect.est, cbind(rep(i,length((q0+1):q1)), indicator$unique.SNPs, beta[(q0+1):q1])) #Replace with rbind ps[i, ] <- pf(stats[i, j], df1=length((q0+1):q1), df2=(nr-ncol(X)), lower.tail = FALSE) #Alex, are these denominator degrees of freedom correct? 
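#For indicator-coded SNPs the single-coefficient t-test is replaced above by an F-type test:
#the quadratic form of beta[(q0+1):q1] with solve(iXX[(q0+1):q1,(q0+1):q1]) is divided by the
#number of indicator columns times the variance component (vgs with kinship, ves without),
#and the p-value comes from an F distribution with df1 = number of indicator columns and
#df2 = nr - ncol(X).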
dfs[i,] <- nr-nrow(X) } #Calculate the maximum full likelihood function value and the r square X.beta <- X%*%beta if(!is.null(K)){ U.times.yv.minus.X.beta <- crossprod(U,(yv-X.beta)) logLM <- 0.5*(-length(yv)*log(((2*pi)/length(yv))*crossprod(U.times.yv.minus.X.beta,U.times.yv.minus.X.beta)) - sum(log(eig.full.plus.delta))- length(yv)) } if(is.null(K)){ U.times.yv.minus.X.beta <- yv-X.beta logLM <- 0.5*(-length(yv)*log(((2*pi)/length(yv))*crossprod(U.times.yv.minus.X.beta,U.times.yv.minus.X.beta)) - length(yv)) } rsquare_base[i, ] <- rsquare_base_intitialized rsquare[i, ] <- 1-exp(-(2/length(yv))*(logLM-logL0)) #Calculate df, t value and standard error _xiaolei changed df[i,] <- dfs[i,] tvalue[i,] <- stats[i, j] stderr[i,] <- beta[ncol(CVI)+1]/stats[i, j] #stderr[i,] <- sqrt(vgs) # modified by Jiabo at 20191115 } #print("!!!!!!!!!!!!!!!") #print(Create.indicator) #--------------------------------------------------------------------------------------------------------------------> } # End of if(normalCase) x.prev=xv #update SNP } # End of loop on SNPs Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Screening SNPs") Memory=GAPIT.Memory(Memory=Memory,Infor="Screening SNPs") # print(head(tvalue)) # print(head(stderr)) # print(head(effect.est)) #output p value for the genotype file if(!fullGD) { #print("!!!!!!!!!!") #print(dim(GI)) write.table(GI, paste("GAPIT.TMP.GI.",name.of.trait,file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = TRUE) write.table(ps, paste("GAPIT.TMP.ps.",name.of.trait,file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE) write.table(maf, paste("GAPIT.TMP.maf.",name.of.trait,file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE) write.table(nobs, paste("GAPIT.TMP.nobs.",name.of.trait,file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE) write.table(rsquare_base, paste("GAPIT.TMP.rsquare.base.",name.of.trait,file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE) write.table(rsquare, paste("GAPIT.TMP.rsquare.",name.of.trait,file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE) write.table(df, paste("GAPIT.TMP.df.",name.of.trait,file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE) write.table(tvalue, paste("GAPIT.TMP.tvalue.",name.of.trait,file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE) write.table(stderr, paste("GAPIT.TMP.stderr.",name.of.trait,file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE) write.table(effect.est, paste("GAPIT.TMP.effect.est.",name.of.trait,file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE) #rm(dfs,stats,ps,nobs,maf,GI) #This cause problem on return #gc() } frag=frag+1 #Progress to next fragment } #end of if(!is.null(X)) } #end of repeat on fragment } # Ebd of loop on file } # End of loop on traits Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GWAS done for this Trait") Memory=GAPIT.Memory(Memory=Memory,Infor="GWAS done for this Trait") #print("GAPIT.EMMAxP3D accomplished successfully!") return(list(ps = ps, REMLs = -2*REMLs, stats = stats, effect.est = effect.est, rsquare_base = rsquare_base, rsquare = rsquare, dfs = dfs, df = df, tvalue = tvalue, stderr = stderr,maf=maf,nobs = nobs,Timmer=Timmer,Memory=Memory, vgs = vgs, ves = ves, BLUP = BLUP, BLUP_Plus_Mean = 
BLUP_Plus_Mean, PEV = PEV, BLUE=BLUE, logLM = logLM,effect.snp=effect.est,effect.cv=beta.cv)) }#end of GAPIT.EMMAxP3D function #============================================================================================= `GAPIT.FDR.TypeI` <- function(WS=c(1e0,1e3,1e4,1e5), GM=NULL,seqQTN=NULL,GWAS=NULL,maxOut=100,MaxBP=1e10){ #Object: To evaluate power and FDR for the top (maxOut) positive interval defined by WS #Input: WS- window size #Input: GM - m by 3 matrix for SNP name, chromosome and BP #Input: seqQTN - s by 1 vecter for index of QTN on GM (+1 for GDP column wise) #Input: GWAS - SNP,CHR,BP,P,MAF #maxOut: maximum number of rows to report #Requirement: None #Output: Table and Plots #Authors: Xiaolei Liu & Zhiwu Zhang # Date start: April 2, 2013 # Last update: Mar 16, 2016 ############################################################################################## #print("GAPIT.Power Started") if(is.null(seqQTN) | is.null(GM)) return(list(Power=NULL,FDR= NULL,TypeI= NULL,False= NULL,AUC.FDR= NULL,AUC.T1= NULL)) #store number fdr and t1 records NQTN=length(seqQTN) table=array(NA,dim=c(NQTN,2*length(WS))) fdrtable=array(NA,dim=c(NQTN,2*length(WS))) t1table=array(NA,dim=c(NQTN,2*length(WS))) cutoff=array(NA,dim=c(length(WS),NQTN)) cut=array(NA,dim=c(1,NQTN)) #-----------------FDR and Power analysis------------------------- #Information needed: GWAS,myGM and QTN(r) GWAS=GWAS[order(GWAS[,2],GWAS[,3]),] GWAS[is.na(GWAS[,4]),4]=1 QTN.list=sort(GWAS[seqQTN,4]) powerlist=seq(1/length(QTN.list),1,length.out=length(QTN.list)) #calculate number of false positives in each WS total.index=1:nrow(GM) theWS=1 for (theWS in 1:length(WS)){ wsws=WS[theWS] qtn.pool=ceiling((as.numeric(GWAS[seqQTN,2])*MaxBP+as.numeric(GWAS[seqQTN,3]))/(2*wsws)) bonf.pool=ceiling((GWAS[total.index,2]*MaxBP+GWAS[total.index,3])/(2*wsws)) false.number=length(levels(factor(bonf.pool[!(bonf.pool%in%qtn.pool)]))) for(j in 1:length(qtn.pool)){ pbin=min(GWAS[bonf.pool==qtn.pool[j],4]) cut[,j]=pbin } if(theWS==1){ totalfalse=false.number }else{ totalfalse=c(totalfalse,false.number) } cutoff[theWS,]=sort(cut) } #Calculate FDR and T1 for(j in 1:ncol(cutoff)){ theWS=1 for (theWS in 1:length(WS)){ p.index=which(GWAS[,4]<=cutoff[theWS,j]) wsws=WS[theWS] qtn.pool=ceiling((GWAS[seqQTN,2]*MaxBP+GWAS[seqQTN,3])/(2*wsws)) bonf.pool=ceiling((GWAS[p.index,2]*MaxBP+GWAS[p.index,3])/(2*wsws)) qtn.number=length(levels(factor(bonf.pool[bonf.pool%in%qtn.pool]))) false.number=length(levels(factor(bonf.pool[!(bonf.pool%in%qtn.pool)]))) if(theWS==1){ final=false.number final.fdr=false.number/(qtn.number+false.number) final.t1=false.number/totalfalse[theWS] }else{ record=false.number record.fdr=false.number/(qtn.number+false.number) record.t1=false.number/totalfalse[theWS] final=c(final,record) final.fdr=c(final.fdr,record.fdr) final.t1=c(final.t1,record.t1) } } #record FDR and T1 if(j==1){ number.record=final fdr.record=final.fdr t1.record=final.t1 }else{ number.record=rbind(number.record,final) fdr.record=rbind(fdr.record,final.fdr) t1.record=rbind(t1.record,final.t1) } } table=number.record fdrtable=fdr.record t1table=t1.record #AUC auc.final.fdr=NULL auc.final.t1=NULL for (theWS in 1:length(WS)){ auc.fdr=GAPIT.AUC(beta=powerlist,alpha=fdrtable[,theWS]) auc.t1=GAPIT.AUC(beta=powerlist,alpha=t1table[,theWS]) auc.final.fdr=c(auc.final.fdr,auc.fdr) auc.final.t1=c(auc.final.t1,auc.t1) } return(list(P=cutoff,Power=powerlist,FDR=fdrtable,TypeI=t1table,False=table,AUC.FDR=auc.final.fdr,AUC.T1=auc.final.t1)) }#end of `GAPIT.FDR.TypeI` 
#=============================================================================================`GAPIT.FilterByTaxa` <- function(taxa,Data){ #Object: To filter a data (Y, CV or GD) by taxa #Input: taxa - vector of taxa #Input: data - data frame with first column as taxa #Requirement: all taxa must be in data #Output: filtered data #Authors: Zhiwu Zhang # Last update: May 22, 2013 ############################################################################################## #print("GAPIT.FilterByTaxa Started") Data=Data[match(taxa, Data[,1], nomatch = 0),] return (Data) }#The function GAPIT.FilterByTaxa ends here #============================================================================================= `GAPIT.Fragment` <- function(file.path=NULL,file.from=NULL, file.to=NULL,file.total=NULL,file.G=NULL, file.Ext.G=NULL,seed=123,SNP.fraction=1,SNP.effect="Add",SNP.impute="Middle", genoFormat=NULL, file.GD=NULL, file.Ext.GD=NULL, file.GM=NULL, file.Ext.GM=NULL, file.fragment=NULL, file=1,frag=1,LD.chromosome=NULL,LD.location=NULL,LD.range=NULL, Create.indicator = FALSE, Major.allele.zero = FALSE){ #Object: To load SNPs on a (frag)ment in file (this is to replace sampler) #Output: genotype data sampled #Authors: Alex Lipka and Zhiwu Zhang # Last update: August 18, 2011 ############################################################################################## #print("Fragmental reading...") genoFormat="hapmap" if(!is.null(file.GD)&is.null(file.G)) genoFormat="EMMA" if(genoFormat=="hapmap"){ #Initical G #print("Reading file...") G=NULL if(frag==1){ skip.1=0 G <- try(read.delim(paste(file.path,file.G,file, ".",file.Ext.G,sep=""), head = FALSE,skip = skip.1, nrows = file.fragment+1),silent=TRUE) }else{ skip.1 <- (frag-1)*file.fragment +1 G <- try(read.delim(paste(file.path,file.G,file, ".",file.Ext.G,sep=""), head = FALSE,skip = skip.1, nrows = file.fragment),silent=TRUE ) } #print("processing the data...") if(inherits(G, "try-error")) { G=NULL #print("File end reached for G!!!") } if(is.null(G)){ #print("The above error indicating reading after end of file (It is OK).") return(list(GD=NULL,GI=NULL,GT=NULL,linesRead=NULL,GLD=NULL,heading=NULL) ) } #print("Calling hapmap...") heading=(frag==1) #Recording number of lineas read if(heading){ n= nrow(G)-1 }else{ n= nrow(G) } linesRead=n #Sampling if(SNP.fraction<1){ #print("Number of SNP in this pragment:") #print(n) #set.seed(seed+(file*1000)+frag) #mySample=sample(1:n,max(2,floor(n*as.numeric(as.vector(SNP.fraction))))) mySample=sample(1:n,max(2,floor(n*SNP.fraction))) #print("@@@@@@@@@@") #print(mySample) #print(length(mySample)) if(heading){ G=G[c(1,(1+mySample)),] }else{ G=G[mySample,] } } #end of if(SNP.fraction<1) print("Call hapmap from fragment") hm=GAPIT.HapMap(G,SNP.effect=SNP.effect,SNP.impute=SNP.impute,heading=heading, Create.indicator = Create.indicator, Major.allele.zero = Major.allele.zero) #print("Extracting snps for LD plot...") #Extract SNPs for LD plot if(!is.null(LD.chromosome) & !is.null(hm$GD)){ index=(G[,3]==LD.chromosome[1]) & abs((as.numeric(G[,4])-as.numeric(LD.location[1]))<(as.numeric(LD.range[1])/2)) GLD=G[index,] }else{ GLD=NULL } #rm(G) #gc() print("hapmap called successfuly from fragment") return(list(GD=hm$GD,GI=hm$GI,GT=hm$GT,linesRead=linesRead,GLD=GLD,heading=heading,G=G)) print("ERROR: It should not get here!!!") } #end of "hapmap" if(genoFormat=="EMMA"){ #print("The file is a numerical format!") #Initial GD GD=NULL skip.1 <- (frag-1)*file.fragment #Skip the remaining columns GD.temp <- 
try(read.table(paste(file.path,file.GD, file, ".", file.Ext.GD,sep=""), head = TRUE, nrows = 1),silent=TRUE) num.SNP <- ncol(GD.temp)-1 rm(GD.temp) read.in <- min(file.fragment,(num.SNP-skip.1)) skip.2 <- max((num.SNP - (skip.1 + read.in)),0) print(paste(file.path,file.GD,file, ".",file.Ext.GD,sep="")) GD <- try(read.table(paste(file.path,file.GD,file, ".",file.Ext.GD,sep=""), head = TRUE, colClasses = c("factor", rep("NULL", skip.1), rep("numeric", read.in), rep("NULL", skip.2))) ,silent=TRUE) GI <- try(read.table(paste(file.path,file.GM,file, ".",file.Ext.GM,sep=""), head = TRUE, skip=skip.1, nrows=file.fragment) ,silent=TRUE) if(inherits(GD, "try-error")) { GD=NULL print("File end reached for GD!!!") } if(inherits(GI, "try-error")) { GI=NULL print("File end reached for GI!!!") } if(is.null(GD)) return(list(GD=NULL, GI=NULL,GT=NULL,linesRead=NULL,GLD=NULL)) GT=GD[,1] #Extract infividual names GD=GD[,-1] #Remove individual names #print("Numerical file read sucesfuly from fragment") linesRead=ncol(GD) if(SNP.fraction==1) return(list(GD=GD, GI=GI,GT=GT,linesRead=linesRead,GLD=NULL)) if(SNP.fraction<1){ n= ncol(GD) #set.seed(seed+file) sample=sample(1:n,floor(n*SNP.fraction)) return(list(GD=GD[,sample], GI=GI[sample,],GT=GT,linesRead=linesRead,GLD=NULL)) } } # end of the "EMMA" #print("fragment ended succesfully!") }#End of fragment #============================================================================================= `GAPIT.GS` <- function(KW,KO,KWO,GAU,UW){ #Object: to derive BLUP for the individuals without phenotype #UW:BLUP and PEV of ID with phenotyp #Output: BLUP #Authors: Zhiwu Zhang # Last update: Oct 22, 2015 by Jiabo Wang ############################################################################################## #print(dim(UW)) UO=try(t(KWO)%*%solve(KW)%*%UW,silent=TRUE) #print(dim(KWO)) #kinship without inference #print(dim(KW)) #kinship within inference #print(dim(UW)) #BLUP AND PEV of reference if(inherits(UO, "try-error")) UO=t(KWO)%*%ginv(KW)%*%UW n=ncol(UW) #get number of columns, add additional for individual name BLUP=data.frame(as.matrix(GAU[,1:4])) BLUP.W=BLUP[which(GAU[,3]<2),] W_BLUP=BLUP.W[order(as.numeric(as.matrix(BLUP.W[,4]))),] UW=UW[which(rownames(UW)==colnames(KW)),] # get phenotype groups order ID.W=as.numeric(as.matrix(W_BLUP[,4])) n.W=max(ID.W) DS.W=diag(n.W)[ID.W,] ind.W=DS.W%*%UW all.W=cbind(W_BLUP,ind.W) all=all.W BLUP.O=BLUP[which(GAU[,3]==2),] O_BLUP=BLUP.O[order(as.numeric(as.matrix(BLUP.O[,4]))),] #print(dim(O_BLUP)) if(nrow(O_BLUP)>0){ ID.O=as.numeric(as.matrix(O_BLUP[,4])) n.O=max(ID.O) DS.O=diag(n.O)[ID.O,] ind.O=DS.O%*%UO all.O=cbind(O_BLUP,ind.O) all=rbind(all.W,all.O) } colnames(all)=c("Taxa", "Group", "RefInf","ID","BLUP","PEV") print("GAPIT.GS accomplished successfully!") return(list(BLUP=all)) }#The function GAPIT.GS ends here #============================================================================================= `GAPIT.GS.Visualization` <- function(gsBLUP = gsBLUP, BINS=BINS, name.of.trait = name.of.trait){ #Object: To build heat map to show distribution of BLUP and PEV #Output: pdf #Authors: Zhiwu Zhang # Last update: May 15, 2011 ############################################################################################## nBin=BINS BLUP= gsBLUP[,5] PEV = gsBLUP[,6] if(BLUP[1]=="NaN"){ warning ("It was not converged. 
BLUP was not created!") } if(BLUP[1]!="NaN" ) { BLUP.max=try(max(BLUP)) BLUP.min=try(min(BLUP)) if(inherits(BLUP.max, "try-error")) return() range.BLUP=BLUP.max-BLUP.min range.PEV=max(PEV)-min(PEV) interval.BLUP=range.BLUP/nBin interval.PEV=range.PEV/nBin bin.BLUP=floor(BLUP/max(BLUP)*nBin)*max(BLUP)/nBin bin.PEV=floor(PEV/max(PEV)*nBin)*max(PEV)/nBin distinct.BLUP=unique(bin.BLUP) distinct.PEV=unique(bin.PEV) if((length(distinct.BLUP)<2) | (length(distinct.PEV)<2) ) return() #nothing to plot Position.BLUP=match(bin.BLUP,distinct.BLUP,nomatch = 0) Position.PEV=match(bin.PEV,distinct.PEV,nomatch = 0) value=matrix(1,length(Position.BLUP)) KG<- (tapply(as.numeric(value), list(Position.BLUP, Position.PEV), sum)) rownames(KG)=round(distinct.BLUP, digits = 4) colnames(KG)=round(distinct.PEV, digits = 4) #Sort the rows and columns in order from smallest to largest rownames(KG) <- rownames(KG)[order(as.numeric(rownames(KG)))] colnames(KG) <- colnames(KG)[order(as.numeric(colnames(KG)))] rownames(KG) <- round(as.numeric(rownames(KG))) colnames(KG) <- round(as.numeric(colnames(KG))) #write.table(KG, "Input_Matrix_for_GS_Heat_Map.txt", quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE) pdf(paste("GAPIT.", name.of.trait,".GPS.BLUPvsPEV", ".pdf", sep = ""),width = 9) #par(mfrow = c(1,1), mar = c(1,1,5,5), lab = c(5,5,7)) par(mar = c(5,5,6,5)) nba_heatmap <- heatmap.2(KG, Rowv=NA, Colv=NA, col = rev(heat.colors(256)), # scale="column", xlab = "PEV", ylab = "BLUP", main = " ", scale="none", symkey=FALSE, trace="none") #nba_heatmap <- heatmap.2(KG, cexRow =.2, cexCol = 0.2, scale="none", symkey=FALSE, trace="none" ) #cexRow =0.9, cexCol = 0.9) dev.off() } #print("GAPIT.GS.Visualization accomplished successfully!") } #GAPIT.GS.Visualization ends here #============================================================================================= `GAPIT.Genotype` <- function(G=NULL,GD=NULL,GM=NULL,KI=NULL, kinship.algorithm="Zhang",SNP.effect="Add",SNP.impute="Middle",PCA.total=0,PCA.col=NULL,PCA.3d=PCA.3d,seed=123, SNP.fraction =1, file.path=NULL,file.from=NULL, file.to=NULL, file.total=NULL, file.fragment = 1000,SNP.test=TRUE, file.G =NULL,file.Ext.G =NULL, file.GD=NULL,file.Ext.GD=NULL, file.GM=NULL,file.Ext.GM=NULL, SNP.MAF=0.05,FDR.Rate = 0.05,SNP.FDR=1, Timmer=NULL,Memory=NULL, LD.chromosome=NULL,LD.location=NULL,LD.range=NULL, SNP.CV=NULL, GP = NULL,GK = NULL,GTindex=NULL, bin.size = 1000,inclosure.size = 100, sangwich.top=NULL,sangwich.bottom=NULL, file.output=TRUE,kinship.cluster="average",NJtree.group=NULL,NJtree.type=c("fan","unrooted"), Create.indicator = FALSE, Major.allele.zero = FALSE,Geno.View.output=TRUE){ #Object: To unify genotype and calculate kinship and PC if required: # 1.For G data, convert it to GD and GI # 2.For GD and GM data, nothing change # 3.Samling GD and create KI and PC # 4.Go through multiple files # 5.In any case, GD must be returned (for QC) #Output: GD, GI, GT, KI and PC #Authors: Zhiwu Zhang #Last update: August 11, 2011 ############################################################################################## #print("Genotyping: numericalization, sampling kinship, PCs and much more...") Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Genotype start") Memory=GAPIT.Memory(Memory=Memory,Infor="Genotype start") #Create logical variables byData=!is.null(G) | !is.null(GD) byFile=!is.null(file.G) | !is.null(file.GD) hasGenotype=(byData | byFile ) needKinPC=(is.null(KI) | PCA.total>0 | kinship.algorithm=="Separation") if(!is.null(KI) & !byData & !byFile & !SNP.test 
&kinship.algorithm!="SUPER") { print("It return unexpected") return (list(GD=NULL,GI=NULL,GT=NULL,hasGenotype=FALSE, genoFormat=NULL, KI=KI,PC=NULL,byFile=FALSE,fullGD=TRUE,Timmer=Timmer,Memory=Memory)) } #Set indicator for full GD fullGD=FALSE if(byData) fullGD=TRUE if(byFile & SNP.fraction==1 & needKinPC) fullGD=TRUE #SET GT to NULL in case of no genotype if(!byData & !byFile & is.null(GK) &kinship.algorithm!="SUPER") { if(is.null(KI) & is.null(GP) & is.null(GK)) stop("GAPIT says: Kinship has to be provided or estimated from genotype!!!") return (list(GD=NULL,GI=NULL,GT=NULL,hasGenotype=FALSE, genoFormat=NULL, KI=KI,PC=NULL,byFile=FALSE,fullGD=TRUE,Timmer=Timmer,Memory=Memory)) } genoFormat="hapmap" if(is.null(G)&is.null(file.G)) genoFormat="EMMA" #Multiple genotype files #In one of the 3 situations, calculate KI with the algorithm specified, otherwise skip cit by setting algorithm to "SUPER" kinship.algorithm.save=kinship.algorithm kinship.algorithm="SUPER" #Normal if(is.null(sangwich.top) & is.null(sangwich.bottom) ) kinship.algorithm=kinship.algorithm.save #TOP or Bottom is MLM pass.top=FALSE if(!is.null(sangwich.top)) pass.top=!(sangwich.top=="FaST" | sangwich.top=="SUPER" | sangwich.top=="DC") pass.bottom=FALSE if(!is.null(sangwich.bottom)) pass.bottom=!(sangwich.bottom=="FaST" | sangwich.bottom=="SUPER" | sangwich.bottom=="DC") if(pass.top | pass.bottom )kinship.algorithm=kinship.algorithm.save #Compatibility of input #agreement among file from, to and total if(!is.null(file.from) &!is.null(file.to) &!is.null(file.total)) { if(file.total!=(file.to-file.from+1)) stop("GAPIT says: Conflict among file (from, to and total)") } if(!is.null(file.from) &!is.null(file.to)) { if(file.to=1") if(!is.null(G) & !is.null(GD)) stop("GAPIT Ssays: Both hapmap and EMMA format exist, choose one only.") if(!is.null(file.GD) & is.null(file.GM) & (!is.null(GP)|!is.null(GK)) ) stop("GAPIT Ssays: Genotype data and map files should be in pair") if(is.null(file.GD) & !is.null(file.GM) & (!is.null(GP)|!is.null(GK)) ) stop("GAPIT Ssays: Genotype data and map files should be in pair") if(!is.null(GD) & is.null(GM) & (is.null(GP)&is.null(GK)) &kinship.algorithm!="SUPER") stop("GAPIT Says: Genotype data and map files should be in pair") if(is.null(GD) & !is.null(GM) & (is.null(GP)&is.null(GK)) &kinship.algorithm!="SUPER") stop("GAPIT Says: Genotype data and map files should be in pair") #if(!byData & !byFile) stop("APIT Ssays: Either genotype data or files should be given!") #if(byData&(!is.null(file.path))) stop ("APIT Ssays: You have provided geotype data. 
file.path should not be provided!") #print("Pass compatibility of input") Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Genotype loaded") Memory=GAPIT.Memory(Memory=Memory,Infor="Genotype loaded") #Inital GLD GLD=NULL SNP.QTN=NULL #Intitial GT=NULL #Handler of read data in numeric format (EMMA) #Rename GM as GI if(!is.null(GM))GI=GM rm(GM) gc() #Extract GD and GT from read data GD if(!is.null(GD) ) { GT=as.matrix(GD[,1]) #get taxa GD=as.matrix(GD[,-1]) #remove taxa column Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GT created from GD)") Memory=GAPIT.Memory(Memory=Memory,Infor="GT created from GD") } #Hapmap format if(!is.null(G)) { Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Before HapMap") Memory=GAPIT.Memory(Memory=Memory,Infor="Before HapMap") #Convert HapMap to numerical print(paste("Converting genotype...",sep="")) hm=GAPIT.HapMap(G,SNP.effect=SNP.effect,SNP.impute=SNP.impute, Create.indicator = Create.indicator, Major.allele.zero = Major.allele.zero) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="after HapMap") Memory=GAPIT.Memory(Memory=Memory,Infor="after HapMap") #Extracting SNP for LD plot if(!is.null(LD.chromosome)) { #print("Extracting SNP for LD plot...") chromosome=(G[,3]==LD.chromosome[1]) bp=as.numeric(as.vector(G[,4])) deviation=abs(bp-as.numeric(as.vector(LD.location[1])) ) location=deviation< as.numeric(as.vector(LD.range[1]) ) index=chromosome&location GLD=G[index,] }else{ #print("No data in GLD") GLD=NULL } Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="HapMap") Memory=GAPIT.Memory(Memory=Memory,Infor="HapMap") print(paste("Converting genotype done.",sep="")) #rm(G) #gc() Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="G removed") Memory=GAPIT.Memory(Memory=Memory,Infor="G removed") GT=hm$GT GD=hm$GD GI=hm$GI # #print(unique(GI[,2])) rm(hm) gc() Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="hm removed") Memory=GAPIT.Memory(Memory=Memory,Infor="hm removed") } #From files if(!byData & byFile) { #print("Loading genotype from files...") Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="byFile") Memory=GAPIT.Memory(Memory=Memory,Infor="byFile") numFileUsed=file.to if(!needKinPC) numFileUsed=file.from #Initial GI as storage GD=NULL GT=NULL GI=NULL GLD=NULL #multiple fragments or files for (file in file.from:numFileUsed) { frag=1 numSNP=file.fragment myFRG=NULL #print(paste("numSNP before while is ",numSNP)) while(numSNP==file.fragment) { #this is problematic if the read end at the last line print(paste("Reading file: ",file,"Fragment: ",frag)) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Before Fragment") Memory=GAPIT.Memory(Memory=Memory,Infor="Before Fragment") myFRG=GAPIT.Fragment( file.path=file.path,file.from=file.from, file.to=file.to,file.total=file.total,file.G=file.G,file.Ext.G=file.Ext.G, seed=seed,SNP.fraction=SNP.fraction,SNP.effect=SNP.effect,SNP.impute=SNP.impute,genoFormat=genoFormat, file.GD=file.GD,file.Ext.GD=file.Ext.GD,file.GM=file.GM,file.Ext.GM=file.Ext.GM, file.fragment=file.fragment,file=file,frag=frag, LD.chromosome=LD.chromosome,LD.location=LD.location,LD.range=LD.range, Create.indicator = Create.indicator, Major.allele.zero = Major.allele.zero) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="After Fragment") Memory=GAPIT.Memory(Memory=Memory,Infor="After Fragment") if(is.null(GT) & !is.null(myFRG$GT))GT= as.matrix(myFRG$GT) if(is.null(GD)) { GD= myFRG$GD }else{ if(!is.null(myFRG$GD)) { GD=cbind(GD,myFRG$GD) } } if(is.null(GI)) { GI= myFRG$GI }else{ if(!is.null(myFRG$GI)) { colnames(myFRG$GI)=c("SNP","Chromosome","Position") GI=as.data.frame(rbind(as.matrix(GI),as.matrix(myFRG$GI))) } } if(is.null(G)) { 
G= myFRG$G }else{ if(!is.null(myFRG$G)) { G=as.data.frame(rbind(as.matrix(G),as.matrix(myFRG$G[-1,]))) } } if(is.null(GLD)) { GLD= myFRG$GLD }else{ if(!is.null(myFRG$GLD)) { if(myFRG$heading) { GLD=as.data.frame(rbind(as.matrix(GLD),as.matrix(myFRG$GLD[-1,]))) }else{ GLD=as.data.frame(rbind(as.matrix(GLD),as.matrix(myFRG$GLD))) } } } if(file==file.from & frag==1)GT=as.matrix(myFRG$GT) frag=frag+1 if(!is.null(myFRG$GI)) { numSNP=myFRG$linesRead[1] }else{ numSNP=0 } if(!needKinPC)numSNP=0 #force to end the while loop if(is.null(myFRG))numSNP=0 #force to end the while loop Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="END this Fragment") Memory=GAPIT.Memory(Memory=Memory,Infor="END this Fragment") } #end whileof repeat on fragment # print("This file is OK") } #end of file loop print("All files loaded") } #end of if(!byData&byFile) #GM=as.matrix(GI) #GI=GM GM=GI # modified by Jiabo in 20190927. sorted number of chrom by numeric and charicter chor_taxa=as.character(unique(GM[,2])) chor_taxa[order(gsub("([A-Z]+)([0-9]+)", "\\1", chor_taxa), as.numeric(gsub("([A-Z]+)([0-9]+)", "\\2", chor_taxa)))] chr_letter=grep("[A-Z]|[a-z]",chor_taxa) if(!setequal(integer(0),chr_letter)) { GI=as.matrix(GI) for(i in 1:(length(chor_taxa))) { index=GM[,2]==chor_taxa[i] GI[index,2]=i } } #print(head(GI)) #print(head(GI)) #print("@@@@@@@@@@@") #print(GD[1:5,1:5]) #print(dim(GI)) #Follow the MAF to filter markers if(!is.null(GD)) { #maf=apply(as.matrix(GD),2,function(one) abs(1-sum(one)/(2*nrow(GD)))) #maf[maf>0.5]=1-maf[maf>0.5] ss=apply(GD,2,sum) maf=apply(cbind(.5*ss/(nrow(GD)),1-.5*ss/(nrow(GD))),1,min) #print(max(maf)) #print(min(maf)) maf_index=maf>=SNP.MAF print(paste("GAPIT will filter marker with MAF setting !!")) print(paste("The markers will be filtered by SNP.MAF: ",SNP.MAF,sep="")) print(table(maf_index)) #print(head(maf[!maf_index])) GD=GD[,maf_index] GM=as.data.frame(GI[maf_index,]) GI=GM } #print("file loaded") Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Sampling genotype") Memory=GAPIT.Memory(Memory=Memory,Infor="Sampling genotype") #print(KI) #Plot third part kinship if(!is.null(KI)&file.output) { if(KI!=1) { if(nrow(KI)<1000) { print("Plotting Kinship") #print(dim(KI)) theKin=as.matrix(KI[,-1]) line.names <- KI[,1] colnames(theKin)=KI[,1] rownames(theKin)=KI[,1] distance.matrix=dist(theKin,upper=TRUE) hc=hclust(distance.matrix,method=kinship.cluster) hcd = as.dendrogram(hc) ##plot NJtree if(!is.null(NJtree.group)) { clusMember <- cutree(hc, k = NJtree.group) compress_z=table(clusMember,paste(line.names)) type_col=rainbow(NJtree.group) Optimum=c(nrow(theKin),kinship.cluster,NJtree.group) } Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="set kinship") Memory=GAPIT.Memory(Memory=Memory,Infor="set kinship") print("Creating heat map for kinship...") pdf(paste("GAPIT.Kin.thirdPart.pdf",sep=""), width = 12, height = 12) par(mar = c(25,25,25,25)) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="prepare heatmap") Memory=GAPIT.Memory(Memory=Memory,Infor="prepare heatmap") heatmap.2(theKin, cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none") dev.off() print("Kinship heat map PDF created!") Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="plot heatmap") Memory=GAPIT.Memory(Memory=Memory,Infor="plot heatmap") ## Jiabo Wang add NJ Tree of kinship at 4.5.2017 if(!is.null(NJtree.group)) { for(tr in 1:length(NJtree.type)) { print("Creating NJ Tree for kinship...") pdf(paste("GAPIT.Kin.NJtree.",NJtree.type[tr],".pdf",sep=""), width = 12, height = 12) par(mar = c(5,5,5,5)) 
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="prepare NJ TREE") Memory=GAPIT.Memory(Memory=Memory,Infor="prepare NJ TREE") plot(as.phylo(hc), type = NJtree.type[tr], tip.color =type_col[clusMember], use.edge.length = TRUE, col = "gray80",cex=0.6) legend("topright",legend=paste(c("Tatal individuals is: ","Cluster method: ","Group number: "), Optimum[c(1:3)], sep=""),lty=0,cex=1.3,bty="n",bg=par("bg")) dev.off() } } write.table(compress_z,paste("GAPIT.Kin.NJtree.compress_z.txt",sep=""),quote=F) print("Kinship NJ TREE PDF created!") Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="plot NJ TREE") Memory=GAPIT.Memory(Memory=Memory,Infor="plot NJ TREE") #rm(hc,clusMember) }#end ## NJ Tree end } #end of if(nrow(KI)<1000) } #end of if(KI!=1) } #end of if(!is.null(KI)) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Before SUPER") Memory=GAPIT.Memory(Memory=Memory,Infor="Before SUPER") #SUPER if(!is.null(GP) & kinship.algorithm=="SUPER" & !is.null(bin.size) & !is.null(inclosure.size)) { mySpecify=GAPIT.Specify(GI=GI,GP=GP,bin.size=bin.size,inclosure.size=inclosure.size) SNP.QTN=mySpecify$index if(!is.null(GD)) { GK=GD[,SNP.QTN] SNPVar=apply(as.matrix(GK),2,var) GK=GK[,SNPVar>0] GK=cbind(as.data.frame(GT),as.data.frame(GK)) #add taxa } } Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Before creating kinship") Memory=GAPIT.Memory(Memory=Memory,Infor="Before creating kinship") PC=NULL thePCA=NULL if(PCA.total>0 | kinship.algorithm=="Separation") { thePCA=GAPIT.PCA(X = GD, taxa = GT, PC.number = PCA.total,file.output=file.output,PCA.total=PCA.total,PCA.col=PCA.col,PCA.3d=PCA.3d) PC=thePCA$PCs[,1:(1+PCA.total)] Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PCA") Memory=GAPIT.Memory(Memory=Memory,Infor="PCA") print("PC created") } #Create kinship from genotype if not provide if(is.null(KI) & (!is.null(GD) |!is.null(GK)) & !kinship.algorithm%in%c("FarmCPU","Blink","MLMM")) { print("Calculating kinship...") if(!is.null(GK)) { thisGD=GK[,-1] myGT=as.matrix(GK[,1]) print("GK is used to create KI") }else{ thisGD=GD myGT=GT } print(paste("Number of individuals and SNPs are ",nrow(thisGD)," and ",ncol(thisGD))) theKin=NULL if(is.null(PCA.col)&!is.null(NJtree.group))PCA.col=type_col[clusMember] if(kinship.algorithm=="EMMA") { half.thisGD = as.matrix(.5*thisGD) if(length(which(is.na(half.thisGD))) > 0) { print("Substituting missing values with heterozygote for kinship matrrix calculation....") half.thisGD[which(is.na(half.thisGD))] = 1 } theKin= emma.kinship(snps=t(as.matrix(.5*thisGD)), method="additive", use="all") } if(kinship.algorithm=="Loiselle")theKin= GAPIT.kinship.loiselle(snps=t(as.matrix(.5*thisGD)), method="additive", use="all") if(kinship.algorithm=="VanRaden")theKin= GAPIT.kinship.VanRaden(snps=as.matrix(thisGD)) if(kinship.algorithm=="Zhang")theKin= GAPIT.kinship.ZHANG(snps=as.matrix(thisGD)) if(kinship.algorithm=="Separation")theKin= GAPIT.kinship.separation(PCs=thePCA$PCs,EV=thePCA$EV,nPCs=PCA.total) if(!is.null(theKin)) { colnames(theKin)=myGT rownames(theKin)=myGT line.names <- myGT if (!is.null(NJtree.group)) { distance.matrix=dist(theKin,upper=TRUE) hc=hclust(distance.matrix,method=kinship.cluster) hcd = as.dendrogram(hc) clusMember <- cutree(hc, k = NJtree.group) compress_z=table(clusMember,paste(line.names)) type_col=rainbow(NJtree.group) Optimum=c(nrow(theKin),kinship.cluster,NJtree.group) } print("kinship calculated") if(length(GT)<1000 &file.output) { #Create heat map for kinship print("Creating heat map for kinship...") pdf(paste("GAPIT.Kin.",kinship.algorithm,".pdf",sep=""), width = 12, height = 12) par(mar = 
c(25,25,25,25)) heatmap.2(theKin, cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none") dev.off() print("Kinship heat map created") ## Jiabo Wang add NJ Tree of kinship at 4.5.2017 if (!is.null(NJtree.group)) { print("Creating NJ Tree for kinship...") for(tr in 1:length(NJtree.type)) { pdf(paste("GAPIT.Kin.NJtree.",NJtree.type[tr],".pdf",sep=""), width = 12, height = 12) par(mar = c(0,0,0,0)) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="prepare NJ TREE") Memory=GAPIT.Memory(Memory=Memory,Infor="prepare NJ TREE") plot(as.phylo(hc), type = NJtree.type[tr], tip.color =type_col[clusMember], use.edge.length = TRUE, col = "gray80",cex=0.6) #legend("topright",legend=c(paste("Tatal numerber of individuals is ",),lty=0,cex=1.3,bty="n",bg=par("bg")) legend("topright",legend=paste(c("Tatal individuals is: ","Group method: ","Group number: "), Optimum[c(1:3)], sep=""),lty=0,cex=1.3,bty="n",bg=par("bg")) dev.off() } # print(Optimum) write.table(compress_z,paste("GAPIT.Kin.NJtree.compress_z.txt",sep=""),quote=F) print("Kinship NJ TREE PDF created!") Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="plot NJ TREE") Memory=GAPIT.Memory(Memory=Memory,Infor="plot NJ TREE") rm(hc) }#end NJtree } print("Adding IDs to kinship...") #Write the kinship into a text file KI=cbind(myGT,as.data.frame(theKin)) #This require big memory. Need a way to solve it. print("Writing kinship to file...") if(file.output) write.table(KI, paste("GAPIT.Kin.",kinship.algorithm,".csv",sep=""), quote = FALSE, sep = ",", row.names = FALSE,col.names = FALSE) print("Kinship save as file") rm(theKin) gc() } Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Estimating kinship") Memory=GAPIT.Memory(Memory=Memory,Infor="Estimating kinship") print("Kinship created!") } #end of if(is.null(KI)&!is.null(GD)) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="after creating kinship") Memory=GAPIT.Memory(Memory=Memory,Infor="after creating kinship") #LD plot #print("LD section") if(!is.null(GLD) &file.output) { if(nrow(GLD)>500) { GLD=GLD[1,] print("WARNING: The number of SNPs requested is beyond limitation. 
No LD plot created.") } if(nrow(GLD)>1) { print("Plot LD...") hapmapgeno= data.frame(as.matrix(t(GLD[,-c(1:11)]))) hapmapgeno[hapmapgeno=="NN"]=NA hapmapgeno[hapmapgeno=="XX"]=NA hapmapgeno[hapmapgeno=="--"]=NA hapmapgeno[hapmapgeno=="++"]=NA hapmapgeno[hapmapgeno=="//"]=NA LDdist=as.numeric(as.vector(GLD[,4])) LDsnpName=GLD[,1] colnames(hapmapgeno)=LDsnpName #Prune SNM names #LDsnpName=LDsnpName[GAPIT.Pruning(LDdist,DPP=7)] LDsnpName=LDsnpName[c(1,length(LDsnpName))] #keep the first and last snp names only #print(hapmapgeno) print("Getting genotype object") LDsnp=makeGenotypes(hapmapgeno,sep="",method=as.genotype) #This need to be converted to genotype object print("Caling LDheatmap...") pdf(paste("GAPIT.LD.chromosom",LD.chromosome,"(",round(max(0,LD.location-LD.range)/1000000),"_",round((LD.location+LD.range)/1000000),"Mb)",".pdf",sep=""), width = 12, height = 12) #pdf(paste("GAPIT.LD.pdf",sep=""), width = 12, height = 12) par(mar = c(25,25,25,25)) MyHeatmap <- try(LDheatmap(LDsnp, LDdist, LDmeasure="r", add.map=TRUE, SNP.name=LDsnpName,color=rev(cm.colors(20)), name="myLDgrob", add.key=TRUE,geneMapLabelY=0.1) ) if(!inherits(MyHeatmap, "try-error")) { #Modify the plot grid.edit(gPath("myLDgrob", "Key", "title"), gp=gpar(cex=.5, col="blue")) #edit key title size and color grid.edit(gPath("myLDgrob", "geneMap", "title"), gp=gpar(just=c("center","bottom"), cex=0.8, col="black")) #Edit gene map title grid.edit(gPath("myLDgrob", "geneMap","SNPnames"), gp = gpar(cex=0.3,col="black")) #Edit SNP name }else{ print("Warning: error in converting genotype. No LD plot!") } dev.off() print("LD heatmap crated") }else{ # alternative of if(nrow(GLD)>1) print("Warning: There are less than two SNPs on the region you sepcified. No LD plot!") } #end of #if(nrow(GLD)>1) }#end of if(!is.null(GLD)) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="after LD plot") Memory=GAPIT.Memory(Memory=Memory,Infor="after LD plot") ###output Marker density and decade of linkage disequilibrium over distance if(!is.null(GI) & !is.null(GD) & file.output & Geno.View.output) { ViewGenotype<-GAPIT.Genotype.View( myGI=GI, myGD=GD, ) } #print("Genotype successfully acomplished") return (list(G=G,GD=GD,GI=GI,GT=GT,hasGenotype=hasGenotype, genoFormat=genoFormat, KI=KI,PC=PC,byFile=byFile,fullGD=fullGD,Timmer=Timmer,Memory=Memory,SNP.QTN=SNP.QTN,chor_taxa=chor_taxa)) } #============================================================================================= `GAPIT.Genotype.View` <-function(myGI=NULL,myGD=NULL,chr=NULL, w1_start=NULL,w1_end=NULL,mav1=NULL){ # Object: Analysis for Genotype data:Distribution of SNP density,Accumulation,Moving Average of density,result:a pdf of the scree plot # myG:Genotype data # chr: chromosome value # w1_start:Moving Average windows Start Position # w1_end:Moving Average windows End Position # mav1:Moving Average set value length # Authors: You Tang and Zhiwu Zhang # Last update: March 11, 2016 ############################################################################################## #if(nrow(myGI)<1000) return() #Markers are not enough for this analysis if(is.null(myGI)){stop("Validation Invalid. Please select read valid Genotype flies !")} if(is.null(myGD)){stop("Validation Invalid. 
Please select read valid Genotype flies !")} if(is.null(w1_start)){w1_start=1} ##if(is.null(w1_end)){w1_end=100} if(is.null(mav1)){mav1=10} if(is.null(chr)){chr=1} #heterozygosity of individuals and SNPs (By Zhiwu Zhang) #print("Heterozygosity of individuals and SNPs (By Zhiwu Zhang)") X=myGD[,-1] H=1-abs(X-1) het.ind=apply(H,1,mean) het.snp=apply(H,2,mean) ylab.ind=paste("Frequency (out of ",length(het.ind)," individuals)",sep="") ylab.snp=paste("Frequency (out of ",length(het.snp)," markers)",sep="") pdf("GAPIT.Heterozygosity.pdf", width =10, height = 6) par(mfrow=c(1,2),mar=c(5,5,1,1)+0.1) hist(het.ind,col="gray", main="",ylab=ylab.ind, xlab="Heterozygosity of individuals") hist(het.snp,col="gray", main="",ylab=ylab.snp, xlab="Heterozygosity of markers") dev.off() rm(X, H, het.ind, het.snp) #Feree memory myFig21<-myGI myFig21<-myFig21[!is.na(as.numeric(as.matrix(myFig21[,3]))),] n<-nrow(myFig21) maxchr<-0 for(i in 1:n){ if(as.numeric(as.matrix(myFig21[i,2]))>maxchr){ maxchr<-as.numeric(as.matrix(myFig21[i,2])) } } n_end<-maxchr if(maxchr==0){ chr=0 } #n_end<-as.numeric(as.matrix(myFig21[n,2])) aaa<-NULL for(i in 0:n_end){ #myChr<-myFig21[myFig21[,2]==i,] myChr<-myFig21[as.numeric(as.matrix(myFig21[,2]))==i,] index<-order(as.numeric(as.matrix(as.data.frame(myChr[,3])))) aaa<-rbind(aaa,myChr[index,]) } myFig2<-aaa if(is.null(w1_end)){ if(nrow(myFig2[as.numeric(as.matrix(myFig2[,2]))==chr,])>100){ w1_end=100 }else{ w1_end=nrow(myFig2[as.numeric(as.matrix(myFig2[,2]))==chr,]) } } subResult<-matrix(0,n,1) for(i in 1 :( n-1)) { k<-as.numeric(as.matrix(myFig2[i+1,3]))-as.numeric(as.matrix(myFig2[i,3])) if(k>0){ subResult[i]<-k } else{ subResult[i]<-0 }} results<-cbind(myFig2,subResult) #####Out Distribution of SNP density ########## #####Out Accumulation########## kk0<-order(as.numeric(as.matrix(results[,4]))) myFig22<-results[kk0,] m<-nrow(myFig22) kk1<-matrix(1:m,m,1) results2<-cbind(myFig22,kk1) max2<-max(myFig22[,4]) pdf("GAPIT.Marker.Density.pdf", width =10, height = 6) par(mar=c(5,5,4,5)+0.1) hist(as.numeric(as.matrix(results[,4])),xlab="Density",main="Distribution of SNP",breaks=12, cex.axis=0.9,col = "dimgray",cex.lab=1.3)###,xlim=c(0,25040359)) par(new=T) plot(results2[,4],results2[,5]/m,xaxt="n", yaxt="n",bg="lightgray",xlab="",ylab="",type="l",pch=20,col="#990000",cex=1.0,cex.lab=1.3, cex.axis=0.9, lwd=3,las=1,xlim=c(0,max2)) axis(4,col="#990000",col.ticks="#990000",col.axis="#990000") mtext("Accumulation Frequency",side=4,line=3,font=2,font.axis=1.3,col="#990000") abline(h=0,col="forestgreen",lty=2) abline(h=1,col="forestgreen",lty=2) dev.off() #####Out Moving Average of density########## #print(unique(myGI[,2])) myGD0<-myGD[,as.numeric(myGI[,2])==chr] gc() myGM0<-myGI[myGI[,2]==chr,] ##remove invalid SNPs #X<-myGD0[,-1] X<-myGD0 colMax=apply(X,2,max) colMin=apply(X,2,min) #mono=as.numeric(colMax)-as.numeric(colMin) mono=colMax-colMin index=mono<10E-5 X=X[,!index] myFig3<-myGM0[!index,] n3<-nrow(myFig3) kk3<-order(as.numeric(as.matrix(myFig3[,3]))) myFig23<-myFig3[kk3,] myGD3<-X[,kk3] ##set windows long ##w1_start<-30 ##w1_end<-230 ###get windows numeric snp at the same chr #print(w1_start) #print(w1_end) #print(dim(myFig3)) if(nrow(myFig23)0&!is.null(DP$CV))CV=GAPIT.CVMergePC(DP$CV,PC) if(DP$PCA.total>0&is.null(DP$CV))CV=PC taxa_comGD=as.character(GD[,1]) taxa_comY=as.character(Y[,1]) taxa_CV=as.character(CV[,1]) taxa_comall=intersect(intersect(taxa_comGD,taxa_comY),taxa_CV) comCV=CV[taxa_CV%in%taxa_comall,] comGD=GD[taxa_comGD%in%taxa_comall,] comY=Y[taxa_comY%in%taxa_comall,] 
GT=as.matrix(as.character(taxa_comall)) print(paste("There are ",length(GT)," common individuals in genotype , phenotype and CV files.",sep="")) if(nrow(comCV)!=length(GT))stop ("GAPIT says: The number of individuals in CV does not match to the number of individuals in genotype files.") print("The dimension of total CV is ") print(dim(comCV)) print("GAPIT.IC accomplished successfully for multiple traits. Results are saved") if(DP$kinship.algorithm%in%c("FarmCPU","Blink","MLMM")){ return (list(Y=comY,GT=GT,PCA=comCV,K=DP$KI,GD=comGD,GM=DP$GM,myallCV=CV,myallGD=GD)) }else{ return (list(Y=comY,GT=GT,PCA=comCV,K=DP$KI,GD=comGD,GM=DP$GM,myallCV=CV,myallGD=GD,myallY=Y)) } } #end of GAPIT IC function #============================================================================================= `GAPIT.ID` <- function(DP=NULL,IC=NULL,SS=NULL,RS=NULL,cutOff=0.01, DPP=100000,Create.indicator=FALSE, FDR.Rate = 1,QTN.position=NULL,plot.style="Oceanic", file.output=TRUE,SNP.MAF=0,CG=NULL,plot.bin=10^9 ){ #Object: To Interpretation and Diagnoses #Designed by Zhiwu Zhang #Writen by Jiabo Wang #Last update: Novenber 3, 2016 ############################################################################################## print("GAPIT.ID in process...") #Define the funcitno here if(is.null(DP)&is.null(IC))#inputdata is other method result { GWAS=RS GI=RS[,1:3] GI=GI[order(GI[,2]),] GI=GI[order(GI[,1]),] #print(QTN.position) ps=RS[,4] nobs=nrow(RS) if(ncol(RS)>4) {maf=RS[,5] maf_pass=TRUE } if(ncol(RS)<5) {maf_pass=FALSE maf=0.5 } rsquare_base=rep(NA,length(ps)) rsquare=rep(NA,length(ps)) df=rep(NA,length(nobs)) tvalue=rep(NA,length(nobs)) stderr=rep(NA,length(nobs)) effect.est=rep(NA,length(nobs)) if(is.na(maf[1])) maf=matrix(.5,nrow(GWAS),1) print("Filtering SNPs with MAF..." ) index=maf>=SNP.MAF PWI.Filtered=cbind(GI,ps,maf,nobs,rsquare_base,rsquare)#[index,] colnames(PWI.Filtered)=c("SNP","Chromosome","Position ","P.value", "maf", "nobs", "Rsquare.of.Model.without.SNP","Rsquare.of.Model.with.SNP") if(!is.null(PWI.Filtered)) { print("Calculating FDR..." ) PWIP <- GAPIT.Perform.BH.FDR.Multiple.Correction.Procedure(PWI = PWI.Filtered, FDR.Rate = FDR.Rate, FDR.Procedure = "BH") print("QQ plot..." ) if(file.output) GAPIT.QQ(P.values = ps, name.of.trait = name.of.trait,DPP=DPP) print("Manhattan plot (Genomewise)..." ) if(file.output) GAPIT.Manhattan(GI.MP = cbind(GI[,-1],ps), name.of.trait = name.of.trait, DPP=DPP, plot.type = "Genomewise",cutOff=cutOff,seqQTN=QTN.position,plot.style=plot.style,plot.bin=plot.bin) print("Manhattan plot (Chromosomewise)..." ) #if(file.output) GAPIT.Manhattan(GI.MP = PWIP$PWIP[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Chromosomewise",cutOff=cutOff) if(file.output) GAPIT.Manhattan(GI.MP = cbind(GI[,-1],ps), name.of.trait = name.of.trait, DPP=DPP, plot.type = "Chromosomewise",cutOff=cutOff,plot.bin=plot.bin) #Association Table print("Association table..." ) #GAPIT.Table(final.table = PWIP$PWIP, name.of.trait = name.of.trait,SNP.FDR=SNP.FDR) # GWAS=PWIP$PWIP[PWIP$PWIP[,9]<=DP$SNP.FDR,] # print(head(GWAS)) print("Joining tvalue and stderr" ) DTS=cbind(GI,df,tvalue,stderr,effect.est) colnames(DTS)=c("SNP","Chromosome","Position","DF","t Value","std Error","effect") print("Creating ROC table and plot" ) if(file.output) myROC=GAPIT.ROC(t=tvalue,se=stderr,Vp=var(ys),trait=name.of.trait) print("ROC table and plot created" ) print("MAF plot..." 
) if(file.output&maf_pass) myMAF1=GAPIT.MAF(MAF=maf,P=ps,E=NULL,trait=name.of.trait) if(file.output){ write.table(GWAS, paste("GAPIT.", name.of.trait, ".GWAS.Results.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) write.table(DTS, paste("GAPIT.", name.of.trait, ".Df.tValue.StdErr.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) #if(!byPass) write.table(GWAS.2, paste("GAPIT.", name.of.trait, ".Allelic_Effect_Estimates.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) }#end file.output }#end DP }else{ #inputdata is GAPIT3 result name.of.trait=DP$memo GWAS=SS$GWAS #print(head(GWAS)) Pred=SS$Pred GI=GWAS GI=GI[order(GI[,3]),] GI=GI[order(GI[,2]),] byPass=TRUE if(DP$kinship.algorithm%in%c("FarmCPU","MLMM","Blink"))byPass=FALSE if(byPass) { # print(head(SS$GWAS)) ps=SS$TV$ps nobs=SS$TV$nobs maf=GWAS$maf #maf=SS$TV$maf rsquare_base=SS$TV$rsquare_base rsquare=SS$TV$rsquare df=SS$TV$df tvalue=SS$TV$tvalue stderr=SS$TV$stderr effect.est=SS$mc effect=SS$mc #GI=cbind(GI,effect) if(DP$file.output&!is.null(SS$Compression)&!is.na(SS$Compression[1,6])) GAPIT.Compression.Visualization(Compression = SS$Compression, name.of.trait = DP$name.of.trait) }else{ maf=GI$maf ps=GI$P.value nobs=GI$nobs rsquare_base=rep(NA,length(ps)) rsquare=rep(NA,length(ps)) df=rep(NA,length(nobs)) tvalue=rep(NA,length(nobs)) stderr=rep(NA,length(nobs)) effect.est=GI$effect } if(is.na(maf[1])) maf=matrix(.5,nrow(GI),1) if(!is.null(IC$GD)&DP$SNP.test) { print("Filtering SNPs with MAF..." ) #index=maf>=DP$SNP.MAF #PWI.Filtered=cbind(GI[,-5],rsquare_base,rsquare) PWI.Filtered=cbind(GWAS[,1:6],rsquare_base,rsquare) colnames(PWI.Filtered)=c("SNP","Chromosome","Position ","P.value", "maf", "nobs", "Rsquare.of.Model.without.SNP","Rsquare.of.Model.with.SNP") if(!is.null(PWI.Filtered)) { #Run the BH multiple correction procedure of the results #Create PWIP, which is a table of SNP Names, Chromosome, bp Position, Raw P-values, FDR Adjusted P-values print("Calculating FDR..." ) PWIP <- GAPIT.Perform.BH.FDR.Multiple.Correction.Procedure(PWI = PWI.Filtered, FDR.Rate = FDR.Rate, FDR.Procedure = "BH") #print(str(PWIP)) print("QQ plot..." ) if(DP$file.output) GAPIT.QQ(P.values = GI$P.value, name.of.trait = DP$name.of.trait,DPP=DP$DPP) print("Manhattan plot (Genomewise)..." ) if(DP$file.output) GAPIT.Manhattan(GI.MP = GWAS[,2:4], name.of.trait = DP$name.of.trait, DPP=DP$DPP, plot.type = "Genomewise",cutOff=DP$cutOff,seqQTN=DP$QTN.position,plot.style=DP$plot.style,plot.bin=DP$plot.bin,chor_taxa=DP$chor_taxa) #print("@@@@@@@@@@@@@@@@@@@@@@@@") print("Manhattan plot (Chromosomewise)..." ) if(DP$file.output) GAPIT.Manhattan(GI.MP = GWAS[,2:4],GD=IC$GD[,-1], CG=DP$CG,name.of.trait = DP$name.of.trait, DPP=DP$DPP, plot.type = "Chromosomewise",cutOff=DP$cutOff,plot.bin=DP$plot.bin) #Association Table print("Association table..." ) print("Joining tvalue and stderr" ) # print(head(GWAS)) # print(length(df)) # print(length(tvalue)) # print(length(stderr)) # print(length(effect.est)) DTS=cbind(GWAS[,1:3],df,tvalue,stderr,effect.est) colnames(DTS)=c("SNP","Chromosome","Position","DF","t Value","std Error","effect") print("Creating ROC table and plot" ) if(DP$file.output) myROC=GAPIT.ROC(t=tvalue,se=stderr,Vp=var(as.matrix(DP$Y[,2])),trait=DP$name.of.trait) print("ROC table and plot created" ) print("MAF plot..." 
) if(DP$file.output) myMAF1=GAPIT.MAF(MAF=maf,P=ps,E=NULL,trait=DP$name.of.trait) print("GAPIT.Interactive.Manhattan") print(DP$Inter.type) #GI=GI[order(GI[,4]),] #print(head(GI)) if(ncol(GI)>1) {new_GI=merge(PWIP$PWIP,GI[,c("SNP","effect")],by.x="SNP",by.y="SNP") }else{ new_GI=GI } new_GI=new_GI[order(new_GI[,4]),] #print(head(new_GI)) if(DP$file.output&DP$Inter.Plot) GAPIT.Interactive.Manhattan(GWAS=new_GI,X_fre=maf,plot.type=DP$Inter.type,name.of.trait = DP$name.of.trait) if(DP$file.output){ write.table(new_GI, paste("GAPIT.", DP$name.of.trait, ".GWAS.Results.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) write.table(DTS, paste("GAPIT.", DP$name.of.trait, ".Df.tValue.StdErr.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) #print(head(GWAS.2)) #if(byPass) write.table(GWAS.2[,1:4], paste("GAPIT.", DP$name.of.trait, ".Allelic_Effect_Estimates.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) }#end file.output }#PWI.Filtered }#end IC$GD) print("GAPIT.ID accomplished successfully for multiple traits. Results are saved") return () }#is.null(DP)&is.null(IC) } #end of GAPIT.ID function #============================================================================================= `GAPIT.Imputation` <- function(x,GI=NULL,impute="Middle",byRow=TRUE){ #Object: To impute NA in genome #Output: Coresponding numerical value #Authors: Zhiwu Zhang #Writer: Jiabo Wang # Last update: April 13, 2016 ############################################################################################## n=length(x) lev=levels(as.factor(x)) lev=setdiff(lev,NA) #print(lev) len=length(lev) count=1:len for(i in 1:len){ count[i]=length(x[(x==lev[i])]) } position=order(count) #print(position) if(impute=="Middle") {x[is.na(x)]=1 } if(len==3){ if(impute=="Minor") {x[is.na(x)]=position[1] -1} if(impute=="Major") {x[is.na(x)]=position[len]-1} }else{ if(impute=="Minor") {x[is.na(x)]=2*(position[1] -1)} if(impute=="Major") {x[is.na(x)]=2*(position[len]-1)} } if(byRow) { result=matrix(x,n,1) }else{ result=matrix(x,1,n) } return(result) }#end of GAPIT.Numericalization function #============================================================================================= `GAPIT.Interactive.GS`<- function(model_store=NULL,Y=NULL,myGD=NULL,myGM=NULL,myKI=NULL,myY=NULL,myCV=NULL,rel=NULL,h2=NULL,NQTN=NULL ) #model_store is the store of all model names #Y is the real phenotype # { # e=20 # #NQTN=100 # #h2=0.25 # taxa=as.character(myGD[,1]) # myY=Y0[Y0[,1]%in%taxa,c(1,e)] # myGD=myGD[taxa%in%myY[,1],] # nfold=5 # repli=1 # sets=sample(cut(1:nrow(myY ),nfold,labels=FALSE),nrow(myY )) # j=1 # training=myY # training[sets==j,2]=NA # training_index=is.na(training[,2]) # testing=myY[training_index,] # cblup_gapit=GAPIT(Y=training,CV=PC,PCA.total=0,KI=myKI,group.from=200,group.to=2000,group.by=600,SNP.test=F,file.output=F) # gblup_gapit=GAPIT(Y=training,CV=PC,PCA.total=0,KI=myKI,group.from=2000,group.to=2000,group.by=100,SNP.test=F,file.output=F) # sblup_gapit=GAPIT(Y=training,CV=PC,PCA.total=0,GD=myGD,GM=myGM,group.from=2000,SUPER_GS=TRUE,sangwich.top="MLM",sangwich.bottom="SUPER",LD=0.1,SNP.test=F,file.output=F,inclosure.from=200,inclosure.to=1000,inclosure.by=200,bin.from=10000,bin.to=100000,bin.by=10000) # cblup_pred=cblup_gapit$Pred[training_index,] # gblup_pred=gblup_gapit$Pred[training_index,] # sblup_pred=sblup_gapit$Pred[training_index,] # testing_index=!is.na(testing[,2]) # gblup_r_once=cor(testing[testing_index,2],gblup_pred[testing_index,8]) 
# cblup_r_once=cor(testing[testing_index,2],cblup_pred[testing_index,8]) # sblup_r_once=cor(testing[testing_index,2],sblup_pred[testing_index,8]) # result=cbind(testing[testing_index,],gblup_pred[testing_index,8],cblup_pred[testing_index,8],sblup_pred[testing_index,8]) # colnames(result)=c("taxa","observed","gBLUP","cBLUP","sBLUP") # gblup_r_once # cblup_r_once # sblup_r_once # write.table(result,paste("gcs_",e,".txt",sep="")) myY=read.table(paste("gcs_",e,".txt",sep=""),head=T) Observed=myY$observed Predicted=myY$gBLUP if(!require(plotly)) install.packages("plotly") library(plotly) p <- plot_ly( type = 'scatter', x = ~Observed, y = ~Predicted, data=myY, text = ~paste("Taxa: ",taxa,"
Observed: ",round(observed,4) , '
gBLUP:', round(gBLUP,4)), #size=2*y/max(y), color = I("red"), name=c("gBLUP") )%>%add_trace( type = 'scatter', x = ~observed, y = ~cBLUP, #data=myY, text = ~paste("Taxa: ",taxa,"
Observed: ",round(observed,4) , '
cBLUP:', round(cBLUP,4)), #size=2*y/max(y), color = I("blue"), name=c("cBLUP") )%>%add_trace( type = 'scatter', x = ~observed, y = ~sBLUP, #data=myY, text = ~paste("Taxa: ",taxa,"
Observed: ",round(observed,4) , '
sBLUP:', round(sBLUP,4)), #size=2*y/max(y), color = I("green"), name=c("sBLUP") ) htmltools::save_html(p, "Interactive.GS.html") } `GAPIT.Interactive.Manhattan`<- function(GWAS=NULL,MAF.threshold=seq(0,0.5,.1),cutOff=0.01,DPP=50000,X_fre=NULL,plot.type=c("m","q"),name.of.trait = "Trait" ) { if(is.null(GWAS)) stop("Please add GWAS result in here!!!") MP=GWAS[,2:4] #print(head(GWAS)) GWAS=GWAS[order(GWAS[,3]),] GWAS=GWAS[order(GWAS[,2]),] #print(GWAS[GWAS[,4]==min(GWAS[,4]),2]) taxa=as.character(GWAS[,1]) numMarker=nrow(GWAS) bonferroniCutOff01=-log10(0.01/numMarker) bonferroniCutOff05=-log10(0.05/numMarker) # deal with P value to log Ps=as.numeric(as.vector(GWAS[,4])) logPs <- -log10(Ps) logPs[is.na(logPs)]=0 y.lim <- ceiling(max(GWAS[,4])) chrom_total=as.numeric(as.character((GWAS[,2]))) #print(GWAS[GWAS[,4]==min(GWAS[,4]),]) #print("!!!!") #print(chrom_total[logPs==max(logPs)]) POS=as.numeric(as.vector(GWAS[,3])) #print(head(POS)) chm.to.analyze <- as.numeric(as.character(unique(GWAS[,2]))) chm.to.analyze=chm.to.analyze[order(as.numeric(as.character(chm.to.analyze)))] #chm.to.analyze = factor(sort(chm.to.analyze)) numCHR= length(chm.to.analyze) print(chm.to.analyze) ticks=NULL lastbase=0 #change base position to accumulatives (ticks) for (i in chm.to.analyze) { index=(chrom_total==i) ticks <- c(ticks, lastbase+mean(POS[index])) POS[index]=POS[index]+lastbase lastbase=max(POS[index]) } x0 <- POS y0 <- as.numeric(logPs) z0 <- chrom_total posi0<-as.numeric(as.vector(GWAS$Position)) maf0 <- as.numeric(as.vector(GWAS$maf)) effect0<- as.numeric(as.vector(GWAS$effect)) #print(head(z0)) position=order(y0,decreasing = TRUE) index0=GAPIT.Pruning(y0[position],DPP=DPP) index=position[index0] #order by P value x=x0[index] y=y0[index] z=z0[index] taxa=taxa[index] posi=posi0[index] maf=maf0[index] effect=effect0[index] plot.color=rep(c( '#EC5f67', '#FAC863', '#99C794', '#6699CC', '#C594C5'),ceiling(numCHR/5)) if(c("m")%in%plot.type) { Position=x P_value=y z[z<10]=paste("0",z[z<10],sep="") zz=paste("Chr_",z,sep="") #print(zz) if(!require(plotly)) install.packages("plotly") #print("!!!!!") #print(head(Position)) library(plotly) p <- plot_ly( type = 'scatter', x = ~Position, y = ~P_value, colorscale='Viridis', reversescale =T, #symbol="circle", text = ~paste("SNP: ", taxa, "
Posi: ", posi,"
MAF: ", round(maf,2),"
Effect: ",round(effect,2)), color = ~as.character(zz) )%>% add_trace(y=bonferroniCutOff01,name = 'CutOff-0.01',color=I("red"),mode="line",width=1.4,text="")%>% add_trace(y=bonferroniCutOff05,name = 'CutOff-0.05',color=I("red"),mode="line",line=list(width=1.4,dash='dot'),text="")%>% layout(title = "Interactive.Manhattan.Plot", #showticklabels = FALSE, #legend = list(orientation = 'h'), xaxis = list(title = "Chromsome",zeroline = FALSE,showticklabels = FALSE), yaxis = list (title = "-Log10(p)")) htmltools::save_html(p, paste("Interactive.Manhattan.",name.of.trait,".html",sep="")) } ################ for QQ plot if(c("q")%in%plot.type) { P.values=y p_value_quantiles <- (1:length(P.values))/(length(P.values)+1) log.P.values <- P.values log.Quantiles <- -log10(p_value_quantiles) index=GAPIT.Pruning(log.P.values,DPP=DPP) log.P.values=log.P.values[index ] log.Quantiles=log.Quantiles[index] N=length(P.values) N1=length(log.Quantiles) ## create the confidence intervals c95 <- rep(NA,N1) c05 <- rep(NA,N1) for(j in 1:N1){ i=ceiling((10^-log.Quantiles[j])*N) if(i==0)i=1 c95[j] <- qbeta(0.95,i,N-i+1) c05[j] <- qbeta(0.05,i,N-i+1) #print(c(j,i,c95[j],c05[j])) } #CI shade #plot3d(NULL, xlim = c(0,max(log.Quantiles)), zlim = c(0,max(log.P.values)), type="l",lty=5, lwd = 2, axes=FALSE, xlab="", ylab="",col="gray") index=length(c95):1 zz=paste("Chr_",z,sep="") Expected=log.Quantiles Observed=log.P.values #abline(a = 0, b = 1, col = "red",lwd=2) qp <- plot_ly( type = 'scatter', x = ~Expected, y = ~Observed, text = ~paste("SNP: ", taxa,"
Chr: ",zz,"
Posi: ", posi, "
MAF: ", round(maf,2),"
Effect: ",round(effect,2)), #size=2*y/max(y), name = "SNP", opacity=0.5, )%>%add_lines(x=log.Quantiles,y=log.Quantiles,color=I("red"), mode = 'lines',name="Diag",text="")%>% layout(title = "Interactive.QQ.Plot", xaxis = list(title = "Expected -Log10(p)"), yaxis = list (title = "Observed -Log10(p)"), #showticklabels = FALSE, showlegend = FALSE) htmltools::save_html(qp, paste("Interactive.QQ ",name.of.trait,".html",sep="")) } print("GAPIT.Interactive.Plot has done !!!") }#end of GAPIT.Interactive.Manhattan #============================================================================================= `GAPIT.Judge`<- function(Y=Y,G=NULL,GD=NULL,KI=NULL,GM=NULL,group.to=group.to,group.from=group.from,sangwich.top=sangwich.top,sangwich.bottom=sangwich.bottom,kinship.algorithm=kinship.algorithm,PCA.total=PCA.total,model="MLM",SNP.test=TRUE){ #Object: To judge Pheno and Geno data practicability #Designed by Zhiwu Zhang #Writen by Jiabo Wang #Last update: Novenber 3, 2016 ############################################################################################## print("--------------------Phenotype and Genotype ----------------------------------") if(ncol(Y)<2) stop ("Phenotype should have taxa name and one trait at least. Please correct phenotype file!") print(kinship.algorithm) if(is.null(KI)&is.null(GD) & kinship.algorithm!="SUPER"&is.null(G)) stop ("GAPIT says: Kinship is required. As genotype is not provided, kinship can not be created.") if(kinship.algorithm=="FarmCPU"&SNP.test==FALSE)stop("FarmCPU is only for GWAS, plase set: SNP.test= TRUE") #if((!is.null(GD))&(!is.null(G))) stop("GAPIT Says:Please put in only one type of geno data.") if(is.null(GD)&is.null(G)&is.null(KI))stop ("GAPIT Says:GAPIT need genotype!!!") if(!is.null(GD) & is.null(GM) & (is.null(G)) &SNP.test) stop("GAPIT Says: Genotype data and map files should be in pair") if(is.null(GD) & !is.null(GM) & (is.null(G)) &SNP.test) stop("GAPIT Says: Genotype data and map files should be in pair") if (is.null(GD[,1]%in%Y[,1])|is.null(colnames(G)[-c(1:11)]%in%Y[,1]))stop("GAPIT Says: There are no common taxa between genotype and phenotype") if (!is.null(Y)) nY=nrow(Y) if (!is.null(Y)) ntrait=ncol(Y)-1 print(paste("There are ",ntrait," traits in phenotype data.")) print(paste("There are ",nY," individuals in phenotype data.")) if (!is.null(G)) nG=nrow(G)-11 if (!is.null(GD)) {nG=ncol(GD)-1 print(paste("There are ",nG," markers in genotype data."))} print("Phenotype and Genotype are test OK !!") print("--------------------GAPIT Logical ----------------------------------") #if (group.to>nY&is.null(KI))group.to=nY #if (group.from>group.to&is.null(KI)) group.from=group.to if(!is.null(sangwich.top) & is.null(sangwich.bottom) ) stop("GAPIT Says: SUPER method need sangwich.top and bottom") if(is.null(sangwich.top) & !is.null(sangwich.bottom) ) stop("GAPIT Says: SUPER method need sangwich.top and bottom") if(kinship.algorithm=="Separation"&PCA.total==0) stop ("GAPIT Says: Separation kinship need PCA.total>0") return (list(group.to=group.to,group.from=group.from)) }#end of GAPIT.Pheno.Geno.judge function #============================================================================================= `GAPIT.Liner` <- function(Y,GD,CV){ #Object: To have Y, GD and CV the same size and order #Input: Y,GDP,GM,CV #Input: GD - n by m +1 dataframe or n by m big.matrix #Input: GDP - n by m matrix. This is Genotype Data Pure (GDP). THERE IS NOT COLUMN FOR TAXA. 
#Input: orientation-Marker in GDP go colmun or row wise #Requirement: Y, GDP and CV have same taxa order. GDP and GM have the same order on SNP #Output: GWAS,GPS,Pred #Authors: Zhiwu Zhang # Last update: Febuary 24, 2013 ############################################################################################## #print("GAPIT.Liner Started") #print(date()) #print("Memory used at begining of BUS") #print(memory.size()) #print("dimension of Y,GD and CV at begining") #print(dim(Y)) #print(dim(GD)) #print(dim(CV)) if(!is.null(CV))taxa=intersect(intersect(GD[,1],Y[,1]),CV[,1]) if(is.null(CV))taxa=intersect(GD[,1],Y[,1]) Y=Y[match(taxa, Y[,1], nomatch = 0),] GD=GD[match(taxa, GD[,1], nomatch = 0),] if(!is.null(CV)) CV=CV[match(taxa, CV[,1], nomatch = 0),] Y = Y[order(Y[,1]),] GD = GD[order(GD[,1]),] if(!is.null(CV)) CV = CV[order(CV[,1]),] #print("dimension of Y,GD and CV at end") #print(dim(Y)) #print(dim(GD)) #print(dim(CV)) print("GAPIT.Liner accomplished successfully") return (list(Y=Y,GD=GD,CV=CV)) }#The function GAPIT.Liner ends here #============================================================================================= `GAPIT.Log` <- function(Y=Y,KI=KI,Z=Z,CV=CV,SNP.P3D=SNP.P3D, group.from = group.from ,group.to =group.to ,group.by = group.by ,kinship.cluster = kinship.cluster, kinship.group= kinship.group, ngrid = ngrid , llin = llin , ulim = ulim , esp = esp ,name.of.trait = name.of.trait){ #Object: To report model factors #Output: Text file (GAPIT.Log.txt) #Authors: Zhiwu Zhang # Last update: may 16, 2011 ############################################################################################## #Creat storage facto <- list(NULL) value <- list(NULL) #collecting model factors facto[[1]]="Trait" value[[1]]=paste(dim(Y)) facto[[2]]="group.by " value[[2]]=group.by facto[[3]]="Trait name " value[[3]]=name.of.trait facto[[4]]="Kinship" value[[4]]=dim(KI) facto[[5]]="Z Matrix" value[[5]]=dim(Z) facto[[6]]="Covariate" value[[6]]=dim(CV) facto[[7]]="SNP.P3D" value[[7]]=SNP.P3D facto[[8]]="Clustering algorithms" value[[8]]=kinship.cluster facto[[9]]="Group kinship" value[[9]]=kinship.group facto[[10]]="group.from " value[[10]]=group.from facto[[11]]="group.to " value[[11]]=group.to theLog=as.matrix(cbind(facto,value)) #theLog=as.character(as.matrix(cbind(facto,value))) colnames(theLog)=c("Model", "Value") file=paste("GAPIT.", name.of.trait,".Log.csv" ,sep = "") write.table(theLog, file, quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) return (theLog) } #============================================================================================= `GAPIT.MAF` <- function(MAF=NULL,P=NULL,E=NULL,trait="",threshold.output=.1,plot.style="rainbow"){ #Object: To display probability and effect over MAF #Input: MAF vector of MAF #Input: P vector of P values #Output: A table and plot #Requirment: NA #Authors: Zhiwu Zhang # Start date: April 5, 2013 # Last update: Oct 27, 2015 by Jiabo Wang add notice for P<0.1 is empty ############################################################################################## #print("MAF plot started") #print(threshold.output) #Remove NAs and under threshold index= which(P 1024) {ncolors=1024} if(ncolors==-Inf) { print("There are no significant gene by this method(<0.1)") }else{ #print("MAF plot started 0001") #print(length(P)) #print(ncolors) #palette(rainbow(ncolors)) #palette(gray(seq(.9,0,len = ncolors))) #print("MAF plot started 0001b") pdf(paste("GAPIT.", trait,".MAF.pdf" ,sep = ""), width = 5,height=5) par(mar = c(5,6,5,3)) 
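#Sketch of the colour mapping used just below: -log10(p) is binned into as many levels
#as there are palette entries, so stronger signals get "hotter" colours. toy.p is an
#invented p-value vector and the binning here is only an approximation of the LPC index
#computed elsewhere in GAPIT; if(FALSE) keeps the block inert inside GAPIT.MAF.
if(FALSE){
  toy.p   <- c(1e-6, 1e-4, 0.01, 0.05, 0.09)     #p-values below threshold.output
  toy.lp  <- -log10(toy.p)                       #plotting scale
  n.col   <- ceiling(max(toy.lp))                #one colour per unit of -log10(p)
  pal     <- rev(heat.colors(n.col, alpha=1))    #reversed so red marks the most significant
  toy.col <- pal[pmax(1, ceiling(toy.lp))]       #index each point into the palette
  plot(runif(length(toy.p), 0, 0.5), toy.lp, col=toy.col, pch=19,
       xlab="MAF (toy values)", ylab="-log10(p)")
}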
theColor=heat.colors(ncolors, alpha = 1) palette(rev(theColor)) plot(MAF,LP,type="p",lty = 1,lwd=2,col=LPC,xlab="MAF",ylab =expression(Probability~~-log[10](italic(p))),main = trait, cex.axis=1.1, cex.lab=1.3) #for(i in 2:nc){ #lines(power[,i]~FDR, lwd=2,type="o",pch=i,col=i) #} #legend("bottomright", colnames(power), pch = c(1:nc), lty = c(1,2),col=c(1:nc)) palette("default") # reset back to the default dev.off() } } #GAPIT.MAF ends here #============================================================================================= `GAPIT.Main` <- function(Y,G=NULL,GD=NULL,GM=NULL,KI=NULL,Z=NULL,CV=NULL,CV.Inheritance=NULL,SNP.P3D=TRUE,GP=NULL,GK=NULL, group.from=1000000 ,group.to=1,group.by=10,kinship.cluster="average", kinship.group='Mean',kinship.algorithm=NULL,DPP=50000, ngrid = 100, llin = -10, ulim = 10, esp = 1e-10,GAPIT3.output=TRUE, file.path=NULL,file.from=NULL, file.to=NULL, file.total=NULL, file.fragment = 512, file.G=NULL, file.Ext.G=NULL,file.GD=NULL, file.GM=NULL, file.Ext.GD=NULL,file.Ext.GM=NULL, SNP.MAF=0,FDR.Rate=1,SNP.FDR=1,SNP.effect="Add",SNP.impute="Middle",PCA.total=0, GAPIT.Version=GAPIT.Version, name.of.trait, GT = NULL, SNP.fraction = 1, seed = 123, BINS = 20,SNP.test=TRUE,SNP.robust="FaST", LD.chromosome=NULL,LD.location=NULL,LD.range=NULL,model=model, bin.from=10000,bin.to=5000000,bin.by=1000,inclosure.from=10,inclosure.to=1000,inclosure.by=10, SNP.permutation=FALSE,SNP.CV=NULL,NJtree.group=NJtree.group,NJtree.type=NJtree.type,plot.bin=plot.bin, genoFormat=NULL,hasGenotype=NULL,byFile=NULL,fullGD=NULL,PC=NULL,GI=NULL, Timmer = NULL, Memory = NULL, sangwich.top=NULL,sangwich.bottom=NULL,QC=TRUE,GTindex=NULL,LD=0.05, file.output=TRUE,cutOff=0.05, Model.selection = FALSE, Create.indicator = FALSE, QTN=NULL, QTN.round=1,QTN.limit=0, QTN.update=TRUE, QTN.method="Penalty", Major.allele.zero = FALSE, QTN.position=NULL,SUPER_GD=NULL,SUPER_GS=SUPER_GS,plot.style="Beach",CG=CG,chor_taxa=chor_taxa){ #Object: To perform GWAS and GPS (Genomic Prediction or Selection) #Output: GWAS table (text file), QQ plot (PDF), Manhattan plot (PDF), genomic prediction (text file), and # genetic and residual variance components #Authors: Zhiwu Zhang # Last update: Oct 23, 2015 by Jiabo Wang add REML threshold and SUPER GD KI ############################################################################################## #Initial p3d and h2.opt temporaryly h2.opt=NULL p3d=list( ps=NULL, REMLs=NULL, stats=NULL, effect.est=NULL, rsquare_base=NULL, rsquare=NULL, dfs=NULL, df=NULL, tvalue=NULL, stderr=NULL, maf=NULL, nobs=NULL, Timmer=NULL, Memory=NULL, vgs=NULL, ves=NULL, BLUP=NULL, BLUP_Plus_Mean=NULL, PEV=NULL, BLUE=NULL, logLM=NULL, effect.snp=NULL, effect.cv=NULL ) if (SUPER_GS) { Compression=NULL kinship.optimum=NULL kinship=NULL PC=PC REMLs=NULL GWAS=NULL QTN=NULL Timmer=GAPIT.Timmer(Infor="GAPIT.SUPER.GS") Memory=GAPIT.Memory(Infor="GAPIT.SUPER.GS") #print(model) SUPER_GS_GAPIT=GAPIT.SUPER.GS(Y=Y,G=G,GD=GD,GM=GM,KI=KI,Z=Z,CV=CV,GK=GK,kinship.algorithm=kinship.algorithm, bin.from=bin.from,bin.to=bin.to,bin.by=bin.by,inclosure.from=inclosure.from,inclosure.to=inclosure.to,inclosure.by=inclosure.by, group.from=group.from,group.to=group.to,group.by=group.by,kinship.cluster=kinship.cluster,kinship.group=kinship.group,name.of.trait=traitname, file.path=file.path,file.from=file.from, file.to=file.to, file.total=file.total, file.fragment = file.fragment, file.G=file.G,file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM, SNP.MAF= 
SNP.MAF,FDR.Rate = FDR.Rate,SNP.FDR=SNP.FDR,SNP.effect=SNP.effect,SNP.impute=SNP.impute,PCA.total=PCA.total,GAPIT.Version=GAPIT.Version, GT=GT, SNP.fraction = SNP.fraction, seed = seed, BINS = BINS,SNP.test=SNP.test,DPP=DPP, SNP.permutation=SNP.permutation, LD.chromosome=LD.chromosome,LD.location=LD.location,LD.range=LD.range,SNP.CV=SNP.CV,SNP.robust=SNP.robust,model=model, genoFormat=genoFormat,hasGenotype=hasGenotype,byFile=byFile,fullGD=fullGD,PC=PC,GI=GI,Timmer = Timmer, Memory = Memory, sangwich.top=sangwich.top,sangwich.bottom=sangwich.bottom,QC=QC,GTindex=GTindex,LD=LD,file.output=file.output,cutOff=cutOff ) print("SUPER_GS_GAPIT FUNCTION DONE") return (list(Compression=SUPER_GS_GAPIT$Compression,kinship.optimum=SUPER_GS_GAPIT$SUPER_kinship,kinship=SUPER_GS_GAPIT$kinship, PC=SUPER_GS_GAPIT$PC,GWAS=GWAS, GPS=SUPER_GS_GAPIT$GPS,Pred=SUPER_GS_GAPIT$Pred,Timmer=Timmer,Memory=Memory,SUPER_GD=SUPER_GS_GAPIT$SUPER_GD,GWAS=NULL,QTN=NULL)) }else{ #print("@@@@@@@") #print(group.from) #Handler of SNP.test=F #Iniciate with two by seven NA matrix #The seventh is for p values of SNP DTS=rbind(rep(NA,7),rep(NA,7) ) #End imediatly in one of these situtiona shortcut=FALSE LL.save=1e10 #In case of null Y and null GP, sent back genotype only thisY=Y[,2] thisY=thisY[!is.na(thisY)] if(length(thisY) <3){ shortcut=TRUE }else{ if(var(thisY) ==0) shortcut=TRUE } if(shortcut){ print(paste("Y is empty. No GWAS/GS performed for ",name.of.trait,sep="")) return (list(compression=NULL,kinship.optimum=NULL, kinship=KI,PC=PC,GWAS=NULL, GPS=NULL,Pred=NULL, REMLs=NULL,Timmer=Timmer,Memory=Memory,h2=NULL)) } #QC print("------------Examining data (QC)------------------------------------------") if(is.null(Y)) stop ("GAPIT says: Phenotypes must exist.") if(is.null(KI)&missing(GD) & kinship.algorithm!="SUPER") stop ("GAPIT says: Kinship is required. 
As genotype is not provided, kinship can not be created.") #When GT and GD are missing, force to have fake ones (creating them from Y),GI is not required in this case if(is.null(GD) & is.null(GT)) { GT=as.matrix(Y[,1]) GD=matrix(1,nrow(Y),1) GI=as.data.frame(matrix(0,1,3) ) colnames(GI)=c("SNP","Chromosome","Position") } if(is.null(GT)) { GT=as.character(Y[,1]) } #print("@@@@@@@@") #print(GD) #merge CV with PC: Put CV infront of PC if(PCA.total>0&!is.null(CV))CV=GAPIT.CVMergePC(CV,PC) if(PCA.total>0&is.null(CV))CV=PC #for GS merge CV with GD name if (is.null(CV)) {my_allCV=CV }else{ taxa_GD=rownames(GD) my_allCV=CV[order(CV[,1]),] my_allCV=my_allCV[my_allCV[,1]%in%taxa_GD,] #print(dim(my_allCV)) } #Handler of CV.Inheritance if(is.null(CV) & !is.null(CV.Inheritance)){ stop ("GAPIT says: CV.Inheritance is more than avaiable.") } if(!is.null(CV)& !is.null(CV.Inheritance)){ if(CV.Inheritance>(ncol(CV)-1)) stop ("GAPIT says: CV.Inheritance is more than avaiable.") } #Create Z as identity matrix from Y if it is not provided if(kinship.algorithm!="None" & kinship.algorithm!="SUPER" & is.null(Z)){ taxa=as.character(Y[,1]) #this part will make GS without CV not present all prediction Z=as.data.frame(diag(1,nrow(Y))) #taxa=as.character(KI[,1]) #Z=as.data.frame(diag(1,nrow(KI))) Z=rbind(taxa,Z) taxa=c('Taxa',as.character(taxa)) Z=cbind(taxa,Z) } ZI=Z #Add the part of non proportion in Z matrix if(kinship.algorithm!="None" & kinship.algorithm!="SUPER" & !is.null(Z)) { if(nrow(Z)-1nY){snpsam=sample(1:nG,nY)}else{snpsam=1:nG} GK=GD[GTindex,snpsam] SNPVar=apply(as.matrix(GK),2,var) GK=GK[,SNPVar>0] GK=cbind(as.data.frame(GT[GTindex]),as.data.frame(GK)) #add taxa } #myGD=cbind(as.data.frame(GT),as.data.frame(GD)) file.output.temp=file.output file.output=FALSE #print(sangwich.top) GP=GAPIT.Bread(Y=Y,CV=CV,Z=Z,KI=KI,GK=GK,GD=cbind(as.data.frame(GT),as.data.frame(GD)),GM=GI,method=sangwich.top,GTindex=GTindex,LD=LD,file.output=file.output)$GWAS file.output=file.output.temp GK=NULL print("-------------------Sagnwich top bun: done-----------------------------") } Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="SagnwichTop") Memory=GAPIT.Memory(Memory=Memory,Infor="SagnwichTop") #Sandwich burger and dressing print("-------------------Sandwich burger and dressing------------------------") #Handler of group boundry if(group.from>group.to) stop("GAPIT says: group.to should be larger than group.from. Please correct them!") if(is.null(CV) | (!is.null(CV) & group.to<(ncol(CV)+1))) { #The minimum of group is 1 + number of columns in CV group.from=1 group.to=1 warning("The upper bound of groups (group.to) is not sufficient. both boundries were set to a and GLM is performed!") } if(!is.null(CV)& group.from<1) { group.from=1 #minimum of group is number of columns in CV warning("The lower bound of groups should be 1 at least. It was set to 1!") } nk=1000000000 if(!is.null(KI)) nk=min(nk,nrow(KI)) if(!is.null(GK)) nk=min(nk,nrow(GK)) if(!is.null(KI)) { if(group.to>nk) { #group.to=min(nrow(KI),length(GTindex)) #maximum of group is number of rows in KI group.to=nk #maximum of group is number of rows in KI warning("The upper bound of groups is too high. It was set to the size of kinship!") } if(group.from>nk){ group.from=nk warning("The lower bound of groups is too high. It was set to the size of kinship!") } } if(!is.null(CV)){ if(group.to<=ncol(CV)+1) { #The minimum of group is number of columns in CV #group.from=ncol(CV)+2 #group.to=ncol(CV)+2 warning("The upper bound of groups (group.to) is not sufficient. 
both boundries were set to their minimum and GLM is performed!") } } #bin.fold=ceiling(log2(bin.to/bin.from)) #bin.seq=0:bin.fold #bin.level=bin.from*2^bin.seq #Set upper bound for inclosure.to if(inclosure.to>nrow(Y))inclosure.to=nrow(Y)-1 #set inclosure loop levels bin.level=seq(bin.from,bin.to,by=bin.by) inclosure=seq(inclosure.from,inclosure.to,by=inclosure.by) #Optimization for group number, cluster algorithm and kinship type GROUP=seq(group.to,group.from,by=-group.by)#The reverse order is to make sure to include full model if(missing("kinship.cluster")) kinship.cluster=c("ward", "single", "complete", "average", "mcquitty", "median", "centroid") if(missing("kinship.group")) kinship.group=c("Mean", "Max", "Min", "Median") numSetting=length(GROUP)*length(kinship.cluster)*length(kinship.group)*length(bin.level)*length(inclosure) #Reform Y, GD and CV into EMMA format ys=as.matrix(Y[,2]) X0=as.matrix(CV[,-1]) CV.taxa=CVI[,1] #print(length(ys)) #Initial count=0 Compression=matrix(,numSetting,6) colnames(Compression)=c("Type","Cluster","Group","REML","VA","VE") #add indicator of overall mean if(min(X0[,1])!=max(X0[,1])) X0 <- cbind(1, X0) #do not add overall mean if X0 has it already at first column Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="DataProcessing") Memory=GAPIT.Memory(Memory=Memory,Infor="DataProcessing") print("-------------------------Iteration in process--------------------------") print(paste("Total iterations: ",numSetting,sep="")) #Loop to optimize cluster algorithm, group number and kinship type for (bin in bin.level){ for (inc in inclosure){ #Grill: update KI if GK or GP is provided if(!byPass & (!is.null(GK) | !is.null(GP))) { print("Grilling KI...") myGenotype<-GAPIT.Genotype(G=NULL,GD=cbind(as.data.frame(GT),as.data.frame(GD)),GM=GI,KI=NULL,kinship.algorithm=kinship.algorithm,PCA.total=0,SNP.fraction=SNP.fraction,SNP.test=SNP.test, file.path=file.path,file.from=file.from, file.to=file.to, file.total=file.total, file.fragment = file.fragment, file.G=file.G, file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM, SNP.MAF=SNP.MAF,FDR.Rate = FDR.Rate,SNP.FDR=SNP.FDR,SNP.effect=SNP.effect,SNP.impute=SNP.impute,kinship.cluster=kinship.cluster,NJtree.group=NJtree.group,NJtree.type=NJtree.type, LD.chromosome=LD.chromosome,LD.location=LD.location,LD.range=LD.range, GP=GP,GK=GK,bin.size=bin,inclosure.size=inc,SNP.CV=SNP.CV, Timmer = Timmer, Memory = Memory,GTindex=GTindex,sangwich.top=NULL,sangwich.bottom=sangwich.bottom, file.output=file.output, Create.indicator = Create.indicator, Major.allele.zero = Major.allele.zero) Timmer=myGenotype$Timmer Memory=myGenotype$Memory KI=myGenotype$KI #update group set by new KI nk=nrow(KI) GROUP=GROUP[GROUP<=nk] } for (ca in kinship.cluster){ for (group in GROUP){ for (kt in kinship.group){ #Do not screen SNP unless existing genotype and one combination if(numSetting==1 & hasGenotype){ optOnly=FALSE }else{ optOnly=TRUE } if(!SNP.test) optOnly=TRUE if(optOnly | Model.selection){ colInclude=1 optOnly = TRUE }else{ colInclude=c(1:ncol(GD)) } if(!optOnly) {print("Compressing and Genome screening..." 
)} count=count+1 #Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 1") #Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 1") if(!byPass) { if(count==1)print("-------Mixed model with Kinship-----------------------------") if(group0] SUPER_GD=SUPER_GD[,SNPVar>0] GK=cbind(as.data.frame(GT[GTindex]),as.data.frame(GK)) #add taxa SUPER_GD=cbind(as.data.frame(GT),as.data.frame(SUPER_GD)) #add taxa #GP=NULL }# end of if(is.null(GK)) if(!is.null(GK) & numSetting>1) { print("-------Calculating likelihood-----------------------------------") # myBurger=GAPIT.Burger(Y=Y,CV=CV,GK=GK) myBurger=GAPIT.Burger(Y=Y,CV=NULL,GK=GK) #########modified by Jiabo Wang myREML=myBurger$REMLs myVG=myBurger$vg myVE=myBurger$ve }else{ myREML=NA myVG=NA myVE=NA } #Recoding the optimum GK if(count==1){ GK.save=GK LL.save=myREML SUPER_optimum_GD=SUPER_GD ########### get SUPER GD }else{ if(myREML1){ Compression=Compression[order(as.numeric(Compression[,4]),decreasing = FALSE),] #sort on REML kt=Compression[1,1] ca=Compression[1,2] group=Compression[1,3] } cp <- GAPIT.Compress(KI=KI,kinship.cluster=ca,kinship.group=kt,GN=group,Timmer=Timmer,Memory=Memory) Timmer=cp$Timmer Memory=cp$Memory Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_cp") Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2_cp") bk <- GAPIT.Block(Z=Z,GA=cp$GA,KG=cp$KG) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_bk") Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 bk") zc <- GAPIT.ZmatrixCompress(Z=Z,GAU =bk$GA) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_zc") Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 zc") z0=as.matrix(zc$Z[,-1]) Z1=matrix(as.numeric(z0),nrow=nrow(z0),ncol=ncol(z0)) BIC <- rep(NA,ncol(X0)) LogLike <- rep(NA, ncol(X0)) for(i in 1:ncol(X0)){#1 because the first column of X0 is the intercept X0.test <- as.matrix(X0[,1:i]) #print("The dim of bk$KW is ") #print(dim(bk$KW)) #print(dim(X0.test)) #print(dim(CVI)) p3d <- GAPIT.EMMAxP3D(ys=ys,xs=as.matrix(as.data.frame(GD[,1])),K = as.matrix(bk$KW) ,Z=Z1,X0=X0.test,CVI=CVI,CV.Inheritance=CV.Inheritance,GI=GI,SNP.P3D=SNP.P3D,Timmer=Timmer,Memory=Memory,fullGD=fullGD, SNP.permutation=SNP.permutation, GP=GP, file.path=file.path,file.from=file.from,file.to=file.to,file.total=file.total, file.fragment = file.fragment, byFile=byFile, file.G=file.G,file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM, GTindex=GTindex,genoFormat=genoFormat,optOnly=TRUE,SNP.effect=SNP.effect,SNP.impute=SNP.impute,name.of.trait=name.of.trait, Create.indicator = Create.indicator, Major.allele.zero = Major.allele.zero) k.num.param <- 2+i #k is (i-1) because we have the following parameters in the likelihood function: # intercept # (i-1) covariates # sigma_g # delta #print(paste("The value of round(p3d$REMLs,5) is ", round(p3d$REMLs,5), sep = "")) #print(paste("The value of log(GTindex) is ", log(GTindex), sep = "")) #print(paste("The value of 0.5*k.num.param*log(GTindex) is ", 0.5*k.num.param*log(nrow(Z1)), sep = "")) LogLike[i] <- p3d$logLM BIC[i] <- p3d$logLM -(0.5*k.num.param*log(nrow(Z1))) #print("The value of k.num.param is: ") #print(k.num.param) #print(paste("The value of nrow(Z1) is ", nrow(Z1), sep = "")) } Optimum.from.BIC <- which(BIC == max(BIC)) print(paste("-----------------------The optimal number of PCs/covariates is ", (Optimum.from.BIC-1)," -------------------------", sep = "")) BIC.Vector <- cbind(as.matrix(rep(0:(ncol(X0)-1))), as.matrix(BIC), as.matrix(LogLike)) #print(seq(0:ncol(X0))) #print(BIC.Vector) colnames(BIC.Vector) <- c("Number 
of PCs/Covariates", "BIC (larger is better) - Schwarz 1978", "log Likelihood Function Value") write.table(BIC.Vector, paste("GAPIT.", name.of.trait, ".BIC.Model.Selection.Results.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) #print(BIC.Vector) X0 <- X0[,1:(Optimum.from.BIC)] if(Optimum.from.BIC == 1){ X0 <- as.matrix(X0) } print("The dimension of X0 after model selection is:") print(dim(X0)) print("The head of X0 after model selection is") print(head(X0)) } # where does it start: 522 print("---------------------Sandwich bottom bun-------------------------------") print("Compression") print(Compression) #Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Compression") #Memory=GAPIT.Memory(Memory=Memory,Infor="Copmression") if(numSetting==1) { Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GWAS") Memory=GAPIT.Memory(Memory=Memory,Infor="GWAS") } #Perform GWAS with the optimum setting #This section is omited if there is only one setting if((numSetting>1)| (!is.null(sangwich.bottom)&!byPass) | Model.selection) { print("Genomic screening..." ) optOnly=FALSE #set default to false and change it to TRUE in these situations: if(!hasGenotype) optOnly=TRUE if(!SNP.test) optOnly=TRUE if(optOnly){ colInclude=1 }else{ colInclude=c(1:ncol(GD)) } if(numSetting>1){ #Find the best ca,kt and group print(paste(as.numeric(Compression[1,4]))) ###added by Jiabo Wang 2015.7.20 print(paste(min(as.numeric(Compression[,4]),rm.na=TRUE))) adjust_value=as.numeric(Compression[1,4])-min(as.numeric(Compression[,4]),rm.na=TRUE) nocompress_value=as.numeric(Compression[1,4]) REML_storage=as.numeric(Compression[,4]) adjust_mean=mean(as.numeric(Compression[,4]),rm.na=TRUE) threshold=adjust_mean*0.1 if(adjust_value<3|nocompress_value<0) ###added by Jiabo Wang 2015.7.20 { kt=Compression[1,1] ca=Compression[1,2] group=Compression[1,3] print(paste("Optimum: ",Compression[1,2],Compression[1,1],Compression[1,3],Compression[1,5], Compression[1,6],Compression[1,4] ,sep = " ")) }else{ Compression=Compression[order(as.numeric(Compression[,4]),decreasing = FALSE),] #sort on REML kt=Compression[1,1] ca=Compression[1,2] group=Compression[1,3] print(paste("Optimum: ",Compression[1,2],Compression[1,1],Compression[1,3],Compression[1,5], Compression[1,6],Compression[1,4] ,sep = " ")) } }#end if(numSetting>1) print("-------------- Sandwich bottom ------------------------") if(!byPass) { print("-------------- Sandwich bottom with raw burger------------------------") if(Model.selection == FALSE){ #update KI with the best likelihood if(is.null(sangwich.bottom)) KI=KI.save cp <- GAPIT.Compress(KI=KI,kinship.cluster=ca,kinship.group=kt,GN=group,Timmer=Timmer,Memory=Memory) Timmer=cp$Timmer Memory=cp$Memory Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_cp") Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2_cp") bk <- GAPIT.Block(Z=Z,GA=cp$GA,KG=cp$KG) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_bk") Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 bk") zc <- GAPIT.ZmatrixCompress(Z=Z,GAU =bk$GA) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_zc") Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 zc") #Reform KW and Z into EMMA format z0=as.matrix(zc$Z[,-1]) Z1=matrix(as.numeric(z0),nrow=nrow(z0),ncol=ncol(z0)) } print("--------------EMMAxP3D with the optimum setting-----------------------") #print(dim(ys)) #print(dim(as.matrix(as.data.frame(GD[GTindex,colInclude])))) p3d <- GAPIT.EMMAxP3D(ys=ys,xs=as.matrix(as.data.frame(GD[GTindex,colInclude])) ,K = as.matrix(bk$KW) ,Z=Z1,X0=as.matrix(X0),CVI=CVI, 
CV.Inheritance=CV.Inheritance,GI=GI,SNP.P3D=SNP.P3D,Timmer=Timmer,Memory=Memory,fullGD=fullGD, SNP.permutation=SNP.permutation, GP=GP, file.path=file.path,file.from=file.from,file.to=file.to,file.total=file.total, file.fragment = file.fragment, byFile=byFile, file.G=file.G,file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM, GTindex=GTindex,genoFormat=genoFormat,optOnly=optOnly,SNP.effect=SNP.effect,SNP.impute=SNP.impute,name.of.trait=name.of.trait, Create.indicator = Create.indicator, Major.allele.zero = Major.allele.zero) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GWAS") Memory=GAPIT.Memory(Memory=Memory,Infor="GWAS") print("--------------EMMAxP3D with the optimum setting done------------------") }#end of if(!byPass) }#end of if(numSetting>1 & hasGenotype & !SNP.test) #print("Screening wiht the optimum setting done") if(byPass) { print("---------------Sandwich bottom with grilled burger---------------------") print("---------------Sandwich bottom: reload bins ---------------------------") #SUPER: Final screening GK=GK.save myBread=GAPIT.Bread(Y=Y,CV=CV,Z=Z,GK=GK,GD=cbind(as.data.frame(GT),as.data.frame(GD)),GM=GI,method=sangwich.bottom,GTindex=GTindex,LD=LD,file.output=file.output) print("SUPER saving results...") Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GWAS") Memory=GAPIT.Memory(Memory=Memory,Infor="GWAS") } #end of if(byPass) print("--------------------Final results presentations------------------------") #Plotting optimum group kinship if(!byPass) { if(length(bk$KW)>1 &length(bk$KW)ncol(X0)) { gs <- GAPIT.GS(KW=bk$KW,KO=bk$KO,KWO=bk$KWO,GAU=bk$GAU,UW=cbind(p3d$BLUP,p3d$PEV)) } print("Writing GBV and Acc..." ) GPS=NULL if(length(bk$KW)>ncol(X0)) GPS=gs$BLUP Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GPS") Memory=GAPIT.Memory(Memory=Memory,Infor="GPS") #Make heatmap for distribution of BLUP and PEV print("GBV and accuracy distribution..." ) if(length(bk$KW)>ncol(X0) &file.output) { GAPIT.GS.Visualization(gsBLUP = gs$BLUP, BINS=BINS,name.of.trait = name.of.trait) } #Make a plot Summarzing the Compression Results, if more than one "compression level" has been assessed print("Compression portfolios..." 
) #print(Compression) if(file.output) GAPIT.Compression.Visualization(Compression = Compression, name.of.trait = name.of.trait) print("Compression Visualization done") if(length(Compression)<1){ h2.opt= NULL }else{ if(length(Compression)<=6) Compression=t(as.matrix(Compression[which(Compression[,4]!="NULL" | Compression[,4]!="NaN"),])) if(length(Compression)==6) Compression=matrix(Compression,1,6) if(length(Compression)>6) Compression=Compression[which(Compression[,4]!="NULL" | Compression[,4]!="NaN"),] Compression.best=Compression[1,] variance=as.numeric(Compression.best[5:6]) varp=variance/sum(variance) h2.opt= varp[1] } Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Compression.Visualization") Memory=GAPIT.Memory(Memory=Memory,Infor="Compression.Visualization") # print("$$$$$") # print(str(p3d)) ps=p3d$ps nobs=p3d$nobs maf=p3d$maf rsquare_base=p3d$rsquare_base rsquare=p3d$rsquare df=p3d$df tvalue=p3d$tvalue stderr=p3d$stderr effect.est=p3d$effect.est Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Extract p3d results") Memory=GAPIT.Memory(Memory=Memory,Infor="Extract p3d results") print("p3d objects transfered") #where does it start: 936 }else{ #byPass #print("The head of myBread$GWAS is") #print(head(myBread$GWAS)) GPS=myBread$BLUP ps=myBread$GWAS[,4] nobs=myBread$GWAS[,6] #print(dim(GI)) #print(head()) Bread_index=match(as.character(myBread$GWAS[,1]),as.character(GI[,1])) #print(GD[1:5,1:5]) Bread_X=GD[,Bread_index] #print(dim(Bread_X)) maf=apply(Bread_X,2,function(one) abs(1-sum(one)/(2*nrow(Bread_X)))) maf[maf>0.5]=1-maf[maf>0.5] rsquare_base=rep(NA,length(ps)) rsquare=rep(NA,length(ps)) df=rep(NA,length(nobs)) tvalue=rep(NA,length(nobs)) stderr=rep(NA,length(nobs)) effect.est=rep(NA,length(nobs)) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Extract bread results") Memory=GAPIT.Memory(Memory=Memory,Infor="Extract bread results") } print("Merge BLUP and BLUE") #print(head(ps)) #Merge BLUP and BLUE Pred=NULL if((!byPass)&(!Model.selection)){ print("GAPIT before BLUP and BLUE") #print(dim(p3d$BLUE)) BLUE=data.frame(cbind(data.frame(CV.taxa),data.frame(p3d$BLUE))) colnames(BLUE)=c("Taxa","BLUE") #Initial BLUP as BLUe and add additional columns gs.blup=cbind(BLUE,NA,NA,0,NA) if(!is.null(gs))gs.blup=gs$BLUP BB= merge(gs.blup, BLUE, by.x = "Taxa", by.y = "Taxa") if (is.null(my_allCV)){my_allX=matrix(1,length(my_taxa),1) }else{ # my_allX=as.matrix(my_allCV[,-1]) my_allX=cbind(1,as.matrix(my_allCV[,-1])) } #print(dim(my_allX)) #print(head(my_allX)) #print(dim(BB)) #print(CV.Inheritance) if(is.null(CV.Inheritance)) { Prediction=BB[,5]+BB[,7] Pred_Heritable=Prediction } if(!is.null(CV.Inheritance)) { #inher_CV=my_allX[,1:(1+CV.Inheritance)] #beta.Inheritance=p3d$effect.cv[1:(1+CV.Inheritance)] #print(beta.Inheritance) #if(length(beta)==1)CV=X all_BLUE=try(my_allX%*%p3d$effect.cv,silent=T) if(inherits(BLUE, "try-error")) all_BLUE = NA Pred_Heritable=BB[,5]+BB[,7] Prediction=BB[,5]+all_BLUE } #print("@@@@@@@@@@") #print(dim(CVI)) #print(BB) #CV.Inheritance #Pred_Heritable=p3d$effect.cv[CV.Inheritance]%*%CVI[CV.Inheritance]+BB[,7] Pred=data.frame(cbind(BB,data.frame(Prediction)),data.frame(Pred_Heritable)) if(noCV) { if(NOBLUP) {Pred=NA }else{ BLUE=Pred$BLUE[1] prediction=as.matrix(GPS$BLUP)+(BLUE) Pred=cbind(GPS,BLUE,prediction) colnames(Pred)=c("Taxa","Group","RefInf","ID","BLUP","PEV","BLUE","Prediction") }#end NOBLUP }#end noCV print("GAPIT after BLUP and BLUE") } #Export BLUP and PEV if(!byPass &GAPIT3.output) { print("Exporting BLUP and Pred") #try(write.table(gs$BLUP, paste("GAPIT.", 
name.of.trait,".BLUP.csv" ,sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)) try(write.table(Pred, paste("GAPIT.", name.of.trait,".PRED.csv" ,sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)) } if(byPass) { theK.back=NULL }else{ theK.back=cp$KG } if(byPass)Compression[1,4]=0 #create a fake value to aloow output of SUPER #Export GWAS results PWI.Filtered=NULL if(hasGenotype &SNP.test &!is.na(Compression[1,4])) #require not NA REML { Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Extract GWAS start") Memory=GAPIT.Memory(Memory=Memory,Infor="Extract GWAS start") #print("Filtering SNPs with MAF..." ) #index=maf>=SNP.MAF PWI.Filtered=cbind(GI,ps,maf,nobs,rsquare_base,rsquare,effect.est)#[index,] #print(dim(PWI.Filtered)) colnames(PWI.Filtered)=c("SNP","Chromosome","Position ","P.value", "maf", "nobs", "Rsquare.of.Model.without.SNP","Rsquare.of.Model.with.SNP","effect") if(!byPass){ if(Create.indicator){ #Add a counter column for GI GI.counter <- cbind(GI, seq(1:nrow(GI))) #Turn GI and effect.est into data frames GI.counter.data.frame <- data.frame(GI.counter) colnames(GI.counter.data.frame) <- c("X1", "X2", "X3", "X4") effect.est.data.frame <- data.frame(effect.est) colnames(effect.est.data.frame) <- c("X1", "X2", "X3") print(head(GI.counter.data.frame)) print(head(effect.est.data.frame)) #Do a merge statement GWAS.2 <- merge(GI.counter.data.frame, effect.est.data.frame, by.x = "X4", by.y = "X1") #Remove the counter column GWAS.2 <- GWAS.2[,-1] #Add column names colnames(GWAS.2) <- c("SNP","Chromosome","Position ", "Genotype", "Allelic Effect Estimate") } if(!Create.indicator){ GWAS.2 <- PWI.Filtered[,c(1:3,9)] colnames(GWAS.2) <- c("SNP","Chromosome","Position ", "Allelic Effect Estimate") } } Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="MAF filtered") Memory=GAPIT.Memory(Memory=Memory,Infor="MAF filtered") #print("SNPs filtered with MAF") if(!is.null(PWI.Filtered)) { #Run the BH multiple correction procedure of the results #Create PWIP, which is a table of SNP Names, Chromosome, bp Position, Raw P-values, FDR Adjusted P-values #print("Calculating FDR..." ) PWIP <- GAPIT.Perform.BH.FDR.Multiple.Correction.Procedure(PWI = PWI.Filtered, FDR.Rate = FDR.Rate, FDR.Procedure = "BH") Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Multiple Correction") Memory=GAPIT.Memory(Memory=Memory,Infor="Multiple Correction") #QQ plots #print("QQ plot..." ) if(file.output) GAPIT.QQ(P.values = PWIP$PWIP[,4], name.of.trait = name.of.trait,DPP=DPP) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="QQ plot") Memory=GAPIT.Memory(Memory=Memory,Infor="QQ plot") #Manhattan Plots #print("Manhattan plot (Genomewise)..." ) # if(file.output) GAPIT.Manhattan(GI.MP = PWIP$PWIP[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Genomewise",cutOff=cutOff) # if(file.output) GAPIT.Manhattan(GI.MP = PWIP$PWIP[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Genomewise",cutOff=cutOff,seqQTN=QTN.position) #QTN does not work with sorted P if(file.output) GAPIT.Manhattan(GI.MP = PWI.Filtered[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Genomewise",cutOff=cutOff,seqQTN=QTN.position,plot.style=plot.style,plot.bin=plot.bin,chor_taxa=chor_taxa) #print("Manhattan plot (Chromosomewise)..." 
) #if(file.output) GAPIT.Manhattan(GI.MP = PWIP$PWIP[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Chromosomewise",cutOff=cutOff) if(file.output&SNP.fraction==1) GAPIT.Manhattan(GI.MP = PWI.Filtered[,2:4],GD=GD,CG=CG, name.of.trait = name.of.trait, DPP=DPP, plot.type = "Chromosomewise",cutOff=cutOff,plot.bin=plot.bin,chor_taxa=chor_taxa) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Manhattan plot") Memory=GAPIT.Memory(Memory=Memory,Infor="Manhattan plot") #Association Table #print("Association table..." ) #print(dim(PWIP$PWIP)) #GAPIT.Table(final.table = PWIP$PWIP, name.of.trait = name.of.trait,SNP.FDR=SNP.FDR) #print(head(PWIP$PWIP)) GWAS=PWIP$PWIP[PWIP$PWIP[,9]<=SNP.FDR,] #print("Joining tvalue and stderr" ) DTS=cbind(GI,df,tvalue,stderr,effect.est) colnames(DTS)=c("SNP","Chromosome","Position","DF","t Value","std Error","effect") #print("Creating ROC table and plot" ) if(file.output) myROC=GAPIT.ROC(t=tvalue,se=stderr,Vp=var(ys),trait=name.of.trait) #print("ROC table and plot created" ) #MAF plots #print("MAF plot..." ) if(file.output) myMAF1=GAPIT.MAF(MAF=GWAS[,5],P=GWAS[,4],E=NULL,trait=name.of.trait) #print(dim(GWAS)) if(file.output){ write.table(GWAS, paste("GAPIT.", name.of.trait, ".GWAS.Results.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) write.table(DTS, paste("GAPIT.", name.of.trait, ".Df.tValue.StdErr.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) if(!byPass) write.table(GWAS.2, paste("GAPIT.", name.of.trait, ".Allelic_Effect_Estimates.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) } } #end of if(!is.null(PWI.Filtered)) Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Extract GWAS end") Memory=GAPIT.Memory(Memory=Memory,Infor="Extract GWAS end") } #end of if(hasGenotype ) #Log if(GAPIT3.output) log=GAPIT.Log(Y=Y,KI=KI,Z=Z,CV=CV,SNP.P3D=SNP.P3D, group.from = group.from ,group.to =group.to ,group.by = group.by ,kinship.cluster = kinship.cluster, kinship.group= kinship.group, ngrid = ngrid , llin = llin , ulim = ulim , esp = esp ,name.of.trait = name.of.trait) #Memory usage #GAPIT.Memory.Object(name.of.trait=name.of.trait) #Timming Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Report") Memory=GAPIT.Memory(Memory=Memory,Infor="Report") if(file.output){ file=paste("GAPIT.", name.of.trait,".Timming.csv" ,sep = "") write.table(Timmer, file, quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) file=paste("GAPIT.", name.of.trait,".Memory.Stage.csv" ,sep = "") write.table(Memory, file, quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) } print(paste(name.of.trait, "has been analyzed successfully!") ) print(paste("The results are saved in the directory of ", getwd()) ) #print("==========================================================================================") TV<-list() TV$ps=ps TV$nobs=nobs TV$maf=maf TV$rsquare_base=rsquare_base TV$rsquare=rsquare TV$df=df TV$tvalue=tvalue TV$stderr=stderr TV$effect.est=effect.est #print("!!!!!!!!!!!!!") #print(head(effect.est)) #print(head(DTS[,7])) #print(ys) if(byPass | Model.selection) Pred <- NA print("before ending GAPIT.Main") #print(dim(Compression)) return (list(Timmer=Timmer,Compression=Compression,kinship.optimum=theK.back, kinship=KI,PC=PC,GWAS=PWI.Filtered, GPS=GPS,Pred=Pred,REMLs=Compression[count,4],Timmer=Timmer,Memory=Memory,SUPER_GD=SUPER_GD,P=ps,effect.snp=DTS[,7],effect.cv=p3d$effect.cv,h2= h2.opt,TV=TV)) } #end if non-SUPER.GS situation, this is a long if statement, structure needs improvement }#The function 
GAPIT.Main ends here #============================================================================================= `GAPIT.Manhattan` <- function(GI.MP = NULL,GD=NULL,name.of.trait = "Trait",plot.type = "Genomewise", DPP=50000,cutOff=0.01,band=5,seqQTN=NULL,plot.style="Oceanic",CG=NULL,plot.bin=10^9,chor_taxa=NULL){ #Object: Make a Manhattan Plot #Options for plot.type = "Separate_Graph_for_Each_Chromosome" and "Same_Graph_for_Each_Chromosome" #Output: A pdf of the Manhattan Plot #Authors: Alex Lipka, Zhiwu Zhang, Meng Li and Jiabo Wang # Last update: Oct 10, 2016 #Add r2 between candidata SNP and other markers in on choromosome ############################################################################################## #print("Manhattan ploting...") #print(cutOff) #do nothing if null input if(is.null(GI.MP)) return #if(is.null(GD)) return #print("Dimension of GI.MP") #print(dim(GI.MP)) #print(head(GI.MP)) #print(tail(GI.MP)) #print(CG) #seqQTN=c(300,1000,2500) #Handler of lable paosition only indicated by negatie position position.only=F if(!is.null(seqQTN)){ if(seqQTN[1]<0){ seqQTN=-seqQTN position.only=T } } #if(is.null(GD)) print ("GD is not same dim as GM") borrowSlot=4 GI.MP[,borrowSlot]=0 GI.MP[,5]=1:(nrow(GI.MP)) GI.MP=matrix(as.numeric(as.matrix(GI.MP) ) ,nrow(GI.MP),ncol(GI.MP)) GI.MP=GI.MP[order(GI.MP[,2]),] GI.MP=GI.MP[order(GI.MP[,1]),] # print("@@@@@") # print(head(GI.MP)) #Inicial as 0 if(!is.null(seqQTN))GI.MP[seqQTN,borrowSlot]=1 if(!is.null(GD)) { if(ncol(GD)!=nrow(GI.MP))print("GD does not mach GM in Manhattan !!!") } #print(ncol(GD)) #print(nrow(GI.MP)) #print(GI.MP) #print("!!") #GI.MP[,5]=1:(nrow(GI.MP)) #print(head(GI.MP,20)) #Remove all SNPs that do not have a choromosome, bp position and p value(NA) GI.MP <- GI.MP[!is.na(GI.MP[,1]),] GI.MP <- GI.MP[!is.na(GI.MP[,2]),] if(!is.null(GD)) GD=GD[,!is.na(GI.MP[,3])] GI.MP <- GI.MP[!is.na(GI.MP[,3]),] #Retain SNPs that have P values between 0 and 1 (not na etc) if(!is.null(GD)) GD=GD[,GI.MP[,3]>0] GI.MP <- GI.MP[GI.MP[,3]>0,] if(!is.null(GD)) GD=GD[,GI.MP[,3]<=1] GI.MP <- GI.MP[GI.MP[,3]<=1,] #Remove chr 0 and 99 GI.MP <- GI.MP[GI.MP[,1]!=0,] #GI.MP <- GI.MP[GI.MP[,1]!=99,] #print(dim(GI.MP)) #print("Dimension of GI.MP after QC") #print(dim(GI.MP)) #print(head(GI.MP)) numMarker=nrow(GI.MP) #print(numMarker) bonferroniCutOff=-log10(cutOff/numMarker) #Replace P the -log10 of the P-values if(!is.null(GD)) { if(ncol(GD)!=nrow(GI.MP)) {print("GD does not match GM in Manhattan !!!") return }} #print(ncol(GD)) #print(nrow(GI.MP)) GI.MP[,3] <- -log10(GI.MP[,3]) index_GI=GI.MP[,3]>0 GI.MP <- GI.MP[index_GI,] if(!is.null(GD)) GD=GD[,index_GI] GI.MP[,5]=1:(nrow(GI.MP)) y.lim <- ceiling(max(GI.MP[,3])) chm.to.analyze <- unique(GI.MP[,1]) #print(dim(GI.MP)) #print(dim(GD)) #print("name of chromosomes:") #print(chm.to.analyze) chm.to.analyze=chm.to.analyze[order(chm.to.analyze)] numCHR= length(chm.to.analyze) #GI.MP[,5]=1:(nrow(GI.MP)) bin.mp=GI.MP[,1:3] bin.mp[,3]=0 # for r2 bin.mp[,1]=as.numeric(as.vector(GI.MP[,2]))+as.numeric(as.vector(GI.MP[,1]))*(10^(max(GI.MP[,1])+1)) #as.numeric(as.vector(GP[,3]))+as.numeric(as.vector(GP[,2]))*MaxBP #print(head(bin.mp)) bin.mp[,2]=floor(bin.mp[,1]/plot.bin) if(!is.null(GD)) X=GD #print(head(bin.mp)) #Chromosomewise plot if(plot.type == "Chromosomewise"&!is.null(GD)) { #print("Manhattan ploting Chromosomewise") GI.MP=cbind(GI.MP,bin.mp) pdf(paste("GAPIT.", name.of.trait,".Manhattan.Plot.Chromosomewise.pdf" ,sep = ""), width = 10) #par(mar = c(5,5,4,3), lab = c(8,5,7)) 
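#Sketch of the r^2 colouring applied in the chromosome-wise loop below: for each bin with a
#significant hit, the squared Pearson correlation between the candidate (peak) SNP column and
#every other marker column is used as a linkage-disequilibrium measure. toy.geno and the
#candidate index are invented; if(FALSE) keeps the block from executing here.
if(FALSE){
  set.seed(1)
  toy.geno  <- matrix(sample(0:2, 20*5, replace=TRUE), nrow=20, ncol=5)   #20 taxa x 5 markers
  candidate <- 3                                                          #assumed peak SNP column
  r2 <- apply(toy.geno, 2, function(m) cor(toy.geno[,candidate], m)^2)    #r^2 with the peak SNP
  r2[is.na(r2)] <- 0                                                      #monomorphic markers give NA
  round(r2, 2)
}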
layout(matrix(c(1,1,2,1,1,1,1,1,1),3,3,byrow=TRUE), c(2,1), c(1,1), TRUE) for(i in 1:numCHR) { #Extract SBP on this chromosome subset=GI.MP[GI.MP[,1]==chm.to.analyze[i],,drop=FALSE] # print(head(subset)) if(nrow(subset)==0)next #thanks to lvclark to fix it subset[,1]=1:(nrow(subset)) #sub.bin.mp=bin.mp[GI.MP[,1]==chm.to.analyze[i],] #subset=cbind(subset,sub.bin.mp) sig.mp=subset[subset[,3]>bonferroniCutOff,,drop=FALSE] sig.index=subset[,3]>bonferroniCutOff ### index of significont SNP num.row=nrow(sig.mp) if(!is.null(dim(sig.mp)))sig.mp=sig.mp[!duplicated(sig.mp[,7]),] num.row=nrow(sig.mp) if(is.null(dim(sig.mp))) num.row=1 bin.set=NULL r2_color=matrix(0,nrow(subset),2) #r2_color print(paste("select ",num.row," candidate significont markers in ",i," chromosome ",sep="") ) #print(sig.mp) if(length(unique(sig.index))==2) { for(j in 1:num.row) { sig.mp=matrix(sig.mp,num.row,8) #print(sig.mp[j,7]) #print(unique(subset[,7])) bin.store=subset[which(subset[,7]==sig.mp[j,7]),] if(is.null(dim(bin.store))) {subset[which(subset[,7]==sig.mp[j,7]),8]=1 next } bin.index=unique(bin.store[,5]) subGD=X[,bin.store[,5]] #print(dim(bin.store)) if(is.null(CG))candidata=bin.store[bin.store[,3]==max(bin.store[,3]),5] if(length(candidata)!=1)candidata=candidata[1] for (k in 1:ncol(subGD)) { r2=cor(X[,candidata],subGD[,k])^2 #print(r2) bin.store[k,8]=r2 } #print(bin.store) #r2_storage[is.na(r2_storage)]=0 #print(bin.store) subset[bin.store[,1],8]=bin.store[,8] #print() }###end for each sig.mp #sub.bin.mp=bin.mp[subset[,3]>bonferroniCutOff,] #print(head(bin.set)) }###end if empty of sig.mp #print("@@@@@@@@@@@@@@@@") rm(sig.mp,num.row) #print(head(subset)) #print(head(subset)) #print(dim(X)) y.lim <- ceiling(max(subset[,3]))+1 #set upper for each chr if(length(subset)>3){ x <- as.numeric(subset[,2])/10^(6) y <- as.numeric(subset[,3]) }else{ x <- as.numeric(subset[2])/10^(6) y <- as.numeric(subset[3]) } ##print(paste("befor prune: chr: ",i, "length: ",length(x),"max p",max(y), "min p",min(y), "max x",max(x), "Min x",min(x))) n_col=10 r2_color[,2]=subset[,8] do_color=colorRampPalette(c("orangeRed", "blue"))(n_col) #Prune most non important SNPs off the plots order=order(y,decreasing = TRUE) y=y[order] x=x[order] r2_color=r2_color[order,,drop=FALSE] index=GAPIT.Pruning(y,DPP=round(DPP/numCHR)) x=x[index] y=y[index] r2_color=r2_color[index,,drop=FALSE] r2_color[which(r2_color[,2]<=0.2),2]=do_color[n_col] r2_color[which(r2_color[,2]<=0.4&r2_color[,2]>0.2),2]=do_color[n_col*0.8] r2_color[which(r2_color[,2]<=0.6&r2_color[,2]>0.4),2]=do_color[n_col*0.6] r2_color[which(r2_color[,2]<=0.8&r2_color[,2]>0.6),2]=do_color[n_col*0.4] r2_color[which(r2_color[,2]<=1&r2_color[,2]>0.8),2]=do_color[n_col/n_col] #print(unique(r2_color[,2])) ##print(paste("after prune: chr: ",i, "length: ",length(x),"max p",max(y), "min p",min(y), "max x",max(x), "Min x",min(x))) par(mar=c(0,0,0,0)) par(mar=c(5,5,2,1),cex=0.8) plot(y~x,type="p", ylim=c(0,y.lim), xlim = c(min(x), max(x)), col = r2_color[,2], xlab = expression(Base~Pairs~(x10^-6)), ylab = "-Log Base 10 p-value", main = paste("Chromosome",chm.to.analyze[i],sep=" "), cex.lab=1.6,pch=21,bg=r2_color[,2]) abline(h=bonferroniCutOff,col="forestgreen") ##print("manhattan plot (chr) finished") #layout.show(nf) #provcol <-c("darkblue","cyan","green3","brown1","brown1") #provcol <-heat.colors(50) #par(mar=c(0,0,0,0)) par(mar=c(15,5,6,5),cex=0.5) barplot(matrix(rep(1,times=n_col),n_col,1),beside=T,col=do_color,border=do_color,axes=FALSE,) 
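#Sketch of the genome-wide significance line drawn in these plots: the raw cutOff is
#Bonferroni-corrected by the number of tested markers and shown on the -log10 scale.
#The numbers are invented for illustration; if(FALSE) keeps the block inert.
if(FALSE){
  cut.off    <- 0.01       #same default as the cutOff argument
  num.marker <- 50000      #assumed number of plotted markers
  threshold  <- -log10(cut.off/num.marker)   #about 6.7; abline(h=threshold) draws the line
}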
#legend(x=10,y=2,legend=expression(R^"2"),,lty=0,cex=1.3,bty="n",bg=par("bg")) axis(3,seq(11,1,by=-2),seq(0,1,by=0.2)) }# end plot.type == "Chromosomewise"&!is.null(GD) dev.off() print("manhattan plot on chromosome finished") } #Chromosomewise plot #Genomewise plot if(plot.type == "Genomewise") { #print("Manhattan ploting Genomewise") #Set corlos for chromosomes #nchr=max(chm.to.analyze) nchr=length(chm.to.analyze) #Set color schem ncycle=ceiling(nchr/band) ncolor=band*ncycle #palette(rainbow(ncolor+1)) cycle1=seq(1,nchr,by= ncycle) thecolor=cycle1 for(i in 2:ncycle){thecolor=c(thecolor,cycle1+(i-1))} col.Rainbow=rainbow(ncolor+1)[thecolor] col.FarmCPU=rep(c("#CC6600","deepskyblue","orange","forestgreen","indianred3"),ceiling(numCHR/5)) col.Rushville=rep(c("orangered","navyblue"),ceiling(numCHR/2)) col.Congress=rep(c("deepskyblue3","firebrick"),ceiling(numCHR/2)) col.Ocean=rep(c("steelblue4","cyan3"),ceiling(numCHR/2)) col.PLINK=rep(c("gray10","gray70"),ceiling(numCHR/2)) col.Beach=rep(c("turquoise4","indianred3","darkolivegreen3","red","aquamarine3","darkgoldenrod"),ceiling(numCHR/5)) #col.Oceanic=rep(c( '#EC5f67', '#F99157', '#FAC863', '#99C794', '#5FB3B3', '#6699CC', '#C594C5', '#AB7967'),ceiling(numCHR/8)) #col.Oceanic=rep(c( '#EC5f67', '#FAC863', '#99C794', '#6699CC', '#C594C5', '#AB7967'),ceiling(numCHR/6)) col.Oceanic=rep(c( '#EC5f67', '#FAC863', '#99C794', '#6699CC', '#C594C5'),ceiling(numCHR/5)) col.cougars=rep(c( '#990000', 'dimgray'),ceiling(numCHR/2)) if(plot.style=="Rainbow")plot.color= col.Rainbow if(plot.style =="FarmCPU")plot.color= col.Rainbow if(plot.style =="Rushville")plot.color= col.Rushville if(plot.style =="Congress")plot.color= col.Congress if(plot.style =="Ocean")plot.color= col.Ocean if(plot.style =="PLINK")plot.color= col.PLINK if(plot.style =="Beach")plot.color= col.Beach if(plot.style =="Oceanic")plot.color= col.Oceanic if(plot.style =="cougars")plot.color= col.cougars #FarmCPU uses filled dots mypch=1 if(plot.style =="FarmCPU")mypch=20 GI.MP <- GI.MP[order(GI.MP[,2]),] GI.MP <- GI.MP[order(GI.MP[,1]),] ticks=NULL lastbase=0 #print("Manhattan data sorted") #print(chm.to.analyze) #change base position to accumulatives (ticks) for (i in chm.to.analyze) { index=(GI.MP[,1]==i) ticks <- c(ticks, lastbase+mean(GI.MP[index,2])) GI.MP[index,2]=GI.MP[index,2]+lastbase lastbase=max(GI.MP[index,2]) } #print("Manhattan chr processed") #print(length(index)) #print(length(ticks)) #print((ticks)) #print((lastbase)) x0 <- as.numeric(GI.MP[,2]) y0 <- as.numeric(GI.MP[,3]) z0 <- as.numeric(GI.MP[,1]) position=order(y0,decreasing = TRUE) index0=GAPIT.Pruning(y0[position],DPP=DPP) index=position[index0] x=x0[index] y=y0[index] z=z0[index] #Extract QTN QTN=GI.MP[which(GI.MP[,borrowSlot]==1),] #print(QTN) #Draw circles with same size and different thikness size=1 #1 ratio=10 #5 base=1 #1 themax=ceiling(max(y)) themin=floor(min(y)) wd=((y-themin+base)/(themax-themin+base))*size*ratio s=size-wd/ratio/2 #print("Manhattan XY created") ####xiaolei update on 2016/01/09 if(plot.style =="FarmCPU"){ pdf(paste("FarmCPU.", name.of.trait,".Manhattan.Plot.Genomewise.pdf" ,sep = ""), width = 13,height=5.75) }else{ pdf(paste("GAPIT.", name.of.trait,".Manhattan.Plot.Genomewise.pdf" ,sep = ""), width = 13,height=5.75) } par(mar = c(3,6,5,1)) plot(y~x,xlab="",ylab=expression(-log[10](italic(p))) , cex.axis=1.5, cex.lab=2, ,col=plot.color[z],axes=FALSE,type = "p",pch=mypch,lwd=wd,cex=s+.3,main = paste(name.of.trait,sep=" "),cex.main=2.5) #Label QTN positions if(is.vector(QTN)){ 
if(position.only){abline(v=QTN[2], lty = 2, lwd=1.5, col = "grey")}else{ points(QTN[2], QTN[3], type="p",pch=21, cex=2,lwd=1.5,col="dimgrey") points(QTN[2], QTN[3], type="p",pch=20, cex=1,lwd=1.5,col="dimgrey") } }else{ if(position.only){abline(v=QTN[,2], lty = 2, lwd=1.5, col = "grey")}else{ points(QTN[,2], QTN[,3], type="p",pch=21, cex=2,lwd=1.5,col="dimgrey") points(QTN[,2], QTN[,3], type="p",pch=20, cex=1,lwd=1.5,col="dimgrey") } } #Add a horizontal line for bonferroniCutOff abline(h=bonferroniCutOff,col="forestgreen") #print(bonferroniCutOff) #Set axises # jiabo creat chor_taxa #print(chor_taxa) if(length(chor_taxa)!=length(ticks))chor_taxa=NULL #print(unique(GI.MP[,1])) if(!is.null(chor_taxa)) {axis(1, at=ticks,cex.axis=1.5,labels=chor_taxa,tick=F) }else{axis(1, at=ticks,cex.axis=1.5,labels=chm.to.analyze,tick=F)} axis(2, at=1:themax,cex.axis=1.5,labels=1:themax,tick=F) box() palette("default") dev.off() #print("Manhattan done Genomewise") } #Genomewise plot print("GAPIT.Manhattan accomplished successfully!zw") } #end of GAPIT.Manhattan #============================================================================================= `GAPIT.Memory.Object` <- function(name.of.trait="Trait"){ # Object: To report memoery usage # Authors: Heuristic Andrew # http://heuristically.wordpress.com/2010/01/04/r-memory-usage-statistics-variable/ # Modified by Zhiwu Zhang # Last update: may 29, 2011 ############################################################################################## # print aggregate memory usage statistics print(paste('R is using', memory.size(), 'MB out of limit', memory.limit(), 'MB')) # create function to return matrix of memory consumption object.sizes <- function() { return(rev(sort(sapply(ls(envir=.GlobalEnv), function (object.name) object.size(get(object.name)))))) } # export file in table format memory=object.sizes() file=paste("GAPIT.", name.of.trait,".Memory.Object.csv" ,sep = "") write.table(memory, file, quote = FALSE, sep = ",", row.names = TRUE,col.names = TRUE) # export file in PDF format pdf(paste("GAPIT.", name.of.trait,".Memory.Object.pdf" ,sep = "")) # draw bar plot barplot(object.sizes(), main="Memory usage by object", ylab="Bytes", xlab="Variable name", col=heat.colors(length(object.sizes()))) # draw dot chart dotchart(object.sizes(), main="Memory usage by object", xlab="Bytes") # draw pie chart pie(object.sizes(), main="Memory usage by object") dev.off() } #============================================================================================= `GAPIT.Memory` <- function(Memory =NULL,Infor){ #Object: To report memory usage #Output: Memory #Authors: Zhiwu Zhang # Last update: June 6, 2011 ############################################################################################## gc() size <- memory.size() #print(paste("Memory usage: ",size," for", Infor)) if(is.null(Memory)) { Increased=0 Memory =cbind(Infor,size ,Increased) }else{ Increased=0 Memory.current=cbind(Infor,size ,Increased) Memory=rbind(Memory,Memory.current) Memory[nrow(Memory),3]=as.numeric(as.matrix(Memory[nrow(Memory),2]))-as.numeric(as.matrix(Memory[nrow(Memory)-1,2])) } return (Memory) }#end of GAPIT.Memory function #============================================================================================= `GAPIT.Multiple.Manhattan` <- function(model_store,DPP=50000,cutOff=0.01,band=5,seqQTN=NULL,Y=NULL,GM=NULL,interQTN=NULL,plot.style="Oceanic",plot.line=FALSE){ #Object: Make a Manhattan Plot #Options for plot.type = "Separate_Graph_for_Each_Chromosome" and 
"Same_Graph_for_Each_Chromosome" #Output: A pdf of the Manhattan Plot #Authors: Alex Lipka, Zhiwu Zhang, Meng Li and Jiabo Wang # Last update: Oct 10, 2016 #Add r2 between candidata SNP and other markers in on choromosome ############################################################################################## Nenviron=length(model_store)*(ncol(Y)-1) environ_name=NULL new_xz=NULL for(i in 1:length(model_store)) { for(j in 1:(ncol(Y)-1)) { environ_name=c(environ_name,paste(model_store[i],".",colnames(Y)[-1][j],sep="")) } } sig_pos=NULL simulation=FALSE if(!is.null(seqQTN)){ #seqQTN=-seqQTN simulation=TRUE } for(i in 1:length(environ_name)) { environ_result=read.csv(paste("GAPIT.",environ_name[i],".GWAS.Results.csv",sep=""),head=T) environ_filter=environ_result[!is.na(environ_result[,4]),] y_filter=environ_filter[environ_filter[,4]<(cutOff/(nrow(environ_filter))),] write.table(y_filter,paste("Filter_",environ_name[i],"_GWAS_result.txt",sep="")) result=environ_result[,1:4] result=result[match(as.character(GM[,1]),as.character(result[,1])),] # result=result[order(result[,2]),] # result=result[order(result[,1]),] #print(head(result)) rownames(result)=1:nrow(result) #print(i) if(i==1){ result0=result colnames(result0)[4]=environ_name[i] } if(i!=1){ result0=merge(result0,result[,c(1,4)],by.x=colnames(result0)[1],by.y=colnames(result)[1]) colnames(result0)[i+3]=environ_name[i] } rownames(result)=1:nrow(result) result[is.na(result[,4]),4]=1 sig_pos=append(sig_pos,as.numeric(rownames(result[result[!is.na(result[,4]),4]<(cutOff/nrow(result)),]))) } #if(length(sig_pos)!=0)sig_pos=sig_pos[!duplicated(sig_pos)] if(length(sig_pos[!is.na(sig_pos)])!=0) { x_matrix=as.matrix(table(sig_pos)) x_matrix=cbind(as.data.frame(rownames(x_matrix)),x_matrix) #print(x_matrix) lastbase=0 map_store=as.matrix(cbind(as.numeric(GM[,2]),as.numeric(as.vector(GM[,3])))) #print(head(map_store)) #print(as.numeric(map_store[,3])) for (j in unique(map_store[,1])) { index=map_store[,1]==j #print(table(index)) map_store[index,2]=as.numeric(map_store[index,2])+lastbase lastbase=max(as.numeric(map_store[index,2])) #print(lastbase) } colnames(x_matrix)=c("pos","times") #colnames(xz)=c("pos","col") new_xz=cbind(x_matrix,map_store[as.numeric(as.character(x_matrix[,1])),]) #new_xz[,4]=0 colnames(new_xz)=c("pos","times","chro","xlab") new_xz=new_xz[!duplicated(new_xz),] new_xz[new_xz[,2]>=3,2]=3 new_xz[,2]=4-new_xz[,2] new_xz[new_xz[,2]==3,2]=0 new_xz=as.matrix(new_xz) new_xz=new_xz[new_xz[,2]!="0",] new_xz=matrix(new_xz,length(as.vector(new_xz))/4,4) #print(new_xz) plot.line=TRUE #print(new_xz) } #print(as.numeric(new_xz[,4])) #print(head(result0)) # print(as.numeric(new_xz[,1])) pdf(paste("GAPIT.Manhattan.Mutiple.Plot",colnames(result0)[-c(1:3)],".pdf" ,sep = ""), width = 20,height=6*Nenviron) par(mfrow=c(Nenviron,1)) for(k in 1:Nenviron) { if(k==Nenviron){#par(mfrow=c(Nenviron,1)) par(mar = c(3,8,1,8)) }else{ #par(mfrow=c(Nenviron,1)) par(mar = c(0,8,1,8)) } environ_result=read.csv(paste("GAPIT.",environ_name[k],".GWAS.Results.csv",sep=""),head=T) #print(environ_result[as.numeric(new_xz[,1]),]) result=environ_result[,1:4] result=result[match(as.character(GM[,1]),as.character(result[,1])),] rownames(result)=1:nrow(result) GI.MP=result[,c(2:4)] borrowSlot=4 GI.MP[,borrowSlot]=0 #Inicial as 0 GI.MP[,5]=1:(nrow(GI.MP)) GI.MP[,6]=1:(nrow(GI.MP)) GI.MP <- GI.MP[!is.na(GI.MP[,1]),] GI.MP <- GI.MP[!is.na(GI.MP[,2]),] GI.MP[is.na(GI.MP[,3]),3]=1 #Retain SNPs that have P values between 0 and 1 (not na etc) GI.MP <- GI.MP[GI.MP[,3]>0,] GI.MP 
<- GI.MP[GI.MP[,3]<=1,] #Remove chr 0 and 99 GI.MP <- GI.MP[GI.MP[,1]!=0,] total_chromo=max(GI.MP[,1]) # print(dim(GI.MP)) if(!is.null(seqQTN))GI.MP[seqQTN,borrowSlot]=1 numMarker=nrow(GI.MP) bonferroniCutOff=-log10(cutOff/numMarker) GI.MP[,3] <- -log10(GI.MP[,3]) GI.MP[,5]=1:numMarker y.lim <- ceiling(max(GI.MP[,3])) chm.to.analyze <- unique(GI.MP[,1]) chm.to.analyze=chm.to.analyze[order(chm.to.analyze)] nchr=length(chm.to.analyze) GI.MP[,6]=1:(nrow(GI.MP)) MP_store=GI.MP index_GI=MP_store[,3]>=0 MP_store <- MP_store[index_GI,] ticks=NULL lastbase=0 for (i in chm.to.analyze) { index=(MP_store[,1]==i) ticks <- c(ticks, lastbase+mean(MP_store[index,2])) MP_store[index,2]=MP_store[index,2]+lastbase lastbase=max(MP_store[index,2]) } x0 <- as.numeric(MP_store[,2]) y0 <- as.numeric(MP_store[,3]) z0 <- as.numeric(MP_store[,1]) x1=sort(x0) position=order(y0,decreasing = TRUE) values=y0[position] if(length(values)<=DPP) { index=position[c(1:length(values))] }else{ values=sqrt(values) #This shift the weight a little bit to the low building. #Handler of bias plot rv=runif(length(values)) values=values+rv values=values[order(values,decreasing = T)] theMin=min(values) theMax=max(values) range=theMax-theMin interval=range/DPP ladder=round(values/interval) ladder2=c(ladder[-1],0) keep=ladder-ladder2 index=position[which(keep>=0)] } x=x0[index] y=y0[index] z=z0[index] # print(length(x)) #Extract QTN #if(!is.null(seqQTN))MP_store[seqQTN,borrowSlot]=1 #if(!is.null(interQTN))MP_store[interQTN,borrowSlot]=2 QTN=MP_store[which(MP_store[,borrowSlot]==1),] #Draw circles with same size and different thikness size=1 #1 ratio=10 #5 base=1 #1 numCHR=nchr themax=ceiling(max(y)) themin=floor(min(y)) wd=((y-themin+base)/(themax-themin+base))*size*ratio s=size-wd/ratio/2 ncycle=ceiling(nchr/5) ncolor=5*ncycle ncolor=band*ncycle thecolor=seq(1,nchr,by= ncycle) mypch=1 #plot.color= rainbow(ncolor+1) col.Rainbow=rainbow(ncolor+1) col.FarmCPU=rep(c("#CC6600","deepskyblue","orange","forestgreen","indianred3"),ceiling(numCHR/5)) col.Rushville=rep(c("orangered","navyblue"),ceiling(numCHR/2)) col.Congress=rep(c("deepskyblue3","firebrick"),ceiling(numCHR/2)) col.Ocean=rep(c("steelblue4","cyan3"),ceiling(numCHR/2)) col.PLINK=rep(c("gray10","gray70"),ceiling(numCHR/2)) col.Beach=rep(c("turquoise4","indianred3","darkolivegreen3","red","aquamarine3","darkgoldenrod"),ceiling(numCHR/5)) #col.Oceanic=rep(c( '#EC5f67', '#F99157', '#FAC863', '#99C794', '#5FB3B3', '#6699CC', '#C594C5', '#AB7967'),ceiling(numCHR/8)) #col.Oceanic=rep(c( '#EC5f67', '#FAC863', '#99C794', '#6699CC', '#C594C5', '#AB7967'),ceiling(numCHR/6)) col.Oceanic=rep(c( '#EC5f67', '#FAC863', '#99C794', '#6699CC', '#C594C5'),ceiling(numCHR/5)) col.cougars=rep(c( '#990000', 'dimgray'),ceiling(numCHR/2)) if(plot.style=="Rainbow")plot.color= col.Rainbow if(plot.style =="FarmCPU")plot.color= col.Rainbow if(plot.style =="Rushville")plot.color= col.Rushville if(plot.style =="Congress")plot.color= col.Congress if(plot.style =="Ocean")plot.color= col.Ocean if(plot.style =="PLINK")plot.color= col.PLINK if(plot.style =="Beach")plot.color= col.Beach if(plot.style =="Oceanic")plot.color= col.Oceanic if(plot.style =="cougars")plot.color= col.cougars #plot.color=rep(c( '#EC5f67', '#FAC863', '#99C794', '#6699CC', '#C594C5'),ceiling(ncolor/5)) plot(y~x,xlab="",ylab="" ,ylim=c(0,themax), cex.axis=4, cex.lab=4, ,col=plot.color[z],axes=FALSE,type = "p",pch=mypch,lwd=wd,cex=s+2.5,cex.main=4) mtext(side=2,expression(-log[10](italic(p))),line=3, cex=2.5) #Label QTN positions #print(head(QTN)) 
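#Sketch of the coordinate trick used just above: per-chromosome base-pair positions are
#shifted by the running total of the preceding chromosomes, and the axis tick for each
#chromosome is placed at its mean shifted position. toy.map is invented for illustration;
#if(FALSE) keeps the block from running inside this loop.
if(FALSE){
  toy.map  <- data.frame(chr=c(1,1,1,2,2,3), pos=c(10, 50, 90, 20, 60, 30))
  ticks    <- NULL
  lastbase <- 0
  for(chr in sort(unique(toy.map$chr))){
    idx   <- toy.map$chr == chr
    ticks <- c(ticks, lastbase + mean(toy.map$pos[idx]))   #label position for this chromosome
    toy.map$pos[idx] <- toy.map$pos[idx] + lastbase        #shift onto the genome-wide axis
    lastbase <- max(toy.map$pos[idx])                      #offset for the next chromosome
  }
  #toy.map$pos is now monotone across chromosomes; ticks marks each chromosome centre
}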
#print(head(interQTN)) if(!simulation){abline(v=QTN[2], lty = 2, lwd=1.5, col = "grey")}else{ #print("$$$$$$") points(QTN[,2], QTN[,3], pch=20, cex=2.5,lwd=2.5,col="black") #points(interQTN[,2], interQTN[,3], type="p",pch=8, cex=1,lwd=1.5,col="dimgrey") } #} if(plot.line){ #print(x) #print(as.numeric(new_xz[,2])) # if(!is.null(nrow(new_xz))) {abline(v=as.numeric(new_xz[,4]),col=plot.color[as.numeric(new_xz[,3])],lty=as.numeric(new_xz[,2]),untf=T,lwd=3) if(!is.null(nrow(new_xz))) {abline(v=as.numeric(new_xz[,4]),col="grey",lty=as.numeric(new_xz[,2]),untf=T,lwd=3) }else{abline(v=as.numeric(new_xz[1]),col=plot.color[as.numeric(new_xz[3])],lty=as.numeric(new_xz[2]),untf=T,lwd=3) } } #Add a horizontal line for bonferroniCutOff abline(h=bonferroniCutOff,lty=2,untf=T,lwd=3,col="red") axis(2, xaxp=c(1,themax,5),cex.axis=2.5,tick=F) if(k==Nenviron)axis(1, at=ticks,cex.axis=2.7,labels=chm.to.analyze,tick=F) mtext(side=4,paste(environ_name[k],sep=""),line=3,cex=2.5) box() }#end of environ_name dev.off() print("GAPIT.Manhattan.Mutiple.Plot has done !!!") return(list(multip_mapP=result0,xz=new_xz)) } #end of GAPIT.Manhattan #============================================================================================= `GAPIT.Numericalization` <- function(x,bit=2,effect="Add",impute="None", Create.indicator = FALSE, Major.allele.zero = FALSE, byRow=TRUE){ #Object: To convert character SNP genotpe to numerical #Output: Coresponding numerical value #Authors: Feng Tian and Zhiwu Zhang # Last update: May 30, 2011 ############################################################################################## if(bit==1) { x[x=="X"]="N" x[x=="-"]="N" x[x=="+"]="N" x[x=="/"]="N" x[x=="K"]="Z" #K (for GT genotype)is replaced by Z to ensure heterozygose has the largest value } if(bit==2) { x[x=="XX"]="N" x[x=="--"]="N" x[x=="++"]="N" x[x=="//"]="N" x[x=="NN"]="N" x[x=="00"]="N" } n=length(x) lev=levels(as.factor(x)) lev=setdiff(lev,"N") #print(lev) len=length(lev) #print(len) #Jiabo creat this code to convert AT TT to 1 and 2. 
2018.5.29 if(bit==2) { inter_store=c("AT","AG","AC","TA","GA","CA","GT","TG","GC","CG","CT","TC") inter=intersect(lev,inter_store) if(length(inter)>1) { x[x==inter[2]]=inter[1] n=length(x) lev=levels(as.factor(x)) lev=setdiff(lev,"N") #print(lev) len=length(lev) } if(len==2&bit==2) { #inter=intersect(lev,inter_store) if(!is.na(inter[1])) { lev=union(lev,"UU") len=len+1 } } if(len==3&bit==2) { inter=intersect(lev,inter_store) } } #print(lev) #print(len) #Jiabo code is end here #Genotype counts count=1:len for(i in 1:len){ count[i]=length(x[(x==lev[i])]) } #print(count) if(Major.allele.zero){ if(len>1 & len<=3){ #One bit: Make sure that the SNP with the major allele is on the top, and the SNP with the minor allele is on the second position if(bit==1){ count.temp = cbind(count, seq(1:len)) if(len==3) count.temp = count.temp[-3,] count.temp <- count.temp[order(count.temp[,1], decreasing = TRUE),] if(len==3)order = c(count.temp[,2],3)else order = count.temp[,2] } #Two bit: Make sure that the SNP with the major allele is on the top, and the SNP with the minor allele is on the third position if(bit==2){ count.temp = cbind(count, seq(1:len)) if(len==3) count.temp = count.temp[-2,] count.temp <- count.temp[order(count.temp[,1], decreasing = TRUE),] if(len==3) order = c(count.temp[1,2],2,count.temp[2,2])else order = count.temp[,2] } count = count[order] lev = lev[order] } #End if(len<=1 | len> 3) } #End if(Major.allele.zero) #print(x) #make two bit order genotype as AA,AT and TT, one bit as A(AA),T(TT) and X(AT) if(bit==1 & len==3){ temp=count[2] count[2]=count[3] count[3]=temp } #print(lev) #print(count) position=order(count) #Jiabo creat this code to convert AT TT to 1 and 2.2018.5.29 lev1=lev if(bit==2&len==3) { lev1[1]=lev[count==sort(count)[1]] lev1[2]=lev[count==sort(count)[2]] lev1[3]=lev[count==sort(count)[3]] position=c(1:3) lev=lev1 } #print(lev) #print(position) #print(inter) #Jiabo code is end here #1status other than 2 or 3 if(len<=1 | len> 3)x=0 #2 status if(len==2)x=ifelse(x=="N",NA,ifelse(x==lev[1],0,2)) #3 status if(bit==1){ if(len==3)x=ifelse(x=="N",NA,ifelse(x==lev[1],0,ifelse(x==lev[3],1,2))) }else{ if(len==3)x=ifelse(x=="N",NA,ifelse(x==lev[lev!=inter][1],0,ifelse(x==inter,1,2))) } #print(paste(lev,len,sep=" ")) #print(position) #missing data imputation if(impute=="Middle") {x[is.na(x)]=1 } if(len==3){ if(impute=="Minor") {x[is.na(x)]=position[1] -1} if(impute=="Major") {x[is.na(x)]=position[len]-1} }else{ if(impute=="Minor") {x[is.na(x)]=2*(position[1] -1)} if(impute=="Major") {x[is.na(x)]=2*(position[len]-1)} } #alternative genetic models if(effect=="Dom") x=ifelse(x==1,1,0) if(effect=="Left") x[x==1]=0 if(effect=="Right") x[x==1]=2 if(byRow) { result=matrix(x,n,1) }else{ result=matrix(x,1,n) } return(result) }#end of GAPIT.Numericalization function #============================================================================================= `GAPIT.PCA` <- function(X,taxa, PC.number = min(ncol(X),nrow(X)),file.output=TRUE,PCA.total=0,PCA.col=NULL,PCA.3d=FALSE){ # Object: Conduct a principal component analysis, and output the prinicpal components into the workspace, # a text file of the principal components, and a pdf of the scree plot # Authors: Alex Lipka and Hyun Min Kang # Last update: May 31, 2011 ############################################################################################## #Conduct the PCA print("Calling prcomp...") PCA.X <- prcomp(X) eigenvalues <- PCA.X$sdev^2 evp=eigenvalues/sum(eigenvalues) nout=min(10,length(evp)) xout=1:nout if(is.null(PCA.col)) 
PCA.col="red" print("Creating PCA graphs...") #Create a Scree plot if(file.output & PC.number>1) { pdf("GAPIT.PCA.eigenValue.pdf", width = 12, height = 12) par(mar=c(5,5,4,5)+.1,cex=2) #par(mar=c(10,9,9,10)+.1) plot(xout,eigenvalues[xout],type="b",col="blue",xlab="Principal components",ylab="Variance") par(new=TRUE) plot(xout,evp[xout]*100,type="n",col="red",xaxt="n",yaxt="n",xlab="",ylab="") axis(4) mtext("Percentage (%)",side=4,line=3,cex=2) dev.off() pdf("GAPIT.PCA.2D.pdf", width = 8, height = 8) par(mar = c(5,5,5,5)) maxPlot=min(as.numeric(PC.number[1]),3) for(i in 1:(maxPlot-1)){ for(j in (i+1):(maxPlot)){ plot(PCA.X$x[,i],PCA.X$x[,j],xlab=paste("PC",i,sep=""),ylab=paste("PC",j,sep=""),pch=19,col=PCA.col,cex.axis=1.3,cex.lab=1.4, cex.axis=1.2, lwd=2,las=1) } } dev.off() #output 3D plot if(PCA.3d==TRUE) { if(1>2) {if(!require(lattice)) install.packages("lattice") library(lattice) pca=as.data.frame(PCA.X$x) png(file="example%03d.png", width=500, heigh=500) for (i in seq(10, 80 , 1)){ print(cloud(PC1~PC2*PC3,data=pca,screen=list(x=i,y=i-40),pch=20,color="red", col.axis="blue",cex=1,cex.lab=1.4, cex.axis=1.2,lwd=3)) } dev.off() system("convert -delay 40 *.png GAPIT.PCA.3D.gif") # cleaning up file.remove(list.files(pattern=".png")) } if(!require(rgl)) install.packages("rgl") if(!require(rglwidget)) install.packages("rglwidget") library(rgl) PCA1 <- PCA.X$x[,1] PCA2 <- PCA.X$x[,2] PCA3 <- PCA.X$x[,3] plot3d(PCA1, PCA2, PCA3, col = "white",radius=0.01) num_col=length(unique(PCA.col)) if(num_col==1) { sids1 <- spheres3d(PCA1, PCA2, PCA3, col = PCA.col,radius=1) widgets<-rglwidget(width = 900, height = 900) %>%toggleWidget(ids = sids1, label = "PCA") }else if(num_col==2) { index1=PCA.col==unique(PCA.col)[1] index2=PCA.col==unique(PCA.col)[2] sids1 <- spheres3d(PCA1[index1], PCA2[index1], PCA3[index1], col = PCA.col[index1],radius=1) sids2 <- spheres3d(PCA1[index2], PCA2[index2], PCA3[index2], col = PCA.col[index2],radius=1) widgets<-rglwidget(width = 900, height = 900) %>%toggleWidget(ids = sids1, label = "Population 1")%>%toggleWidget(ids = sids2, label = "Population 2") }else if(num_col==3) { index1=PCA.col==unique(PCA.col)[1] index2=PCA.col==unique(PCA.col)[2] index3=PCA.col==unique(PCA.col)[3] sids1 <- spheres3d(PCA1[index1], PCA2[index1], PCA3[index1], col = PCA.col[index1],radius=1) sids2 <- spheres3d(PCA1[index2], PCA2[index2], PCA3[index2], col = PCA.col[index2],radius=1) sids3 <- spheres3d(PCA1[index3], PCA2[index3], PCA3[index3], col = PCA.col[index3],radius=1) widgets<-rglwidget(width = 900, height = 900) %>%toggleWidget(ids = sids1, label = "Population 1")%>%toggleWidget(ids = sids2, label = "Population 2")%>%toggleWidget(ids = sids3, label = "Population 3") }else if(num_col==4) { index1=PCA.col==unique(PCA.col)[1] index2=PCA.col==unique(PCA.col)[2] index3=PCA.col==unique(PCA.col)[3] index4=PCA.col==unique(PCA.col)[4] sids1 <- spheres3d(PCA1[index1], PCA2[index1], PCA3[index1], col = PCA.col[index1],radius=1) sids2 <- spheres3d(PCA1[index2], PCA2[index2], PCA3[index2], col = PCA.col[index2],radius=1) sids3 <- spheres3d(PCA1[index3], PCA2[index3], PCA3[index3], col = PCA.col[index3],radius=1) sids4 <- spheres3d(PCA1[index4], PCA2[index4], PCA3[index4], col = PCA.col[index4],radius=1) widgets<-rglwidget(width = 900, height = 900) %>%toggleWidget(ids = sids1, label = "Population 1")%>%toggleWidget(ids = sids2, label = "Population 2")%>%toggleWidget(ids = sids3, label = "Population 3")%>%toggleWidget(ids = sids4, label = "Population 4") } if (interactive()) widgets 
htmltools::save_html(widgets, "Interactive.PCA.html") } if(!require(scatterplot3d)) install.packages("scatterplot3d") library(scatterplot3d) pdf("GAPIT.PCA.3D.pdf", width = 7, height = 7) par(mar = c(5,5,5,5)) scatterplot3d(PCA.X$x[,1],PCA.X$x[,2],PCA.X$x[,3],xlab=paste("PC",1,sep=""),ylab=paste("PC",2,sep=""),zlab=paste("PC",3,sep="") ,pch=20,color=PCA.col,col.axis="blue",cex=1,cex.lab=1.4, cex.axis=1.2,lwd=3,angle=55,scale.y=0.7) dev.off() } print("Joining taxa...") #Extract number of PCs needed PCs <- cbind(taxa,as.data.frame(PCA.X$x)) #Remove duplicate (This is taken care by QC) #PCs.unique <- unique(PCs[,1]) #PCs <-PCs[match(PCs.unique, PCs[,1], nomatch = 0), ] print("Exporting PCs...") #Write the PCs into a text file if(file.output) write.table(PCs[,1:(PCA.total+1)], "GAPIT.PCA.csv", quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) if(file.output) write.table(PCA.X$rotation[,1:PC.number], "GAPIT.PCA.loadings.csv", quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) if(file.output) write.table(eigenvalues, "GAPIT.PCA.eigenvalues.csv", quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) #Return the PCs return(list(PCs=PCs,EV=PCA.X$sdev^2,nPCs=NULL)) } #============================================================================================= `GAPIT.Perform.BH.FDR.Multiple.Correction.Procedure` <- function(PWI = PWI, FDR.Rate = 0.05, FDR.Procedure = "BH"){ #Object: Conduct the Benjamini-Hochberg FDR-Controlling Procedure #Output: PWIP, number.of.significant.SNPs #Authors: Alex Lipka and Zhiwu Zhang # Last update: May 5, 2011 ############################################################################################## #Make sure that your compouter has the latest version of Bioconductor (the "Biobase" package) and multtest if(is.null(PWI)) { PWIP=NULL number.of.significant.SNPs = 0 } if(!is.null(PWI)) { #library(multtest) if(dim(PWI)[1] == 1){ PWIP <- cbind(PWI, PWI[4]) colnames(PWIP)[9] <- "FDR_Adjusted_P-values" } if(dim(PWI)[1] > 1){ #mt.rawp2adjp Performs the Simes procedure. 
#The output should be two columns. Left column: original p-value; right column: Simes corrected p-value
res <- mt.rawp2adjp(PWI[,4], FDR.Procedure)
#This command should order the p-values in the order of the SNPs in the data set
adjp <- res$adjp[order(res$index), ]
#round(adjp[1:7,],4)
#Logical statement: 0, if Ho is not rejected; 1, if Ho is rejected, by the Simes corrected p-value
# temp <- mt.reject(adjp[,2], FDR.Rate)
#Lists the number of SNPs that were rejected by the BY procedure
#temp$r
#Attach the FDR adjusted p-values to AS_Results
PWIP <- cbind(PWI, adjp[,2])
#Sort these data by lowest to highest FDR adjusted p-value
PWIP <- PWIP[order(PWIP[,4]),]
colnames(PWIP)[9] <- "FDR_Adjusted_P-values"
# number.of.significant.SNPs = temp$r
}
#print("GAPIT.Perform.BH.FDR.Multiple.Correction.Procedure accomplished successfully!")
}
#return(list(PWIP=PWIP, number.of.significant.SNPs = number.of.significant.SNPs))
return(list(PWIP=PWIP))
}#GAPIT.Perform.BH.FDR.Multiple.Correction.Procedure ends here
#=============================================================================================
`GAPIT.Phenotype.PCA.View` <-function(PC=NULL,myY=NULL){
# Object: Examine the relationship between principal components and the phenotype; result: a pdf of phenotype vs. PC scatter plots
# PC: Principal components (taxa in column 1)
# myY: Phenotype data
# Authors: You Tang
# Last update: Sep 7, 2015
##############################################################################################
print("GAPIT.Phenotype.PCA.View")
if(is.null(PC)){stop("Validation Invalid. Please input PC values!")}
if(is.null(myY)){stop("Validation Invalid. Please provide a valid phenotype file!")}
y<-myY[!is.na(myY[,2]),c(1:2)]
traitname=colnames(y)[2]
cv1<-PC[!is.na(match(PC[,1],y[,1])),]
y1<-y[!is.na(match(y[,1],cv1[,1])),]
y2<-y1[order(y1[,1]),]
cv2<-cv1[order(cv1[,1]),]
lcor=round(cor(y2[,-1],cv2[,-1])*100)/100
y.range=max(y2[,2])-min(y2[,2])
y.mean=mean(y2[,2])
n.col=54
y.int=round(abs(y2[,2]-y.mean)/y.range*(.5*n.col-1)*2)+1
mycol=rainbow(n.col)
y.col=mycol[y.int]
y.lab=paste("PC",seq(1:4)," (r=",lcor,")",sep="")
pdf(paste("GAPIT.",traitname,"_vs_PC.pdf",sep=""), width =9, height = 6)
#par(mar = c(5,5,5,5))
par(mar = c(5,5,2,2))
par(mfrow=c(2,2))
plot(y2[,2],cv2[,2],bg="lightgray",xlab="Phenotype",ylab=y.lab[1],main="",cex.lab=1.4,col=y.col)
if(ncol(PC)>2) plot(y2[,2],cv2[,3],bg="lightgray",xlab="Phenotype",ylab=y.lab[2],main="",cex.lab=1.4,col=y.col)
if(ncol(PC)>3) plot(y2[,2],cv2[,4],bg="lightgray",xlab="Phenotype",ylab=y.lab[3],main="",cex.lab=1.4,col=y.col)
if(ncol(PC)>4) plot(y2[,2],cv2[,5],bg="lightgray",xlab="Phenotype",ylab=y.lab[4],main="",cex.lab=1.4,col=y.col)
dev.off()
print(paste("GAPIT.Phenotype.PCA.View output pdf generated ","successfully!"
,sep = "")) #GAPIT.Phenotype.View } #============================================================================================= `GAPIT.Phenotype.Simulation` <- function(GD,GM=NULL,h2=.75,NQTN=10,QTNDist="normal",effectunit=1,category=1,r=0.25,CV,cveff=NULL,a2=0,adim=2){ #Object: To simulate phenotype from genotye #Input: GD - n by m +1 dataframe or n by m big.matrix #intput: h2 - heritability #intput: NQTN - number of QTNs #intput: QTNDist - Distribution of QTN, options are "geometry", "normal" #intput: effectunit - effect of fitst QTN, the nect effect is its squre #intput: theSeed - seed for randomization #Output: Y,U,E,QTN.Position, and effect #Straitegy: NA #Authors: Qishan Wang and Zhiwu Zhang #Start date: April 4, 2013 #Last update: April 4, 2013 #Set orientation #Strategy: the number of rows in GD and GM are the same if GD has SNP as row ############################################################################################## #print("GAPIT.Phenotype.Simulation") nm=ncol(GD)-1 #Initial by assume GD has snp in col if(!is.null(GM)) nm=nrow(GM) ngd1=nrow(GD) ngd2=ncol(GD) ngd1=abs(ngd1-nm) ngd2=abs(ngd2-nm) orientation="row" ns=ncol(GD) if(min(ngd1,ngd2)>0){ orientation="col" ns=nrow(GD) } n= ns #number of samples m=nm #number of markers #Set QTN effects if (QTNDist=="normal"){ addeffect<-rnorm(NQTN,0,1) }else {addeffect=effectunit^(1:NQTN)} #Simulating Genetic effect #r=sample(2:m,NQTN,replace=F) QTN.position=sample(1:m,NQTN,replace=F) if(orientation=="col") SNPQ=as.matrix(GD[,(QTN.position+1)]) if(orientation=="row") SNPQ=t(as.matrix(GD[QTN.position,])) #Replace non-variant QTNs (does not work yet) #inComplete=TRUE #while(inComplete){ # inComplete=FALSE # myVar=apply(SNPQ,2,var) # index=which(myVar==0) # nInVar=length(index) # if(nInVar>0){ # inComplete=TRUE # New.position=sample(1:m,nInVar,replace=F) # if(orientation=="col") SNPQ[,index]=as.matrix(GD[,(New.position+1)]) # if(orientation=="row") SNPQ[,index]=t(as.matrix(GD[New.position,])) # } #}#end of while effect=SNPQ%*%addeffect effectvar=var(effect) #Interaction cp=0*effect nint= adim if(a2>0&NQTN>=nint){ for(i in nint:nint){ Int.position=sample(NQTN,i,replace=F) cp=apply(SNPQ[,Int.position],1,prod) } cpvar=var(cp) intvar=(effectvar-a2*effectvar)/a2 if(is.na(cp[1]))stop("something wrong in simulating interaction") if(cpvar>0){ #print(c(effectvar,intvar,cpvar,var(cp),a2)) #print(dim(cp)) cp=cp/sqrt(cpvar) cp=cp*sqrt(intvar) effectvar=effectvar+intvar }else{cp=0*effect} } #Residual variance if(h2 >0){ residualvar=(effectvar-h2*effectvar)/h2 }else{ residualvar=1 effect= effect*0 } #Variance explained by each SNP effectInd=SNPQ%*%diag(addeffect) varInd=apply(effectInd,2,var) effectSeq=order(varInd,decreasing = TRUE) #Simulating Residual and phenotype residual=rnorm(n,0,sqrt(residualvar)) #environment effect if(!is.null(cveff)){ #print(cveff) vy=effectvar+residualvar #print(vy) ev=cveff*vy/(1-cveff) ec=sqrt(ev)/sqrt(diag(var(CV[,-1]))) enveff=as.matrix(myCV[,-1])%*%ec #print(cbind(effectvar,residualvar,ev,ec)) #print(cbind(effect,enveff,residual)) residual=residual+enveff } #Simulating phenotype y=effect+residual+cp if(orientation=="col") myY=cbind(as.data.frame(GD[,1]),as.data.frame(y)) if(orientation=="row") myY=cbind(NA,as.data.frame(y)) #Convert to category phenotype if(category>1){ myQuantile =(0:category)/category y.num= myY[,2] cutoff=quantile(y.num, myQuantile) y.cat= .bincode(y.num,cutoff,include.lowest = T) myY[,2]=y.cat } #Binary phenotype if(category==0){ #Standardization #print("Binary phenotype") 
#print(mean(effect)) #print(sqrt(effectvar)) #print(dim(effect)) x=(effect-mean(effect)) x=x/as.numeric(sqrt(effectvar)) myF=GAPIT.BIPH(x,h2=h2,r=r) p=runif(n) index=pm){stop("Validation Invalid. Please select folders num >1 !")} vali<-matrix(nr=m.sample,nc=num-1) cali<-matrix(nr=m-m.sample,nc=num-1) #vali<-list(v1=unique(as.character(sample(y$Taxa, m.sample)))) #cali<-list(c1=y[!(y$Taxa %in% as.matrix(as.data.frame(vali[1]))), 'Taxa']) vali[,1]<-unique(as.character(sample(y$Taxa, m.sample))) cali[,1]<-unique(as.character(y[!(y$Taxa %in% vali[,1]), 'Taxa'])) for(j in 2:num) { if(j!=num) { vali[,j]<-unique(as.character(sample(y[!(y$Taxa %in% vali[,1:j-1]), 'Taxa'], m.sample) )) } if(j==num) { valilast=unique(as.character(y[!(y$Taxa %in% vali[,1:j-1]), 'Taxa'])) } if(j!=num) cali[,j]<-unique(as.character(y[!(y$Taxa %in% vali[,j]), 'Taxa'])) if(j==num) calilast <<- y[!(y$Taxa %in% valilast), 'Taxa'] } i=sample(1:num, size = 1) if(i!=num){ lines.vali<-vali[,i] }else{ lines.vali<-valilast } #use only genotypes that were genotyped and phenotyped commonGeno_v <- lines.vali[lines.vali %in% myK[,1]] yvali<- y[match(commonGeno_v,y$Taxa),] if(i!=num){ lines.cali<-cali[,i] }else{ lines.cali<-calilast } #use only genotypes that were genotyped and phenotyped commonGeno_c <- lines.cali[lines.cali %in% myK[,1]] ycali<- y[match(commonGeno_c,y$Taxa),] Y.raw=ycali[,c(1,2)]#choos a trait myY=Y.raw myKI=myK max.groups=m #Run GAPIT ############################################# blupGAPIT <- GAPIT( Y=myY, KI=myKI, #group.from=max.groups, group.from=1, group.to=max.groups, #group.by=10, #PCA.total=3, SNP.test=FALSE, file.output=FALSE ) blup_prediction=blupGAPIT$GPS blue<-blupGAPIT$Pred$BLUE mean_blue<-mean(blue) blup_prediction.ref<-blup_prediction[match(commonGeno_c,blup_prediction$Taxa),] blup_prediction.inf<-blup_prediction[match(commonGeno_v,blup_prediction$Taxa),] inf_BLUP<-blup_prediction.inf$BLUP ref_BLUP<-blup_prediction.ref$BLUP inf_pred<-inf_BLUP+mean_blue ref_pred<-ref_BLUP+mean_blue inf_all<-cbind(blup_prediction.inf,inf_pred) ref_all<-cbind(blup_prediction.ref,ref_pred) inf_Y_all<-merge(y,inf_all,by.x="Taxa",by.y="Taxa") ref_Y_all<-merge(y,ref_all,by.x="Taxa",by.y="Taxa") name.of.trait=noquote(names(Y.raw)[2]) pdf(paste("GAPIT.Prediction ", name.of.trait,".Predict reference.pdf", sep = ""), width =6, height = 6) par(mar = c(5,5,5,5)) plot(ref_Y_all[,2],ref_Y_all[,8],pch=1,xlab="Observed(Ref)",ylab="Predicted(Ref)",cex.lab=1.3,cex.axis=1.2,lwd=2) #xlim=c(50,110),ylim=c(50,110), kr<-lm(ref_Y_all[,8]~ref_Y_all[,2]) abline(a = kr$coefficients[1], b = kr$coefficients[2], col = "red",lwd=4,lty=1) #v1<-max(ref_Y_all[,2]])*10/10 #text(v1,kr$coefficients[1]+kr$coefficients[2]*v1,paste("R^2=",format(kr$coefficients[2], digits = 3),seq=""), col = "blue", adj = c(0, -.1)) legend("bottomright",paste("R^2=",format(kr$coefficients[2], digits = 4),seq=""), col="white",text.col="blue",lwd=2,cex=1.2,bty="n") dev.off() pdf(paste("GAPIT.Prediction ", name.of.trait,".Predict inference.pdf", sep = ""), width = 6, height = 6) par(mar = c(5,5,5,5)) plot(inf_Y_all[,2],inf_Y_all[,8],pch=1,xlab="Observed(Inf)",ylab="Predicted(Inf)",cex.lab=1.5,lwd=2,,cex.axis=1.2)#xlim=c(50,110),ylim=c(45,100), ki<-lm(inf_Y_all[,8]~inf_Y_all[,2]) abline(a = ki$coefficients[1], b = ki$coefficients[2], col = "red",lwd=3,lty=1) #v0<-max(inf_Y_all[,2]) #text(v0,ki$coefficients[1]+ki$coefficients[2]*v0,paste("R^2=",format(ki$coefficients[2], digits = 4),seq=""), col = "blue", adj = c(0, -.1)) 
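#Note on the legend below: ki$coefficients[2] (and kr$coefficients[2] above) is the slope of the
#predicted-vs-observed regression rather than the coefficient of determination; if R^2 itself is
#wanted, summary(ki)$r.squared would be the conventional quantity. The argument written as
#seq="" is presumably intended to be sep="".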
legend("bottomright",paste("R^2=",format(ki$coefficients[2], digits = 4),seq=""), col="white",text.col="blue",lwd=2,cex=1.2,bty="n") dev.off() print(paste("GAPIT.Prediction ", name.of.trait,".Predict phenotype.","successfully!" ,sep = "")) return(list(inf_Y_all,ref_Y_all)) } #end Prediction one time #============================================================================================= `GAPIT.Pruning` <- function(values,DPP=5000){ #Object: To get index of subset that evenly distribute #Output: Index #Authors: Zhiwu Zhang # Last update: May 28, 2011 ############################################################################################## #No change if below the requirement if(length(values)<=DPP)return(c(1:length(values))) #values= log.P.values values=sqrt(values) #This shift the weight a little bit to the low building. #Handler of bias plot rv=runif(length(values)) values=values+rv values=values[order(values,decreasing = T)] theMin=min(values) theMax=max(values) range=theMax-theMin interval=range/DPP ladder=round(values/interval) ladder2=c(ladder[-1],0) keep=ladder-ladder2 index=which(keep>0) return(index) }#end of GAPIT.Pruning #============================================================================================= `GAPIT.QC` <- function(Y=NULL,KI=NULL,GT=NULL,CV=NULL,Z=NULL,GK=NULL){ #Object: to do data quality control #Output: Y, KI, GD, CV, Z, flag #Authors: Zhiwu Zhang and Alex Lipka # Last update: April 14, 2011 ############################################################################################## #Remove duplicates print("Removing duplicates...") #print(dim(CV)) Y=GAPIT.RemoveDuplicate(Y) CV=GAPIT.RemoveDuplicate(CV) GK=GAPIT.RemoveDuplicate(GK) if(!is.null(Z))Z=GAPIT.RemoveDuplicate(Z) #Remove missing phenotype print("Removing NaN...") Y=Y[which(Y[,2]!="NaN"),] # Remove duplicates for GT # GT row wise, Z column wise, and KI both direction. print("Remove duplicates for GT...") #print(dim(GT)) if(!is.null(GT)) { if(is.null(dim(GT)))taxa.kept=unique(GT) if(!is.null(dim(GT)))taxa.kept=unique(GT[,1]) }else{ taxa.kept=unique(Y[,1]) } # Remove duplicates for KI print("Remove duplicates for KI...") # improve speed: remove t() and use cbind if(!is.null(KI)) { taxa.all=KI[,1] taxa.uniqe=unique(taxa.all) position=match(taxa.uniqe, taxa.all,nomatch = 0) position.addition=cbind(1,t(1+position)) KI=KI[position,position.addition] } #Sort KI if(!is.null(KI)) { taxa.all=KI[,1] position=order(taxa.all) position.addition=cbind(1,t(1+position)) KI=KI[position,position.addition] } # Remove duplicates for Z rowwise print("Remove duplicates for Z (column wise)...") if(!is.null(Z)) { taxa.all=as.matrix(Z[1,]) taxa.uniqe=intersect(taxa.all,taxa.all) position=match(taxa.uniqe, taxa.all,nomatch = 0) Z=Z[,position] } #Remove the columns of Z if they are not in KI/GT. 
KI/GT are allowed to have individuals not in Z print("Maching Z with Kinship colwise...") if(!is.null(KI)) { taxa.all=KI[,1] taxa.kinship=unique(taxa.all) } if(!is.null(Z) & !is.null(KI)) { #get common taxe between KI and Z taxa.Z=as.matrix(Z[1,]) #taxa.Z=colnames(Z) #This does not work for names starting with numerical or "-" \ if(is.null(KI)){ taxa.Z_K_common=taxa.Z }else{ taxa.Z_K_common=intersect(taxa.kinship,taxa.Z) } Z <-cbind(Z[,1], Z[,match(taxa.Z_K_common, taxa.Z, nomatch = 0)]) #Remove the rows of Z if all the ellements sum to 0 #@@@ improve speed: too many Zs print("Maching Z without origin...") Z1=Z[-1,-1] Z2=data.frame(Z1) Z3=as.matrix(Z2) Z4=as.numeric(Z3) #one dimemtion Z5=matrix(data = Z4, nrow = nrow(Z1), ncol = ncol(Z1)) RS=rowSums(Z5)>0 #The above process could be simplified! Z <- Z[c(TRUE,RS),] #make individuals the same in Z, Y, GT and CV print("Maching GT and CV...") if(length(Z)<=1)stop("GAPIT says: there is no place to match IDs!") }# end of if(!is.null(Z) & !is.null(K)) # get intersect of all the data taxa=intersect(Y[,1],Y[,1]) if(!is.null(Z))taxa=intersect(Z[-1,1],taxa) if(!is.null(GT))taxa=intersect(taxa,taxa.kept) if(!is.null(CV))taxa=intersect(taxa,CV[,1]) if(!is.null(GK))taxa=intersect(taxa,GK[,1]) if(length(taxa)<=1)stop("GAPIT says: There is no individual ID matched to covariate. Please check!") if(!is.null(Z)) { #Remove taxa in Z that are not in others, columnwise t=c(TRUE, Z[-1,1]%in%taxa) if(length(t)<=2)stop("GAPIT says: There is no individual ID matched among data. Please check!") Z <- Z[t,] #Remove the columns of Z if all the ellements sum to 0 print("QC final process...") #@@@ improve speed: too many Zs Z1=Z[-1,-1] Z2=data.frame(Z1) Z3=as.matrix(Z2) Z4=as.numeric(Z3) #one dimemtion Z5=matrix(data = Z4, nrow = nrow(Z1), ncol = ncol(Z1)) CS=colSums(Z5)>0 #The above process could be simplified! 
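#A possible shortcut for the Z1..Z5 steps above (sketch only, behaviour assumed equivalent):
#  Zmat <- matrix(as.numeric(as.matrix(Z[-1,-1])), nrow = nrow(Z) - 1)
#  CS   <- colSums(Zmat) > 0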
Z <- Z[,c(TRUE,CS)] } #Filtering with comman taxa Y <- Y[Y[,1]%in%taxa,] if(!is.null(CV)) CV=CV[CV[,1]%in%taxa,] if(!is.null(GK)) GK=GK[GK[,1]%in%taxa,] if(!is.null(GT)) taxa.kept=data.frame(taxa.kept[taxa.kept%in%taxa]) #Y <- Y[Y[,1]%in%taxa.kept,] #To sort Y, GT, CV and Z Y=Y[order(Y[,1]),] CV=CV[order(CV[,1]),] if(!is.null(GK))GK=GK[order(GK[,1]),] if(!is.null(Z))Z=Z[c(1,1+order(Z[-1,1])),] #get position of taxa.kept in GT #position=match(taxa.kept[,1], GT[,1],nomatch = 0) if(is.null(dim(GT)))position=match(taxa.kept, GT,nomatch = 0) if(!is.null(dim(GT)))position=match(taxa.kept[,1], GT[,1],nomatch = 0) if(is.null(dim(taxa.kept)))order.taxa.kept=order(taxa.kept) if(!is.null(dim(taxa.kept)))order.taxa.kept=order(taxa.kept[,1]) GTindex=position[order.taxa.kept] flag=nrow(Y)==nrow(Z)-1&nrow(Y)==nrow(GT)&nrow(Y)==nrow(CV) print("GAPIT.QC accomplished successfully!") #print(dim(Y)) #print(dim(CV)) #print(dim(KI)) return(list(Y = Y, KI = KI, GT = GT, CV = CV, Z = Z, GK = GK, GTindex=GTindex, flag=flag)) }#The function GAPIT.QC ends here #============================================================================================= `GAPIT.QQ` <- function(P.values, plot.type = "log_P_values", name.of.trait = "Trait",DPP=50000,plot.style="rainbow"){ #Object: Make a QQ-Plot of the P-values #Options for plot.type = "log_P_values" and "P_values" #Output: A pdf of the QQ-plot #Authors: Alex Lipka and Zhiwu Zhang # Last update: May 9, 2011 ############################################################################################## # Sort the data by the raw P-values #print("Sorting p values") #print(paste("Number of P values: ",length(P.values))) #remove NAs and keep the ones between between 0 and 1 P.values=P.values[!is.na(P.values)] P.values=P.values[P.values>0] P.values=P.values[P.values<=1] if(length(P.values[P.values>0])<1) return(NULL) N=length(P.values) DPP=round(DPP/4) #Reduce to 1/4 for QQ plot P.values <- P.values[order(P.values)] #Set up the p-value quantiles #print("Setting p_value_quantiles...") p_value_quantiles <- (1:length(P.values))/(length(P.values)+1) if(plot.type == "log_P_values") { log.P.values <- -log10(P.values) log.Quantiles <- -log10(p_value_quantiles) index=GAPIT.Pruning(log.P.values,DPP=DPP) log.P.values=log.P.values[index ] log.Quantiles=log.Quantiles[index] if(plot.style=="FarmCPU"){ pdf(paste("FarmCPU.", name.of.trait,".QQ-Plot.pdf" ,sep = ""),width = 5,height=5) par(mar = c(5,6,5,3)) } if(plot.style=="rainbow"){ pdf(paste("GAPIT.", name.of.trait,".QQ-Plot.pdf" ,sep = ""),width = 5,height=5) par(mar = c(5,6,5,3)) } #Add conficence interval N1=length(log.Quantiles) ## create the confidence intervals c95 <- rep(NA,N1) c05 <- rep(NA,N1) for(j in 1:N1){ i=ceiling((10^-log.Quantiles[j])*N) if(i==0)i=1 c95[j] <- qbeta(0.95,i,N-i+1) c05[j] <- qbeta(0.05,i,N-i+1) #print(c(j,i,c95[j],c05[j])) } #CI Lines #plot(log.Quantiles, -log10(c05), xlim = c(0,max(log.Quantiles)), ylim = c(0,max(log.P.values)), type="l",lty=5, axes=FALSE, xlab="", ylab="",col="black") #par(new=T) #plot(log.Quantiles, -log10(c95), xlim = c(0,max(log.Quantiles)), ylim = c(0,max(log.P.values)), type="l",lty=5, axes=FALSE, xlab="", ylab="",col="black") #CI shade plot(NULL, xlim = c(0,max(log.Quantiles)), ylim = c(0,max(log.P.values)), type="l",lty=5, lwd = 2, axes=FALSE, xlab="", ylab="",col="gray") index=length(c95):1 polygon(c(log.Quantiles[index],log.Quantiles),c(-log10(c05)[index],-log10(c95)),col='gray',border=NA) #Diagonal line abline(a = 0, b = 1, col = "red",lwd=2) #data par(new=T) 
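#Background for the shaded band above: under the null hypothesis the i-th smallest of N p-values
#follows a Beta(i, N - i + 1) distribution, so the pointwise 5%-95% envelope is
#  qbeta(c(0.05, 0.95), i, N - i + 1)
#evaluated at each plotted quantile, which is what the c05/c95 loop computes.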
if(plot.style=="FarmCPU"){ plot(log.Quantiles, log.P.values, cex.axis=1.1, cex.lab=1.3, lty = 1, lwd = 2, col = "Black" ,bty='l', xlab =expression(Expected~~-log[10](italic(p))), ylab = expression(Observed~~-log[10](italic(p))), main = paste(name.of.trait,sep=""),pch=20) } if(plot.style=="rainbow"){ plot(log.Quantiles, log.P.values, xlim = c(0,max(log.Quantiles)), ylim = c(0,max(log.P.values)), cex.axis=1.1, cex.lab=1.3, lty = 1, lwd = 2, col = "Blue" ,xlab =expression(Expected~~-log[10](italic(p))),ylab = expression(Observed~~-log[10](italic(p))), main = paste(name.of.trait,sep="")) } dev.off() } if(plot.type == "P_values") { pdf(paste("QQ-Plot_", name.of.trait,".pdf" ,sep = "")) par(mar = c(5,5,5,5)) qqplot(p_value_quantiles, P.values, xlim = c(0,1), ylim = c(0,1), type = "l" , xlab = "Uniform[0,1] Theoretical Quantiles", lty = 1, lwd = 1, ylab = "Quantiles of P-values from GWAS", col = "Blue", main = paste(name.of.trait,sep=" ")) abline(a = 0, b = 1, col = "red") dev.off() } #print("GAPIT.QQ accomplished successfully!") } #============================================================================================= `GAPIT` <- function(Y=NULL,G=NULL,GD=NULL,GM=NULL,KI=NULL,Z=NULL,CV=NULL,CV.Inheritance=NULL,GP=NULL,GK=NULL, group.from=1000000 ,group.to=1000000,group.by=20,DPP=100000, kinship.cluster="average", kinship.group='Mean',kinship.algorithm="VanRaden", bin.from=10000,bin.to=10000,bin.by=10000,inclosure.from=10,inclosure.to=10,inclosure.by=10, SNP.P3D=TRUE,SNP.effect="Add",SNP.impute="Middle",PCA.total=0, SNP.fraction = 1, seed = NULL, BINS = 20,SNP.test=TRUE, SNP.MAF=0,FDR.Rate = 1, SNP.FDR=1,SNP.permutation=FALSE,SNP.CV=NULL,SNP.robust="GLM", file.from=1, file.to=1, file.total=NULL, file.fragment = 99999,file.path=NULL, file.G=NULL, file.Ext.G=NULL,file.GD=NULL, file.GM=NULL, file.Ext.GD=NULL,file.Ext.GM=NULL, ngrid = 100, llim = -10, ulim = 10, esp = 1e-10,LD.chromosome=NULL,LD.location=NULL,LD.range=NULL,PCA.col=NULL,PCA.3d=FALSE,NJtree.group=NULL,NJtree.type=c("fan","unrooted"), sangwich.top=NULL,sangwich.bottom=NULL,QC=TRUE,GTindex=NULL,LD=0.1,plot.bin=10^5, file.output=TRUE,cutOff=0.01, Model.selection = FALSE,output.numerical = FALSE, output.hapmap = FALSE, Create.indicator = FALSE,Multi_iter=TRUE,num_regwas=10, QTN=NULL, QTN.round=1,QTN.limit=0, QTN.update=TRUE, QTN.method="Penalty", Major.allele.zero = FALSE,Random.model=FALSE, method.GLM="FarmCPU.LM",method.sub="reward",method.sub.final="reward",method.bin="static",bin.size=c(1000000),bin.selection=c(10,20,50,100,200,500,1000), memo=NULL,Prior=NULL,ncpus=1,maxLoop=3,threshold.output=.01,Inter.Plot=FALSE,Inter.type=c("m","q"), WS=c(1e0,1e3,1e4,1e5,1e6,1e7),alpha=c(.01,.05,.1,.2,.3,.4,.5,.6,.7,.8,.9,1),maxOut=100,QTN.position=NULL,CG=NULL, converge=1,iteration.output=FALSE,acceleration=0,iteration.method="accum",PCA.View.output=TRUE,Geno.View.output=TRUE,plot.style="Oceanic",SUPER_GD=NULL,SUPER_GS=FALSE, h2=NULL,NQTN=NULL,QTNDist="normal",effectunit=1,category=1,r=0.25,cveff=NULL,a2=0,adim=2,Multiple_analysis=FALSE, model="MLM",Para=NULL ){ #Object: To perform GWAS and GPS (Genomic Prediction/Selection) #Designed by Zhiwu Zhang #Writen by Jiabo Wang #Last update: Novenber 3, 2016 ############################################################################################## print("--------------------- Welcome to GAPIT ----------------------------") echo=TRUE GAPIT.Version=GAPIT.0000() # if(!is.null(model))if(!match(model,c("MLM","CMLM","SUPER","GLM","FarmCPU","Blink","MLMM","gBLUP","cBLUP","sBLUP"))) stop(paste("PLease 
choose one model from ","MLM","CMLM","SUPER","GLM","FarmCPU","Blink","gBLUP","cBLUP","sBLUP",sep="")) #Allow either KI or K, but not both if(model%in%c("gBLUP","cBLUP","sBLUP")) { SNP.test=FALSE SUPER_GS=TRUE } if(!is.null(KI)&is.null(GD)&is.null(G)&is.null(file.G)&is.null(file.GD))SNP.test=FALSE model_store=model for(m in 1:length(model_store)) { model=model_store[m] if(!is.null(Y)) { if(group.from1)Para$memo=model print(Para$memo) GAPIT_list=list(group.from=group.from ,group.to=group.to,group.by=group.by,DPP=DPP,kinship.cluster=kinship.cluster, kinship.group=kinship.group,kinship.algorithm=kinship.algorithm, bin.from=bin.from,bin.to=bin.to,bin.by=bin.by,inclosure.from=inclosure.from,inclosure.to=inclosure.to,inclosure.by=inclosure.by,SNP.P3D=SNP.P3D,SNP.effect=SNP.effect,SNP.impute=SNP.impute,PCA.total=PCA.total, SNP.fraction = SNP.fraction, seed = seed, BINS = 20,SNP.test=SNP.test, SNP.MAF=SNP.MAF,FDR.Rate = FDR.Rate, SNP.FDR=SNP.FDR,SNP.permutation=SNP.permutation,SNP.CV=NULL,SNP.robust="GLM",file.from=file.from, file.to=file.to, file.total=file.total, file.fragment = file.fragment,file.path=file.path, file.G=file.G, file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM,ngrid = 100, llim = -10, ulim = 10, esp = 1e-10,Inter.Plot=Inter.Plot,Inter.type=Inter.type, LD.chromosome=LD.chromosome,LD.location=LD.location,LD.range=LD.range,PCA.col=PCA.col,PCA.3d=PCA.3d,NJtree.group=NJtree.group,NJtree.type=NJtree.type, sangwich.top=sangwich.top,sangwich.bottom=sangwich.bottom,QC=QC,GTindex=GTindex,LD=LD,plot.bin=plot.bin,file.output=file.output,cutOff=cutOff, Model.selection = Model.selection,output.numerical = output.numerical, output.hapmap = output.hapmap, Create.indicator = Create.indicator,QTN=QTN, QTN.round=1,QTN.limit=0, QTN.update=TRUE, QTN.method="Penalty", Major.allele.zero = Major.allele.zero, method.GLM=method.GLM,method.sub=method.sub,method.sub.final="reward",method.bin="static",bin.size=bin.size,bin.selection=bin.selection,model=model,Random.model=Random.model, h2=h2,NQTN=NQTN,QTNDist="normal",effectunit=effectunit,category=category,r=r,cveff=NULL,a2=0,adim=2,Multi_iter=Multi_iter,num_regwas=num_regwas, memo="",Prior=NULL,ncpus=1,maxLoop=maxLoop,threshold.output=threshold.output,WS=c(1e0,1e3,1e4,1e5,1e6,1e7),alpha=alpha,maxOut=100,QTN.position=QTN.position,CG=CG, converge=converge,iteration.output=iteration.output,acceleration=0,iteration.method="accum",PCA.View.output=PCA.View.output,Geno.View.output=Geno.View.output,plot.style="Oceanic",SUPER_GD=NULL,SUPER_GS=SUPER_GS,Multiple_analysis=Multiple_analysis) G_list_M=rownames(as.matrix(GAPIT_list)) P_list_M=rownames(as.matrix(Para)) Para=c(GAPIT_list[!G_list_M%in%P_list_M],Para) #print(Para$kinship.algorithm) if(SUPER_GS==TRUE)Para$SNP.test=FALSE IC=NULL #GAPIT.Version=GAPIT.0000() print("--------------------Processing traits----------------------------------") if(!is.null(Y)){ print("Phenotype provided!") if(ncol(Y)<2) stop ("Phenotype should have taxa name and one trait at least. 
Please correct phenotype file!") if(m==1) { DP=GAPIT.DP(G=G,GD=GD,GM=GM,KI=KI,Z=Z,CV=CV,CV.Inheritance=Para$CV.Inheritance,GP=GP,GK=GK, group.from=Para$group.from ,group.to= Para$group.to,group.by=Para$group.by,DPP= Para$DPP, kinship.cluster=Para$kinship.cluster, kinship.group=Para$kinship.group,kinship.algorithm=Para$ kinship.algorithm, NJtree.group=Para$NJtree.group,NJtree.type=Para$NJtree.type,plot.bin=Para$plot.bin,PCA.col=Para$PCA.col,PCA.3d=Para$PCA.3d, sangwich.top=Para$sangwich.top,sangwich.bottom=Para$sangwich.bottom,LD=Para$LD,bin.from= Para$bin.from,bin.to= Para$bin.to,bin.by= Para$bin.by,inclosure.from= Para$inclosure.from,inclosure.to= Para$inclosure.to,inclosure.by= Para$inclosure.by, SNP.P3D= Para$SNP.P3D,SNP.effect= Para$SNP.effect,SNP.impute= Para$SNP.impute,PCA.total= Para$PCA.total, SNP.fraction = Para$SNP.fraction, seed = Para$seed, BINS = Para$BINS,SNP.test=Para$SNP.test, SNP.MAF= Para$SNP.MAF,FDR.Rate = Para$FDR.Rate, SNP.FDR= Para$SNP.FDR,SNP.permutation= Para$SNP.permutation, SNP.CV= Para$SNP.CV,SNP.robust= Para$SNP.robust, Inter.Plot=Para$Inter.Plot, Inter.type=Para$Inter.type, file.from= Para$file.from, file.to=Para$file.to, file.total= Para$file.total, file.fragment = Para$file.fragment,file.path= Para$file.path, file.G= Para$file.G, file.Ext.G= Para$file.Ext.G,file.GD= Para$file.GD, file.GM= Para$file.GM, file.Ext.GD= Para$file.Ext.GD,file.Ext.GM= Para$file.Ext.GM, ngrid = Para$ngrid, llim = Para$llim, ulim = Para$ulim, esp = Para$esp,Multi_iter=Para$Multi_iter,num_regwas=Para$num_regwas, LD.chromosome= Para$LD.chromosome,LD.location= Para$LD.location,LD.range= Para$LD.range, QC= Para$QC,GTindex= Para$GTindex,cutOff=Para$cutOff, Model.selection = Para$Model.selection,output.numerical = Para$output.numerical,Random.model=Para$Random.model, Create.indicator = Para$Create.indicator,QTN= Para$QTN, QTN.round= Para$QTN.round,QTN.limit= Para$QTN.limit, QTN.update= Para$QTN.update, QTN.method= Para$QTN.method, Major.allele.zero = Para$Major.allele.zero, method.GLM=Para$ method.GLM,method.sub= Para$method.sub,method.sub.final= Para$method.sub.final, method.bin= Para$method.bin,bin.size= Para$bin.size,bin.selection= Para$bin.selection, memo= Para$memo,Prior= Para$Prior,ncpus=Para$ncpus,maxLoop= Para$maxLoop,threshold.output= Para$threshold.output, WS= Para$WS,alpha= Para$alpha,maxOut= Para$maxOut,QTN.position= Para$QTN.position, converge=Para$converge,iteration.output= Para$iteration.output,acceleration=Para$acceleration, iteration.method= Para$iteration.method,PCA.View.output=Para$PCA.View.output, output.hapmap = Para$output.hapmap, file.output= Para$file.output,Geno.View.output=Para$Geno.View.output,plot.style=Para$plot.style,SUPER_GD= Para$SUPER_GD,SUPER_GS= Para$SUPER_GS,CG=Para$CG,model=model) }else{ DP$kinship.algorithm=Para$ kinship.algorithm DP$group.from=Para$group.from DP$group.to=Para$group.to DP$group.by=Para$group.by DP$sangwich.top=Para$sangwich.top DP$sangwich.bottom=Para$sangwich.bottom DP$bin.from= Para$bin.from DP$bin.to= Para$bin.to DP$bin.by= Para$bin.by DP$inclosure.from= Para$inclosure.from DP$inclosure.to= Para$inclosure.toDP$inclosure.by= Para$inclosure.by } for (trait in 2: ncol(Y)) { traitname=colnames(Y)[trait] ###Statistical distributions of phenotype ###Correlation between phenotype and principal components print(paste("Processing trait: ",traitname,sep="")) if(!is.null(Para$memo)) traitname=paste(Para$memo,".",traitname,sep="") if(!is.null(Y) & 
Para$file.output)ViewPhenotype<-GAPIT.Phenotype.View(myY=Y[,c(1,trait)],traitname=traitname,memo=Para$memo) Judge=GAPIT.Judge(Y=Y[,c(1,trait)],G=DP$G,GD=DP$GD,KI=DP$KI,GM=DP$GM,group.to=DP$group.to,group.from=DP$group.from,sangwich.top=DP$sangwich.top,sangwich.bottom=DP$sangwich.bottom,kinship.algorithm=DP$kinship.algorithm,PCA.total=DP$PCA.total,model=DP$model,SNP.test=DP$SNP.test) DP$group.from=Judge$group.from DP$group.to=Judge$group.to DP$name.of.trait=traitname DP$Y=Y[,c(1,trait)] DP$model=model print(Para$SNP.test) IC=GAPIT.IC(DP=DP) SS=GAPIT.SS(DP=DP,IC=IC) if(Para$SNP.test==TRUE)ID=GAPIT.ID(DP=DP,IC=IC,SS=SS) }#for loop trait #print(SNP.test) print("GAPIT accomplished successfully for multiple traits. Result are saved") print("It is OK to see this: 'There were 50 or more warnings (use warnings() to see the first 50)'") out <- list() out$QTN<-QTN.position out$GWAS<-SS$GWAS out$Pred<-SS$Pred out$QTN<-IC$QTN out$Power<-SS$Power out$FDR<-SS$FDR out$Power.Alpha<-SS$Power.Alpha out$alpha<-SS$alpha out$mc=SS$mc out$bc=SS$bc out$mp=SS$mp out$h2=SS$h2 out$PCA=IC$PCA out$GD=DP$GD out$GM=DP$GM out$KI=IC$K out$GM=DP$GM if(Para$SNP.test==TRUE)names(out$GWAS$P.value)="mp" if(kinship.algorithm=="FarmCPU")names(out$Pred)=c("Taxa",traitname,"Prediction") #return (out) }else{# is.null(Y) #print(Para$SNP.MAF) out <- list() # if(is.null(Para$NQTN)&is.null(Para$h2)) # { # Para$kinship.algorithm="SUPER" # Para$PCA.total=0 # } #print(Para$kinship.algorithm) #print(Para$PCA.total) myGenotype<-GAPIT.Genotype(G=G,GD=GD,GM=GM,KI=KI,kinship.algorithm=Para$kinship.algorithm,PCA.total=Para$PCA.total,SNP.fraction=Para$SNP.fraction,SNP.test=Para$SNP.test, file.path=Para$file.path,file.from=Para$file.from, file.to=Para$file.to, file.total=Para$file.total, file.fragment = Para$file.fragment, file.G=Para$file.G, file.Ext.G=Para$file.Ext.G,file.GD=Para$file.GD, file.GM=Para$file.GM, file.Ext.GD=Para$file.Ext.GD,file.Ext.GM=Para$file.Ext.GM, SNP.MAF=Para$SNP.MAF,FDR.Rate = Para$FDR.Rate,SNP.FDR=Para$SNP.FDR,SNP.effect=Para$SNP.effect,SNP.impute=Para$SNP.impute,NJtree.group=Para$NJtree.group,NJtree.type=Para$NJtree.type, LD.chromosome=Para$LD.chromosome,LD.location=Para$LD.location,LD.range=Para$LD.range,GP=Para$GP,GK=Para$GK,bin.size=NULL,inclosure.size=NULL, sangwich.top=NULL,sangwich.bottom=Para$sangwich.bottom,GTindex=NULL,file.output=Para$file.output, Create.indicator = Para$Create.indicator, Major.allele.zero = Para$Major.allele.zero,Geno.View.output=Para$Geno.View.output,PCA.col=Para$PCA.col,PCA.3d=Para$PCA.3d) GD=myGenotype$GD GI=myGenotype$GI GT=myGenotype$GT #G=myGenotype$G chor_taxa=myGenotype$chor_taxa rownames(GD)=GT colnames(GD)=GI[,1] taxa=GT #print(GD[1:5,1:5]) if(Para$output.numerical) { write.table(cbind(taxa,GD), "GAPIT.Genotype.Numerical.txt", quote = FALSE, sep = "\t", row.names = F,col.names = T) write.table(GI, "GAPIT.Genotype.map.txt", quote = FALSE, sep = "\t", row.names = F,col.names = T) } if(Para$output.hapmap) write.table(myGenotype$G, "GAPIT.Genotype.hmp.txt", quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE) #GD=cbind(as.data.frame(GT),GD) if(!is.null(seed))set.seed(seed) #print(Para$NQTN) if(!is.null(Para$NQTN)&!is.null(Para$h2)) { myG_simulation<-GAPIT.Phenotype.Simulation(GD=cbind(as.data.frame(myGenotype$GT),myGenotype$GD),GM=myGenotype$GI,h2=Para$h2,NQTN=Para$NQTN,QTNDist=Para$QTNDist,effectunit=Para$effectunit,category=Para$category,r=Para$r,cveff=Para$cveff,a2=Para$a2,adim=Para$adim) out=c(out,myG_simulation) } 
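#Hedged usage sketch (file names are placeholders, not shipped with this script): the simulation
#branch above is reached by calling GAPIT() without a phenotype, e.g.
#  myGD  <- read.table("numeric_genotype.txt", header = TRUE)
#  myGM  <- read.table("snp_information.txt",  header = TRUE)
#  mySim <- GAPIT(GD = myGD, GM = myGM, h2 = 0.75, NQTN = 20)
#  #mySim is then expected to carry the simulated phenotype and QTN positions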
out$GD=data.frame(cbind(as.data.frame(GT),as.data.frame(GD))) out$GM=GI out$G=myGenotype$G out$kinship=myGenotype$KI out$PCA=myGenotype$PC }# is.null(Y) }#end of model loop #print(tail(IC$GM)) if(!is.null(Y)&SNP.test)if(Multiple_analysis&Para$file.output&length(model_store)*(ncol(Y)-1)>1&length(model_store)*(ncol(Y)-1)<9) { #print(DP$QTN.position) GMM=GAPIT.Multiple.Manhattan(model_store=model_store,Y=Y,GM=IC$GM,seqQTN=DP$QTN.position,cutOff=DP$cutOff) #print(str(GMM$multip_mapP)) GAPIT.Circle.Manhatton.Plot(band=1,r=3,GMM$multip_mapP,plot.type=c("c","q"),signal.line=1,xz=GMM$xz,threshold=DP$cutOff) }# end of mutiple manhantton plot # if(!is.null(Y)&!SNP.test&Multiple_analysis&Para$file.output&length(model_store)*(ncol(Y)-1)>1) # { # GAPIT.Interactive.GS(model_store=model_store,Y=Y) # } return (out) } #end of GAPIT function `GAPIT.ROC` <- function(t=NULL,se=NULL,Vp=1,trait="",plot.style="rainbow"){ #Object: To make table and plot for ROC (power vs FDR) #Input: t and se are the vectors of t value and their standard error #Input: Vp is phenotypic variance and trait is name of the phenotype #Output: A table and plot #Requirment: error df is same for all SMP or large #Authors: Zhiwu Zhang # Last update: Feb 11, 2013 ############################################################################################## #print("GAPIT.ROC start") #print("Length of t se and Vp") #print(length(t)) #print(length(se)) #print((Vp)) if(length(t)==length(t[is.na(t)]) ){ #print("NA t, No ROC plot") return(NULL) } #test #n=1000 #trait="test" #t=rnorm(n) #se=sqrt(abs(rnorm(n)) ) #Vp=10 #Remove NAs index=is.na(t) t=t[!index] se=se[!index] #print(head(cbind(t,se))) #Configration FDR=c(0,.01,.05,.1,.2,.3,.4,.5,.6,.7,.8,.9,1) coefficient=c(0,0.01,.02,.05,.1,.2,.3) #Power holder nf=length(FDR) nc=length(coefficient) power=matrix(NA,nf,nc) #Handler of matrix format if(!is.null(dim(t))) t=t[,1] if(!is.null(dim(se))) se=se[,1] n=length(t) #Discard negative t=abs(t) #print("@@@@@@@@@@@@@@") #sort t and se position=order(t,decreasing = TRUE) t=t[position] se=se[position] EFFECT=coefficient*sqrt(Vp) newbit=matrix(1/se,n,1)%*%EFFECT #n by nc matrix tnew=newbit+t #n by nc matrix for (i in 1:nf){ fdr=FDR[i] cutpoint=floor(n*fdr) cutoff=t[cutpoint] for (j in 1:nc){ effect= EFFECT[j] singnificant=tnew[,j]>cutoff count=length(t[singnificant]) power[i,j]=count/n } #end of for on fdr } #end of for on effect #output rownames(power)=FDR tkk<-c(.3,.2,.1,.05,.02,0.01,0) tc1<-c(0,0.25,0.5,0.75,1.0) #colnames(power)=paste("QTN=",coefficient,sep="") colnames(power)=paste("QTN=",tkk,sep="") if(plot.style=="FarmCPU"){ write.table(power,file=paste("FarmCPU.",trait,".ROC.csv",sep=""),quote = TRUE, sep = ",", row.names = TRUE,col.names = NA) } if(plot.style=="rainbow"){ write.table(power,file=paste("GAPIT.",trait,".ROC.csv",sep=""),quote = TRUE, sep = ",", row.names = TRUE,col.names = NA) } FDR_log<-FDR/10 #palette(c("black","red","blue","brown", "orange","cyan", "green",rainbow(nc))) if(plot.style=="FarmCPU"){ pdf(paste("FarmCPU.", trait,".ROC.pdf" ,sep = ""), width = 5,height=5) par(mar = c(5,6,5,3)) } if(plot.style=="rainbow"){ pdf(paste("GAPIT.", trait,".ROC.pdf" ,sep = ""), width = 7,height=7) par(mar = c(5,5,5,3)) } palette(c("black","red","blue","brown", "orange","cyan", "green",rainbow(nc))) plot(FDR_log,power[,1],log="x",type="o",yaxt="n",lwd=3,col=1,xlab="Type I error",ylab="Power",main = trait,cex.axis=1.3, cex.lab=1.3) axis(side=2,at=tc1,labels=tc1,cex.lab=1.3,cex.axis=1.3) for(i in 2:nc){ lines(power[,i]~FDR_log, 
lwd=3,type="o",pch=i,col=i) } #legend("bottomright", colnames(power), pch = c(1:nc), lty = c(1,2),col=c(1:nc)) legend("bottomright", colnames(power), pch = c(nc:1), lty = c(1,2),col=c(nc:1),lwd=2,bty="n") palette("default") # reset back to the default #print("@@@@@@@@@@@@@@") #print(power) dev.off() print("ROC completed!") } #GAPIT.ROC ends here #============================================================================================= `GAPIT.RandomModel` <- function(GWAS,Y,CV=NULL,X,cutOff=0.01,GT=NULL,n_ran=30){ #Object: To calculate the genetics variance ratio of Candidate Genes #Output: The genetics variance raito between CG and total #Authors: Jiabo Wang and Zhiwu Zhang # Last update: Nov 6, 2019 ############################################################################################## if(!require(lme4)) install.packages("lme4") library("lme4") #GWAS=myGAPIT_GLM$GWAS #CV=myGAPIT_GLM$PCA #cut.set=0.01 #return(list(GVs=NULL)) print("GAPIT.RandomModel beginning...") if(is.null(GT))GT=as.character(Y[,1]) name.of.trait=colnames(Y)[2] cutoff=cutOff/nrow(GWAS) P.value=as.numeric(GWAS[,4]) P.value[is.na(P.value)]=1 index=P.valuen_ran) { print("The candidate markers are more than threshold value !") return(list(GVs=NULL)) } if(!is.null(CV)) { #ff <- paste("trait~1+PC1+PC2+PC3+(1|gene_1)+(1|gene_2)+(1|gene_3)+(1|gene_4)+(1|gene_5)+(1|gene_6)" #dflme <- lmer(ff, data=tree2) if(ncol(CV)==1) { command0=paste("trait~1",sep="") command1=command0 command2=command1 for(j in 1:n_gd) { command2=paste(command2,"+(1|gene_",j,")",sep="") } }else{ command0=paste("trait~1",sep="") command1=command0 for(i in 1:n_cv) { command1=paste(command1,"+CV",i,sep="") } command2=command1 for(j in 1:n_gd) { command2=paste(command2,"+(1|gene_",j,")",sep="") } } }else{ command0=paste("trait~1",sep="") command1=command0 command2=command1 for(j in 1:n_gd) { command2=paste(command2,"+(1|gene_",j,")",sep="") } } #command3=paste(command2,"+(1|gene_",j,")",sep="") dflme <- lmer(command2, data=tree2,control=lmerControl(check.nobs.vs.nlev = "ignore", check.nobs.vs.rankZ = "ignore", check.nobs.vs.nRE="ignore")) carcor_matrix=as.data.frame(summary(dflme)$varcor) var_gene=as.numeric(carcor_matrix[1:(nrow(carcor_matrix)-1),4]) var_res=carcor_matrix[nrow(carcor_matrix),4] print(paste("Candidate Genes could explain genetics variance :",sep="")) print(var_gene/sum(var_gene+var_res)) v_rat=var_gene/sum(var_gene+var_res) gene_list=cbind(geneGWAS,v_rat) colnames(gene_list)[ncol(gene_list)]="Variance_Explained" write.csv(gene_list,paste("GAPIT.", name.of.trait,".Phenotype_Variance_Explained_by_Association_Markers.csv",sep=""),quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) #gene_list=read.csv("GAPIT.Weight.GrowthIntercept.Phenotype_Variance_Explained_by_Association_Markers.csv",head=T) if(!is.na(sum(gene_list[1,c(4:8)]))) { pdf(paste("GAPIT.", name.of.trait,".Effect_VP.pdf" ,sep = ""), width = 7,height=5.75) par(mar=c(4,5,4,4),cex=0.8) gene_list=gene_list[order(gene_list$effect),] plot(gene_list$effect,gene_list$Variance_Explained, xlab="Estimated Effect", ylab="Variance Explained of Phenotype" ) dev.off() pdf(paste("GAPIT.", name.of.trait,".MAF_VP.pdf" ,sep = ""), width = 7,height=5.75) par(mar=c(4,5,4,4),cex=0.8) gene_list=gene_list[order(gene_list$maf),] plot(gene_list$maf,gene_list$Variance_Explained,xlab="MAF",ylab="Variance Explained of Phenotype") dev.off() if(n_gd>=10) { pdf(paste("GAPIT.", name.of.trait,".MAF_Effect_VP.pdf" ,sep = ""), width = 9,height=5.75) n=10 
layout(matrix(c(1,1,2,1,1,1,1,1,1),3,3,byrow=TRUE), c(2,1), c(1,1), TRUE) do_color=colorRampPalette(c("green", "red"))(n) par(mar=c(4,5,2,8),cex=0.8) y=gene_list$maf x=gene_list$effect x.lim=max(x)+max(x)/10 y.lim=max(y)+max(y)/10 z=gene_list$Variance_Explained quantile_cut=quantile(z) r2_color=rep("black",n_gd) for(i in 1:(n/2)) { r2_color[z<=quantile_cut[i+1]&z>=quantile_cut[i]]=do_color[2*i] } plot(y~x,type="p", ylim=c(0,y.lim), xlim = c(min(x), max(x)),col = r2_color, xlab = "",ylab = "", cex.lab=1.2,pch=21,bg=r2_color) mtext("Estimated Effect",side=1,line=2.5) mtext("MAF",side=2,line=2.5) par(mar=c(2,13,5,4),cex=0.5) barplot(matrix(rep(0.4,times=n),n,1),beside=T,col=do_color,border=do_color,axes=FALSE,horiz =T) #legend(x=10,y=2,legend=expression(R^"2"),,lty=0,cex=1.3,bty="n",bg=par("bg")) step=length(seq(0,round(max(z),3),by=0.01)) small_bar=round(seq(0,round(max(z),3),by=(max(z)-min(z))/10),2) #main() mtext("Variance Explained",side=2,line=0.4,col="black",cex=0.5) axis(4,c(1,6,11),c(min(small_bar),median(small_bar),max(small_bar)),las=2,cex.axis = 0.9,tick=F,line=0) dev.off() } } return(list(GVs=var_gene/sum(var_gene+var_res))) }#end of GAPIT.RandomModel function #============================================================================================= `GAPIT.RemoveDuplicate` <- function(Y){ #Object: NA #Output: NA #Authors: Zhiwu Zhang # Last update: Augus 30, 2011 ############################################################################################## return (Y[match(unique(Y[,1]), Y[,1], nomatch = 0), ] ) } #============================================================================================= `GAPIT.Report` <- function(name.of.trait=NULL,GWAS=NULL,pred=pred,ypred=NULL,tvalue=NULL,stderr=NULL,Vp=1, DPP=100000,cutOff=.01,threshold.output=.01,MAF=NULL,seqQTN=NULL,MAF.calculate=FALSE,plot.style="rainbow"){ #Object: Out put plots and tables #Input: GWAS,name.of.trait, DPP #Requirement: None #Output: Graphs and tables #Output: return ycor if ypred is not null #Authors: Zhiwu Zhang # Date start: April 2, 2013 # Last update: April 2, 2013 ############################################################################################## #print("GAPIT.Report Started") #print(seqQTN) #Manhattan Plots #print("Manhattan plot (Genomewise)..." ) if(plot.style=="FarmCPU"){ GAPIT.Manhattan(GI.MP = GWAS[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Genomewise",cutOff=cutOff,seqQTN=seqQTN,plot.style=plot.style) } if(plot.style=="rainbow"){ GAPIT.Manhattan(GI.MP = GWAS[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Genomewise",cutOff=cutOff,seqQTN=seqQTN,plot.style=plot.style) #} #print("Manhattan plot (Chromosomewise)..." ) GAPIT.Manhattan(GI.MP = GWAS[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Chromosomewise",cutOff=cutOff,plot.style=plot.style) } #QQ plots #print("QQ plotting..." ) #if(plot.style=="rainbow"){ # GAPIT.QQ(P.values = GWAS[,4], name.of.trait = name.of.trait,DPP=DPP) #} #if(plot.style=="nature"){ GAPIT.QQ(P.values = GWAS[,4], name.of.trait = name.of.trait,DPP=DPP,plot.style=plot.style) #} #Association Table #print("Create association table..." 
) index=1:nrow(GWAS) if(threshold.output<1)index=which(GWAS[,4]0)&(floor(i/500)==i/500)) print(paste("SNP: ",i," ",sep="")) #No variation on the SNP if(min(xs[,i])==max(xs[,i])) { dfs[i,j] <- n - q1 betavalue[i,j]=0 stats[i,j] <- 0 } #The SNP has variation if(min(xs[,i])!=max(xs[,i])) { #SUPER snp.corr=cor(xs[,i],snp.pool) index.k=which( abs(snp.corr)<=LDsqr ) #handler of snp correlated with all QTNs if(length(index.k)<2){ index.k=1:length(snp.corr) #keep going to have them all #print("warning: there is a snp correlated with all QTNs") } K.X= snp.pool[,index.k] #################### K.X.svd= svd(K.X) ###start 2012.4.16 by qishan d=K.X.svd$d d=d[d>1e-8] d=d^2 U1=K.X.svd$u U1=U1[,1:length(d)] ### end 2012.4.16 by qishan n<-nrow(U1) I= diag(1,nrow(U1)) ################ get iXX X <- cbind(X0, xs[,i]) ####marker by column U <- U1*matrix(sqrt(1/(d + delta)), nrow(U1), length(d), byrow = TRUE) Xt <- crossprod(U, X) XX1<- crossprod(Xt, Xt) XX2<- crossprod((I-tcrossprod(U1,U1))%*%X,(I-tcrossprod(U1,U1))%*%X)/delta #iXX<-solve(XX1+XX2) iXX <- try(solve(XX1+XX2),silent=T) if(inherits(iXX, "try-error")){ iXX <- ginv(XX1+XX2) } ################# end get ixx ################ begin get beta ################ #######get beta compnents 1 #U1TX=t(U1)%*%X U1TX=crossprod(U1,X) beta1=0 for(ii in 1:length(d)){ one=matrix(U1TX[ii,], nrow=1) dim(one) #beta=t(one)%*%one/(d[ii]+delta) beta=crossprod(one,one)/(d[ii]+delta) beta1= beta1+beta } #######get beta components 2 #IUX=(I-U1%*%t(U1))%*%X IUX=(I-tcrossprod(U1,U1))%*%X beta2=0 for(ii in 1:nrow(U1)){ one=matrix(IUX[ii,], nrow=1) dim(one) beta=t(one)%*%one beta2= beta2+beta } beta2<-beta2/delta #######get b3 #U1TY=t(U1)%*%ys[j,] U1TY=crossprod(U1,ys[j,]) beta3=0 for(ii in 1:length(d)){ one1=matrix(U1TX[ii,], nrow=1) one2=matrix(U1TY[ii,], nrow=1) beta=crossprod(one1,one2)/(d[ii]+delta) beta3= beta3+beta } ###########get beta4 #IUY=(I-U1%*%t(U1))%*%ys[j,] IUY=(I-tcrossprod(U1,U1))%*%ys[j,] beta4=0 for(ii in 1:nrow(U1)){ one1=matrix(IUX[ii,], nrow=1) one2=matrix(IUY[ii,], nrow=1) #beta=t(one1)%*%one2 beta=crossprod(one1,one2) beta4= beta4+beta } beta4<-beta4/delta #######get final beta beta=ginv(beta1+beta2)%*%(beta3+beta4) ############## ################ end get beta betavalue[i,j]=beta[q1,1] stats[i,j] <- beta[q1,1]/sqrt(iXX[q1, q1] * vg) dfs[i,j] <- n - q1 } #end of SNP variation stutus detection } #loop for markers #print("Calculating p-values...") ps[,j] <- 2 * pt(abs(stats[,j]), dfs[,j], lower.tail = FALSE) } #end of loop on traits return(list(beta=betavalue, ps = ps, stats = stats, dfs = dfs,effect=betavalue)) } #Enf of SUPERMLM ####################### if(method=="FaST"){ K.X.svd= svd(snp.pool) ###start 2012.4.16 by qishan d=K.X.svd$d d=d[d>1e-8] d=d^2 U1=K.X.svd$u U1=U1[,1:length(d)] ### end 2012.4.16 by qishan n<-nrow(U1) I= diag(1,nrow(U1)) U <- U1*matrix(sqrt(1/(d + delta)), nrow(U1), length(d), byrow = TRUE) ################## #Iteration on trait (j) and SNP (i) for(j in 1:g) { for (i in 1:m) { if((i >0)&(floor(i/500)==i/500)) print(paste("SNP: ",i," ",sep="")) #No variation on the SNP if(min(xs[,i])==max(xs[,i])) { dfs[i,j] <- n - q1 betavalue[i,j]=0 stats[i,j] <- 0 } #The SNP has variation if(min(xs[,i])!=max(xs[,i])) { #SUPER #################### K.X.svd= svd(snp.pool) ###start 2012.4.16 by qishan d=K.X.svd$d d=d[d>1e-8] d=d^2 U1=K.X.svd$u U1=U1[,1:length(d)] ### end 2012.4.16 by qishan n<-nrow(U1) I= diag(1,nrow(U1)) ################ get iXX X <- cbind(X0, xs[,i]) ####marker by column U <- U1*matrix(sqrt(1/(d + delta)), nrow(U1), length(d), byrow = 
TRUE) Xt <- crossprod(U, X) XX1<- crossprod(Xt, Xt) XX2<- crossprod((I-tcrossprod(U1,U1))%*%X,(I-tcrossprod(U1,U1))%*%X)/delta iXX <- try(solve(XX1+XX2),silent=T) if(inherits(iXX, "try-error")){ iXX <- ginv(XX1+XX2) } ################# end get ixx ################ begin get beta #######get beta compnents 1 #U1TX=t(U1)%*%X U1TX=crossprod(U1,X) beta1=0 for(ii in 1:length(d)){ one=matrix(U1TX[ii,], nrow=1) dim(one) beta=crossprod(one,one)/(d[ii]+delta) beta1= beta1+beta } #######get beta components 2 IUX=(I-tcrossprod(U1,U1))%*%X beta2=0 for(ii in 1:nrow(U1)){ one=matrix(IUX[ii,], nrow=1) dim(one) beta=crossprod(one,one) beta2= beta2+beta } beta2<-beta2/delta #######get b3 #U1TY=t(U1)%*%ys[j,] U1TY=crossprod(U1,ys[j,]) beta3=0 for(ii in 1:length(d)){ one1=matrix(U1TX[ii,], nrow=1) one2=matrix(U1TY[ii,], nrow=1) #beta=t(one1)%*%one2/(d[ii]+delta) beta=crossprod(one1,one2)/(d[ii]+delta) beta3= beta3+beta } ###########get beta4 #IUY=(I-U1%*%t(U1))%*%ys[j,] IUY=(I-tcrossprod(U1,U1))%*%ys[j,] beta4=0 for(ii in 1:nrow(U1)){ one1=matrix(IUX[ii,], nrow=1) one2=matrix(IUY[ii,], nrow=1) #beta=t(one1)%*%one2 beta=crossprod(one1,one2) beta4= beta4+beta } beta4<-beta4/delta #######get final beta beta=ginv(beta1+beta2)%*%(beta3+beta4) ############## ################ end get beta betavalue[i,j]=beta[q1,1] stats[i,j] <- beta[q1,1]/sqrt(iXX[q1, q1] * vg) dfs[i,j] <- n - q1 } #end of SNP variation stutus detection } #loop for markers #print("Calculating p-values...") ps[,j] <- 2 * pt(abs(stats[,j]), dfs[,j], lower.tail = FALSE) } #end of loop on traits return(list(beta=betavalue, ps = ps, stats = stats, dfs = dfs,effect=betavalue)) } #Enf of FastMLM }####end function #============================================================================================= `GAPIT.SUPER.GS`<- function(Y=Y[,c(1,trait)],G=NULL,GD=NULL,GM=NULL,KI=NULL,Z=NULL,CV=NULL,GK=GK,kinship.algorithm=kinship.algorithm, bin.from=bin.from,bin.to=bin.to,bin.by=bin.by,inclosure.from=inclosure.from,inclosure.to=inclosure.to,inclosure.by=inclosure.by, group.from=group.from,group.to=group.to,group.by=group.by,kinship.cluster=kinship.cluster,kinship.group=kinship.group,name.of.trait=traitname, file.path=file.path,file.from=file.from, file.to=file.to, file.total=file.total, file.fragment = file.fragment, file.G=file.G,file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM, SNP.MAF= SNP.MAF,FDR.Rate = FDR.Rate,SNP.FDR=SNP.FDR,SNP.effect=SNP.effect,SNP.impute=SNP.impute,PCA.total=PCA.total,GAPIT.Version=GAPIT.Version, GT=GT, SNP.fraction = SNP.fraction, seed = seed, BINS = BINS,SNP.test=SNP.test,DPP=DPP, SNP.permutation=SNP.permutation, LD.chromosome=LD.chromosome,LD.location=LD.location,LD.range=LD.range,SNP.CV=SNP.CV,SNP.robust=SNP.robust,model=model, genoFormat=genoFormat,hasGenotype=hasGenotype,byFile=byFile,fullGD=fullGD,PC=PC,GI=GI,Timmer = Timmer, Memory = Memory, sangwich.top=sangwich.top,sangwich.bottom=sangwich.bottom,QC=QC,GTindex=GTindex,LD=LD,file.output=file.output,cutOff=cutOff ){ #Object: To perform GPS with SUPER and Compress method #Designed by Zhiwu Zhang #Writen by Jiabo Wang #Last update: Novber 6, 2015 ###################################################### print("--------------------- Welcome to GAPIT SUPER GS----------------------------") Timmer=GAPIT.Timmer(Infor="GAPIT.SUPER.GS") Memory=GAPIT.Memory(Infor="GAPIT.SUPER.GS") if(!require(EMMREML)) install.packages("EMMREML") library(EMMREML) shortcut=FALSE LL.save=1e10 #In case of null Y and null GP, return genotype only 
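#Hedged usage sketch (object names are placeholders): GAPIT.SUPER.GS is normally entered from the
#top-level GAPIT() wrapper rather than called directly, e.g.
#  myGS <- GAPIT(Y = myY, GD = myGD, GM = myGM, model = "gBLUP")
#  #myGS$Pred is then expected to hold the genomic predictions for all taxa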
thisY=Y[,2] thisY=thisY[!is.na(thisY)] if(length(thisY) <3){ shortcut=TRUE }else{ if(var(thisY) ==0) shortcut=TRUE } if(shortcut){ print(paste("Y is empty. No GWAS/GS performed for ",name.of.trait,sep="")) return (list(compression=NULL,kinship.optimum=NULL, kinship=KI,PC=PC,GWAS=NULL, GPS=NULL,Pred=NULL, REMLs=NULL,Timmer=Timmer,Memory=Memory)) } print("------------Examining data (QC)------------------------------------------") if(is.null(Y)) stop ("GAPIT says: Phenotypes must exist.") if(is.null(KI)&missing(GD) & kinship.algorithm!="SUPER") stop ("GAPIT says: Kinship is required. As genotype is not provided, kinship can not be created.") if(is.null(GD) & is.null(GT)) { GT=as.matrix(Y[,1]) GD=matrix(1,nrow(Y),1) GI=as.data.frame(matrix(0,1,3) ) colnames(GI)=c("SNP","Chromosome","Position") } #merge CV with PC #print(dim(CV)) #if(PCA.total>0&!is.null(CV))CV=GAPIT.CVMergePC(CV,PC) #if(PCA.total>0&is.null(CV))CV=PC #for GS merge CV with GD name #print("!!!!!!") #print(dim(CV)) # print(head(GT)) # print(head(GI)) # if (is.null(CV)) # {my_allCV=CV # }else{ # taxa_GD=GT # my_allCV=CV[order(CV[,1]),] # my_allCV=my_allCV[my_allCV[,1]%in%taxa_GD,] # #print(dim(my_allCV)) # } my_allCV=CV #print(dim(my_allCV)) if(kinship.algorithm!="None" & kinship.algorithm!="SUPER" & is.null(Z)){ taxa=as.character(Y[,1]) Z=as.data.frame(diag(1,nrow(Y))) Z=rbind(taxa,Z) taxa=c('Taxa',as.character(taxa)) Z=cbind(taxa,Z) } if(kinship.algorithm!="None" & kinship.algorithm!="SUPER" & !is.null(Z)) { if(nrow(Z)-1nY){snpsam=sample(1:nG,nY)}else{snpsam=1:nG} GK=GD[GTindex,snpsam] SNPVar=apply(as.matrix(GK),2,var) #print(snpsam) if (snpsam==1)stop ("GAPIT says: SUPER_GS must putin GD and GM.") GK=GK[,SNPVar>0] GK=cbind(as.data.frame(GT[GTindex]),as.data.frame(GK)) #add taxa } #print(head(CV)) #myGD=cbind(as.data.frame(GT),as.data.frame(GD)) file.output.temp=file.output file.output=FALSE # print(memory.size()) GP=GAPIT.Bread(Y=Y,CV=CV,Z=Z,KI=KI,GK=GK,GD=cbind(as.data.frame(GT),as.data.frame(GD)),GM=GI,method=sangwich.top,GTindex=GTindex,LD=LD,file.output=file.output)$GWAS file.output=file.output.temp # print(memory.size()) GK=NULL if(inclosure.to>nrow(Y)) ##########removed by Jiabo Wang ,unlimited number of inclosures { inclosure.to=nrow(Y)-1 print("the number of choosed inclosure is more than number of individuals") print("Set the number of choosed incolosure max equal to individuals") } if(inclosure.from>inclosure.to) ##########removed by Jiabo Wang ,unlimited number of inclosures { inclosure.from=inclosure.to } bin.level=seq(bin.from,bin.to,by=bin.by) inclosure=seq(inclosure.from,inclosure.to,by=inclosure.by) #print(inclosure) e=1 #################################number of bins and inclosure count=0 num_selection=length(bin.level)*length(inclosure) SUPER_selection=matrix(,num_selection,6) colnames(SUPER_selection)=c("bin","pseudo_QTNs","Max_pQTNs","REML","VA","VE") #for (bin in bin.level){bin=bin.level[e]} #for (inc in inclosure){inc=inclosure[e]} for (bin in bin.level){ for (inc in inclosure){ count=count+1 mySpecify=GAPIT.Specify(GI=GI,GP=GP,bin.size=bin,inclosure.size=inc) SNP.QTN=mySpecify$index num_pseudo_QTN=length(mySpecify$CB) num_bins=mySpecify$num_bins #print(paste("bin---",bin,"---inc---",inc,sep="")) GK=GD[GTindex,SNP.QTN] SUPER_GD=GD[,SNP.QTN] SNPVar=apply(as.matrix(GK),2,var) GK=GK[,SNPVar>0] SUPER_GD=SUPER_GD[,SNPVar>0] GK=cbind(as.data.frame(GT[GTindex]),as.data.frame(GK)) #add taxa SUPER_GD=cbind(as.data.frame(GT),as.data.frame(SUPER_GD)) #add taxa myBurger=GAPIT.Burger(Y=Y,CV=CV,GK=GK) #modifed by Jiabo 
Wang myREML=myBurger$REMLs myVG=myBurger$vg myVE=myBurger$ve SUPER_selection[count,1]=bin SUPER_selection[count,2]=num_pseudo_QTN SUPER_selection[count,3]=num_bins SUPER_selection[count,4]=myREML SUPER_selection[count,5]=myVG SUPER_selection[count,6]=myVE #print(SUPER_selection[count,]) if(count==1){ GK.save=GK LL.save=myREML SUPER_optimum_GD=SUPER_GD ########### get SUPER GD }else{ if(myREMLnk) { #group.to=min(nrow(KI),length(GTindex)) #maximum of group is number of rows in KI group.to=nk #maximum of group is number of rows in KI warning("The upper bound of groups is too high. It was set to the size of kinship!") } if(group.from>nk){ group.from=nk warning("The lower bound of groups is too high. It was set to the size of kinship!") } } if(!is.null(CV)){ if(group.to<=ncol(CV)+1) { #The minimum of group is number of columns in CV group.from=ncol(CV)+2 group.to=ncol(CV)+2 warning("The upper bound of groups (group.to) is not sufficient. both boundries were set to their minimum and GLM is performed!") } } GROUP=seq(group.to,group.from,by=-group.by)#The reverse order is to make sure to include full model if(missing("kinship.cluster")) kinship.cluster=c("ward", "single", "complete", "average", "mcquitty", "median", "centroid") if(missing("kinship.group")) kinship.group=c("Mean", "Max", "Min", "Median") numSetting=length(GROUP)*length(kinship.cluster)*length(kinship.group) ys=as.matrix(Y[2]) X0=as.matrix(CV[,-1]) if(min(X0[,1])!=max(X0[,1])) X0 <- cbind(1, X0) #do not add overall mean if X0 has it already at first column hold_Z=Z # library("EMMREML") order_count=0 storage_reml=NULL Compression=matrix(,numSetting,6) colnames(Compression)=c("Type","Cluster","Group","REML","VA","VE") for (group in GROUP) { for (ca in kinship.cluster) { for (kt in kinship.group) { #if(group=1) group=2 #if(!optOnly) {print("Compressing and Genome screening..." 
)} order_count=order_count+1 if(order_count==1)print("-------Mixed model with Kinship-----------------------------") if(group0,] #Choose the most influencial bins as estimated QTNs #Handler of single row if(is.null(dim(ID.GP))) ID.GP=matrix(ID.GP,1,length(ID.GP)) ID.GP=ID.GP[order(as.numeric(as.vector(ID.GP[,5]))),] #sort on P alue #Handler of single row (again after reshape) if(is.null(dim(ID.GP))) ID.GP=matrix(ID.GP,1,length(ID.GP)) index=!is.na(ID.GP[,4]) ID.GP=ID.GP[index,4] #must have chr and bp information, keep SNP ID only num_bins=NULL if(!is.null(inclosure.size) ) { if(!is.na(inclosure.size)){ avaiable=min(inclosure.size,length(ID.GP)) #print("inclosure.size length(ID.GP) avaiable") #print(inclosure.size) #print(length(ID.GP)) num_bins=length(ID.GP) # create number of all bins #print(avaiable) if(avaiable==0){ ID.GP=-1 }else{ ID.GP=ID.GP[1:avaiable] #keep the top ones selected } #print("ID.GP") #print(ID.GP) #problem here ID.GP[1:0]==ID.GP[1:1] } } #create index in GI theIndex=NULL if(!is.null(GI)){ ID.GI=as.numeric(as.vector(GI[,3]))+as.numeric(as.vector(GI[,2]))*MaxBP #print("ID.GI") #print(ID.GI) theIndex=ID.GI %in% ID.GP } #print("Specification in process done") myList=list(index=theIndex,CB=ID.GP) return (list(index=theIndex,CB=ID.GP,num_bins=num_bins)) } #end of GAPIT.Specify #============================================================================================= `GAPIT.Table` <- function(final.table = final.table, name.of.trait = name.of.trait,SNP.FDR=1){ #Object: Make and export a table of summary information from GWAS #Output: A table summarizing GWAS results #Authors: Alex Lipka and Zhiwu Zhang # Last update: May 10, 2011 ############################################################################################## #Filter SNPs by FDR index=(final.table[,7]<=SNP.FDR) final.table=final.table[index,] #Export this summary table as an excel file write.table(final.table, paste("GAPIT.", name.of.trait, ".GWAS.Results.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE) #print("GAPIT.Table accomplished successfully!") } #GAPIT.Table ends here #============================================================================================= `GAPIT.Timmer` <- function(Timmer=NULL,Infor){ #Object: To report current time #Output: Timmer #Authors: Zhiwu Zhang # Last update: may 8, 2011 ############################################################################################## Time<- Sys.time() if(is.null(Timmer)) { Elapsed=0 Timmer=cbind(Infor,Time,Elapsed) }else{ Elapsed=0 Timmer.current=cbind(Infor,Time,Elapsed) Timmer=rbind(Timmer,Timmer.current) Timmer[nrow(Timmer),3]=as.numeric(as.matrix(Timmer[nrow(Timmer),2]))-as.numeric(as.matrix(Timmer[nrow(Timmer)-1,2])) } #print(paste('Time used: ', Timmer[nrow(Timmer),3], ' seconds for ',Infor,sep="" )) return (Timmer) }#end of GAPIT.Timmer function #============================================================================================= `GAPIT.ZmatrixCompress` <- function(Z,GAU){ #Object: To assign the fraction of a individual belonging to a group #Output: Z #Authors: Zhiwu Zhang # Last update: April 14, 2011 ############################################################################################## #Extraction of GAU coresponding to Z, sort GAU rowwise to mach columns of Z, and make design matrix #print("GAPIT.ZmatrixCompress") #print(dim(Z)) #print(dim(GAU)) effect.Z=as.matrix(Z[1,-1]) effect.GAU=as.matrix(GAU[,1]) taxa=as.data.frame(Z[-1,1]) GAU0=GAU[effect.GAU%in%effect.Z,] 
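#The block below sorts GAU0 to match the columns of Z and expands the group index x into an
#incidence (design) matrix with DS=diag(n)[x,]: row i of DS is the x[i]-th row of the identity
#matrix, so each individual receives a 1 in the column of its group. A tiny worked illustration
#with hypothetical values (three individuals assigned to groups 2, 1 and 2):
#  x=c(2,1,2); n=2
#  diag(n)[x,]   #rows: (0,1), (1,0), (0,1)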
order.GAU=order(GAU0[,1])
GAU1 <- GAU0[order.GAU,]
#id.1=GAU1[which(GAU1[,3]==1),4]
id.1=GAU1[which(GAU1[,3]<2),4]
n=max(as.numeric(as.vector(id.1)))
x=as.numeric(as.matrix(GAU1[,4]))
DS=diag(n)[x,]
#sort Z column wise
order.Z=order(effect.Z)
Z=Z[-1,-1]
Z <- Z[,order.Z]
#Z matrix from individual to group
#Z1.numeric <- as.numeric(as.matrix(Z))
Z <- matrix(as.numeric(as.matrix(Z)), nrow = nrow(Z), ncol = ncol(Z))
Z=Z%*%DS
#Z3=data.frame(cbind(as.character(Z[-1,1]),Z2))
Z=data.frame(cbind(taxa,Z))
#Z=Z3[order(Z3[,1]),]
Z=Z[order(as.matrix(taxa)),]
#print("GAPIT.ZmatrixCompress accomplished successfully!")
return(list(Z=Z))
}#The function GAPIT.ZmatrixCompress ends here
#=============================================================================================
`GAPIT.ZmatrixFormation` <- function(Z,Y){
#Object: To expand the proportion Z to the final Z
#Output: Z
#Authors: Zhiwu Zhang
# Last update: April 22, 2011
##############################################################################################
#Split individuals in Y into those that are given in Z and those that are not
taxa.Z=as.matrix(Z[-1,1])
taxa.Y=as.matrix(Y[,1])
taxa.diff=setdiff(taxa.Y,taxa.Z)
taxa.I=as.matrix(taxa.Y[match(taxa.diff,taxa.Y,nomatch = 0)])
taxa.Z.col=as.matrix(Z[1,-1])
#Create final Z with zero block and identity block
Z0=matrix(data=0,nrow=nrow(taxa.Z),ncol=nrow(taxa.I))
Z1=diag(1,nrow(taxa.I))
ZC=as.matrix(rbind(Z0,Z1))
#To label rows and columns
label.row=rbind(as.matrix(Z[,1]),taxa.I)
label.col=t(taxa.I)
#update the zero block by the given Z matrix
position=t(as.matrix(match(taxa.Z.col,taxa.I,nomatch = 0)))
ZC[1:nrow(taxa.Z),position]=as.matrix(Z[-1,-1])
#handler of parents that do not have a phenotype (columns of Z are not in taxa.I)
# To do list
#To form final Z matrix
dataPart=rbind(label.col,ZC)
Z=data.frame(cbind(label.row,dataPart))
#print("GAPIT.ZmatrixFormation accomplished successfully!")
return(Z)
}#The function GAPIT.ZmatrixFormation ends here
#=============================================================================================
`GAPIT.cross_validation.compare` <-function(myGD=NULL,y=NULL, rel=NULL,tc=NULL){
# Object: Cross validation comparing different numbers of folds over replicated runs; result: a PDF of the scree barplot and .csv files
# myGD: numeric SNP genotypes
# Y: phenotype with columns of taxa,Y1,Y2...
# rel: number of replications
# tc: numbers of folds to compare
# Authors: You Tang, Jiabo Wang and You Zhou
# Last update: December 31, 2014
##############################################################################################
if(is.null(myGD)||is.null(y)){stop("Validation Invalid. Please provide valid files!")}
if(is.null(rel)) {
rel=10 #rel not provided; the default number of replications is 10
}
if(rel<2){stop("Validation Invalid.
Please select replications >1 !")} #rel<-2 ##replications #t<-2 y<-y[!is.na(y[,2]),] y<-y[,c(1,2)] y<- na.omit(y) ############# commonGeno <- unique(as.character(y[,1]))[unique(as.character(y[,1])) %in% myGD[,1]] cG<-data.frame(commonGeno) names(cG)<-"Taxa" colnames(y)<-c("Taxa","pheno") y2<-merge(y,cG,all.x=FALSE, all.y=TRUE, by = c("Taxa")) Z1 <- myGD[match(y2$Taxa,myGD[,1]),] myGD<- Z1 y<-y2 ############## X<-myGD[,-1] k1<-as.matrix(X) k2=GAPIT.kinship.VanRaden(snps=k1) myKI<-as.data.frame(k2) myKI<-cbind(myGD[,1],myKI) write.table(y,"Y.txt",quote=F,sep="\t",row.names=F,col.names=T) write.table(myKI,"K.txt",quote=F,row.names=F,col.names=F,sep="\t") gc() myK<- read.table("K.txt",head=F) y= read.table("Y.txt",head=T) y<- na.omit(y) y=y[(y[,1] %in% myK[,1]),] m=nrow(y) if(is.null(tc)) tc<-c(2,5,10,20,50) ##default compare to folders num tc1<-as.matrix(tc) allstorage.ref=matrix(0,rel,nrow(tc1)) allstorage.inf=matrix(0,rel,nrow(tc1)) for(w in 1:nrow(tc1)){ num<-tc1[w,] m.sample=floor(m/num) storage.ref=matrix(0,rel,num) storage.inf=matrix(0,rel,num) #storage.REML=matrix(0,rel,num) for(k in 1:rel) { #################Rand group method 1############ sets=sample(cut(1:nrow(y),num,labels=FALSE),nrow(y)) sets = as.data.frame(sets) ynew <- cbind(sets,y) #i=sample(1:num, size = 1) for(i in 1:num){ #use only genotypes that were genotyped and phenotyped ref <- y$Taxa[!ynew$sets==i] lines.cali<- ref # ycali<- y[match(ref,y$Taxa),] #use only genotypes that were genotyped and phenotyped test <- y$Taxa[ynew$sets==i] lines.vali<-test #yvali<- y[match(test,y$Taxa),] #################end Rand group method############ #use only genotypes that were genotyped and phenotyped commonGeno_v <- lines.vali[lines.vali %in% myK[,1]] yvali<- y[match(commonGeno_v,y[,1]),] #use only genotypes that were genotyped and phenotyped commonGeno_c <- lines.cali[lines.cali %in% myK[,1]] ycali<- y[match(commonGeno_c,y[,1]),] Y.raw=ycali[,c(1,2)]#choos a trait myY=Y.raw myKI=myK max.groups=m #Run GAPIT ############################################# myGAPIT <- GAPIT( Y=myY, KI=myKI, #group.from=max.groups, group.from=max.groups, group.to=max.groups, #group.by=10, PCA.total=3, SNP.test=FALSE, file.output=FALSE ) prediction=myGAPIT$Pred prediction.ref<-prediction[match(commonGeno_c,prediction$Taxa),] prediction.inf<-prediction[match(commonGeno_v,prediction$Taxa),] YP.ref <- merge(y, prediction.ref, by.x = 1, by.y = "Taxa") YP.inf <- merge(y, prediction.inf, by.x = 1, by.y = "Taxa") #Calculate correlation and store them r.ref=cor(as.numeric(as.vector(YP.ref[,2])),as.numeric(as.vector(YP.ref[,6]) )) r.inf=cor(as.numeric(as.vector(YP.inf[,2])),as.numeric(as.vector(YP.inf[,6]) )) if(r.inf<0){ #r.inf=cor(as.numeric(as.vector(YP.inf[,2])),as.numeric(as.vector(YP.inf[,2]+YP.inf[,6]))) combine_output=cbind(as.numeric(as.vector(YP.inf[,2])),as.numeric(as.vector(YP.inf[,6]) )) write.csv(combine_output, paste("Accuracy_folders",num,k,i,rel,".csv",sep="")) #stop("...........") } storage.ref[k,i]=r.ref storage.inf[k,i]=r.inf print(paste(" rel= ", rel, " k= ",k," i= ",i,sep = "")) } print(paste("finish replications k= ",k," folders= ",num,sep = "")) } #Find missing position-->0.0 index=is.na(storage.inf) storage.inf[index]=0 allstorage.inf[,w]=as.matrix(rowMeans(storage.inf)) allstorage.ref[,w]=as.matrix(rowMeans(storage.ref)) #as.matrix(rowMeans(storage.ref)) ##output rel times and accuracy for every folders combine_output=cbind(storage.inf,allstorage.inf[,w]) combine_output1=cbind(storage.ref,allstorage.ref[,w]) 
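#Each replicate stores, per fold, the Pearson correlation between the observed phenotype and the
#GAPIT prediction, separately for the reference (training) and inference (validation) sets; the
#row means collected in allstorage.ref and allstorage.inf are the per-replicate accuracies that
#are summarized and plotted below. A hedged usage sketch, assuming myGD (numeric genotypes with
#taxa in column 1) and myY (taxa plus one trait) are already loaded; the fold settings are
#illustrative, not defaults:
#  out=GAPIT.cross_validation.compare(myGD=myGD, y=myY, rel=10, tc=c(2,5,10))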
colnames(combine_output)=c(paste("folders",c(1:num),sep=""),"mean") write.csv(combine_output, paste("Accuracy_folders",num,"by CMLM,rel_",rel,".csv",sep="")) write.csv(combine_output1, paste("Accuracy_folders ref",num,"by CMLM,rel_",rel,".csv",sep="")) } sr<-nrow(tc1) ##output means accuracy by rel for every folders colnames(allstorage.inf)=c(paste(tc1[c(1:sr),]," folders",sep="")) write.csv(allstorage.inf, paste("Accuracy_folders",nrow(tc1),"by CMLM,rel_",rel,".compare to means",".csv",sep="")) write.csv(allstorage.ref, paste("Accuracy_folders ref",nrow(tc1),"by CMLM,rel_",rel,".compare to means",".csv",sep="")) name.of.trait=noquote(names(Y.raw)[2]) #rrel=round(rel/2) #ppp<-matrix(0,sr,2) ppp<-matrix(0,sr,2) #if(rrel!=1){ # aarm<-colMeans(allstorage.inf[1:rrel,]) # }else{ # aarm<-allstorage.inf[1,] # } #aam<-colMeans(allstorage.inf) aam<-allstorage.inf aam<-data.frame(aam) bbm<-allstorage.ref bbm<-data.frame(bbm) for(b in 1:sr){ #ppp[b,]<-as.matrix(c(aarm[b],aam[b])) ppp[b,1]<-as.matrix(mean(aam[,b])) #colnames(ppp)<-c(rrel,rel) } for(c in 1:sr){ ppp[c,2]<-as.matrix(mean(bbm[,c])) } ppp<-as.matrix(cbind(ppp,tc1)) #colnames(ppp)<-c(rel) sj<-runif(1, 0, 1) #name.of.trait="qqq" pdf(paste("GAPIT.cross_validation ", name.of.trait,sj,".compare to different folders.", ".pdf", sep = ""),width = 4.5, height = 4,pointsize=9) par(mar = c(5,6,5,3)) palette(c("blue","red",rainbow(2))) plot(ppp[,3],ppp[,2],xaxt="n",ylim=c(0,1.04),xlim=c(min(tc1)-1,max(tc1)+1),bg="lightgray",xlab="Number of folds",ylab="Correlation",type="o",pch=1,col=1,cex=1.0,cex.lab=1.7, cex.axis=1.3, lwd=3,las=1,lty =2) axis(side=1,at=tc1,labels=tc1,cex.lab=1.7) lines(ppp[,1]~ppp[,3], lwd=3,type="o",pch=19,col=2,lty =1) legend("bottomright",horiz = FALSE,c("Reference","Inference"),pch = c(1,19), lty =c(2,1),col=c(1:2),lwd=2,cex=1.2,bty="n") dev.off() print(paste("GAPIT.cross validation ", name.of.trait,".compare to different folders.","successfully!" 
,sep = "")) return(list(allstorage.inf)) }#end GAPIT.cross validation compare to different folders by replicate Times #============================================================================================= emma.kinship <- function(snps, method="additive", use="all") { n0 <- sum(snps==0,na.rm=TRUE) nh <- sum(snps==0.5,na.rm=TRUE) n1 <- sum(snps==1,na.rm=TRUE) nNA <- sum(is.na(snps)) stopifnot(n0+nh+n1+nNA == length(snps)) if ( method == "dominant" ) { flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps)) snps[!is.na(snps) && (snps == 0.5)] <- flags[!is.na(snps) && (snps == 0.5)] } else if ( method == "recessive" ) { flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps)) snps[!is.na(snps) && (snps == 0.5)] <- flags[!is.na(snps) && (snps == 0.5)] } else if ( ( method == "additive" ) && ( nh > 0 ) ) { dsnps <- snps rsnps <- snps flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps)) dsnps[!is.na(snps) && (snps==0.5)] <- flags[is.na(snps) && (snps==0.5)] flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps)) rsnps[!is.na(snps) && (snps==0.5)] <- flags[is.na(snps) && (snps==0.5)] snps <- rbind(dsnps,rsnps) } if ( use == "all" ) { mafs <- matrix(rowMeans(snps,na.rm=TRUE),nrow(snps),ncol(snps)) snps[is.na(snps)] <- mafs[is.na(snps)] } else if ( use == "complete.obs" ) { snps <- snps[rowSums(is.na(snps))==0,] } n <- ncol(snps) K <- matrix(nrow=n,ncol=n) diag(K) <- 1 for(i in 1:(n-1)) { for(j in (i+1):n) { x <- snps[,i]*snps[,j] + (1-snps[,i])*(1-snps[,j]) K[i,j] <- sum(x,na.rm=TRUE)/sum(!is.na(x)) K[j,i] <- K[i,j] } } return(K) } emma.eigen.L <- function(Z,K,complete=TRUE) { if ( is.null(Z) ) { return(emma.eigen.L.wo.Z(K)) } else { return(emma.eigen.L.w.Z(Z,K,complete)) } } emma.eigen.L.wo.Z <- function(K) { eig <- eigen(K,symmetric=TRUE) return(list(values=eig$values,vectors=eig$vectors)) } emma.eigen.L.w.Z <- function(Z,K,complete=TRUE) { if ( complete == FALSE ) { vids <- colSums(Z)>0 Z <- Z[,vids] K <- K[vids,vids] } eig <- eigen(K%*%crossprod(Z,Z),symmetric=FALSE,EISPACK=TRUE) return(list(values=eig$values,vectors=qr.Q(qr(Z%*%eig$vectors),complete=TRUE))) } emma.eigen.R <- function(Z,K,X,complete=TRUE) { if ( is.null(Z) ) { return(emma.eigen.R.wo.Z(K,X)) } else { return(emma.eigen.R.w.Z(Z,K,X,complete)) } } emma.eigen.R.wo.Z <- function(K, X) { n <- nrow(X) q <- ncol(X) S <- diag(n)-X%*%solve(crossprod(X,X))%*%t(X) eig <- eigen(S%*%(K+diag(1,n))%*%S,symmetric=TRUE) stopifnot(!is.complex(eig$values)) return(list(values=eig$values[1:(n-q)]-1,vectors=eig$vectors[,1:(n-q)])) } emma.eigen.R.w.Z <- function(Z, K, X, complete = TRUE) { if ( complete == FALSE ) { vids <- colSums(Z) > 0 Z <- Z[,vids] K <- K[vids,vids] } n <- nrow(Z) t <- ncol(Z) q <- ncol(X) SZ <- Z - X%*%solve(crossprod(X,X))%*%crossprod(X,Z) eig <- eigen(K%*%crossprod(Z,SZ),symmetric=FALSE,EISPACK=TRUE) if ( is.complex(eig$values) ) { eig$values <- Re(eig$values) eig$vectors <- Re(eig$vectors) } qr.X <- qr.Q(qr(X)) return(list(values=eig$values[1:(t-q)], vectors=qr.Q(qr(cbind(SZ%*%eig$vectors[,1:(t-q)],qr.X)), complete=TRUE)[,c(1:(t-q),(t+1):n)])) } emma.delta.ML.LL.wo.Z <- function(logdelta, lambda, etas, xi) { n <- length(xi) delta <- exp(logdelta) return( 0.5*(n*(log(n/(2*pi))-1-log(sum((etas*etas)/(lambda+delta))))-sum(log(xi+delta))) ) } emma.delta.ML.LL.w.Z <- function(logdelta, lambda, etas.1, xi.1, n, etas.2.sq ) { t <- length(xi.1) delta <- exp(logdelta) # stopifnot(length(lambda) == length(etas.1)) return( 
0.5*(n*(log(n/(2*pi))-1-log(sum(etas.1*etas.1/(lambda+delta))+etas.2.sq/delta))-(sum(log(xi.1+delta))+(n-t)*logdelta)) ) } emma.delta.ML.dLL.wo.Z <- function(logdelta, lambda, etas, xi) { n <- length(xi) delta <- exp(logdelta) etasq <- etas*etas ldelta <- lambda+delta return( 0.5*(n*sum(etasq/(ldelta*ldelta))/sum(etasq/ldelta)-sum(1/(xi+delta))) ) } emma.delta.ML.dLL.w.Z <- function(logdelta, lambda, etas.1, xi.1, n, etas.2.sq ) { t <- length(xi.1) q <- t-length(lambda) delta <- exp(logdelta) etasq <- etas.1*etas.1 ldelta <- lambda+delta return( 0.5*(n*(sum(etasq/(ldelta*ldelta))+etas.2.sq/(delta*delta))/(sum(etasq/ldelta)+etas.2.sq/delta)-(sum(1/(xi.1+delta))+(n-t)/delta) ) ) } emma.delta.REML.LL.wo.Z <- function(logdelta, lambda, etas) { nq <- length(etas) delta <- exp(logdelta) return( 0.5*(nq*(log(nq/(2*pi))-1-log(sum(etas*etas/(lambda+delta))))-sum(log(lambda+delta))) ) } emma.delta.REML.LL.w.Z <- function(logdelta, lambda, etas.1, n, t, etas.2.sq ) { tq <- length(etas.1) nq <- n - t + tq delta <- exp(logdelta) return( 0.5*(nq*(log(nq/(2*pi))-1-log(sum(etas.1*etas.1/(lambda+delta))+etas.2.sq/delta))-(sum(log(lambda+delta))+(n-t)*logdelta)) ) } emma.delta.REML.dLL.wo.Z <- function(logdelta, lambda, etas) { nq <- length(etas) delta <- exp(logdelta) etasq <- etas*etas ldelta <- lambda+delta return( 0.5*(nq*sum(etasq/(ldelta*ldelta))/sum(etasq/ldelta)-sum(1/ldelta)) ) } emma.delta.REML.dLL.w.Z <- function(logdelta, lambda, etas.1, n, t1, etas.2.sq ) { t <- t1 tq <- length(etas.1) nq <- n - t + tq delta <- exp(logdelta) etasq <- etas.1*etas.1 ldelta <- lambda+delta return( 0.5*(nq*(sum(etasq/(ldelta*ldelta))+etas.2.sq/(delta*delta))/(sum(etasq/ldelta)+etas.2.sq/delta)-(sum(1/ldelta)+(n-t)/delta)) ) } emma.MLE <- function(y, X, K, Z=NULL, ngrids=100, llim=-10, ulim=10, esp=1e-10, eig.L = NULL, eig.R = NULL) { n <- length(y) t <- nrow(K) q <- ncol(X) # stopifnot(nrow(K) == t) stopifnot(ncol(K) == t) stopifnot(nrow(X) == n) if ( det(crossprod(X,X)) == 0 ) { warning("X is singular") return (list(ML=0,delta=0,ve=0,vg=0)) } if ( is.null(Z) ) { if ( is.null(eig.L) ) { eig.L <- emma.eigen.L.wo.Z(K) } if ( is.null(eig.R) ) { eig.R <- emma.eigen.R.wo.Z(K,X) } etas <- crossprod(eig.R$vectors,y) logdelta <- (0:ngrids)/ngrids*(ulim-llim)+llim m <- length(logdelta) delta <- exp(logdelta) Lambdas <- matrix(eig.R$values,n-q,m) + matrix(delta,n-q,m,byrow=TRUE) Xis <- matrix(eig.L$values,n,m) + matrix(delta,n,m,byrow=TRUE) Etasq <- matrix(etas*etas,n-q,m) LL <- 0.5*(n*(log(n/(2*pi))-1-log(colSums(Etasq/Lambdas)))-colSums(log(Xis))) dLL <- 0.5*delta*(n*colSums(Etasq/(Lambdas*Lambdas))/colSums(Etasq/Lambdas)-colSums(1/Xis)) optlogdelta <- vector(length=0) optLL <- vector(length=0) if ( dLL[1] < esp ) { optlogdelta <- append(optlogdelta, llim) optLL <- append(optLL, emma.delta.ML.LL.wo.Z(llim,eig.R$values,etas,eig.L$values)) } if ( dLL[m-1] > 0-esp ) { optlogdelta <- append(optlogdelta, ulim) optLL <- append(optLL, emma.delta.ML.LL.wo.Z(ulim,eig.R$values,etas,eig.L$values)) } for( i in 1:(m-1) ) { if ( ( dLL[i]*dLL[i+1] < 0 ) && ( dLL[i] > 0 ) && ( dLL[i+1] < 0 ) ) { r <- uniroot(emma.delta.ML.dLL.wo.Z, lower=logdelta[i], upper=logdelta[i+1], lambda=eig.R$values, etas=etas, xi=eig.L$values) optlogdelta <- append(optlogdelta, r$root) optLL <- append(optLL, emma.delta.ML.LL.wo.Z(r$root,eig.R$values, etas, eig.L$values)) } } # optdelta <- exp(optlogdelta) } else { if ( is.null(eig.L) ) { eig.L <- emma.eigen.L.w.Z(Z,K) } if ( is.null(eig.R) ) { eig.R <- emma.eigen.R.w.Z(Z,K,X) } etas <- crossprod(eig.R$vectors,y) 
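#The grid search below profiles the log-likelihood over a single variance ratio delta = ve/vg on a
#log scale: logdelta takes ngrids+1 equally spaced values in [llim, ulim] (e.g. ngrids=100,
#llim=-10, ulim=10 gives -10, -9.8, ..., 10), LL and its derivative dLL are evaluated in closed
#form at every grid point, and the candidate optima are the boundaries plus any interval where dLL
#changes sign, refined with uniroot() on the derivative. The best delta then yields vg and
#ve = delta*vg. emma.REMLE applies the same scheme to the restricted likelihood.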
etas.1 <- etas[1:(t-q)] etas.2 <- etas[(t-q+1):(n-q)] etas.2.sq <- sum(etas.2*etas.2) logdelta <- (0:ngrids)/ngrids*(ulim-llim)+llim m <- length(logdelta) delta <- exp(logdelta) Lambdas <- matrix(eig.R$values,t-q,m) + matrix(delta,t-q,m,byrow=TRUE) Xis <- matrix(eig.L$values,t,m) + matrix(delta,t,m,byrow=TRUE) Etasq <- matrix(etas.1*etas.1,t-q,m) #LL <- 0.5*(n*(log(n/(2*pi))-1-log(colSums(Etasq/Lambdas)+etas.2.sq/delta))-colSums(log(Xis))+(n-t)*log(deltas)) dLL <- 0.5*delta*(n*(colSums(Etasq/(Lambdas*Lambdas))+etas.2.sq/(delta*delta))/(colSums(Etasq/Lambdas)+etas.2.sq/delta)-(colSums(1/Xis)+(n-t)/delta)) optlogdelta <- vector(length=0) optLL <- vector(length=0) if ( dLL[1] < esp ) { optlogdelta <- append(optlogdelta, llim) optLL <- append(optLL, emma.delta.ML.LL.w.Z(llim,eig.R$values,etas.1,eig.L$values,n,etas.2.sq)) } if ( dLL[m-1] > 0-esp ) { optlogdelta <- append(optlogdelta, ulim) optLL <- append(optLL, emma.delta.ML.LL.w.Z(ulim,eig.R$values,etas.1,eig.L$values,n,etas.2.sq)) } for( i in 1:(m-1) ) { if ( ( dLL[i]*dLL[i+1] < 0 ) && ( dLL[i] > 0 ) && ( dLL[i+1] < 0 ) ) { r <- uniroot(emma.delta.ML.dLL.w.Z, lower=logdelta[i], upper=logdelta[i+1], lambda=eig.R$values, etas.1=etas.1, xi.1=eig.L$values, n=n, etas.2.sq = etas.2.sq ) optlogdelta <- append(optlogdelta, r$root) optLL <- append(optLL, emma.delta.ML.LL.w.Z(r$root,eig.R$values, etas.1, eig.L$values, n, etas.2.sq )) } } # optdelta <- exp(optlogdelta) } maxdelta <- exp(optlogdelta[which.max(optLL)]) maxLL <- max(optLL) if ( is.null(Z) ) { maxva <- sum(etas*etas/(eig.R$values+maxdelta))/n } else { maxva <- (sum(etas.1*etas.1/(eig.R$values+maxdelta))+etas.2.sq/maxdelta)/n } maxve <- maxva*maxdelta return (list(ML=maxLL,delta=maxdelta,ve=maxve,vg=maxva)) } emma.REMLE <- function(y, X, K, Z=NULL, ngrids=100, llim=-10, ulim=10, esp=1e-10, eig.L = NULL, eig.R = NULL) { n <- length(y) t <- nrow(K) q <- ncol(X) # stopifnot(nrow(K) == t) stopifnot(ncol(K) == t) stopifnot(nrow(X) == n) if ( det(crossprod(X,X)) == 0 ) { warning("X is singular") return (list(REML=0,delta=0,ve=0,vg=0)) } if ( is.null(Z) ) { if ( is.null(eig.R) ) { eig.R <- emma.eigen.R.wo.Z(K,X) } etas <- crossprod(eig.R$vectors,y) logdelta <- (0:ngrids)/ngrids*(ulim-llim)+llim m <- length(logdelta) delta <- exp(logdelta) Lambdas <- matrix(eig.R$values,n-q,m) + matrix(delta,n-q,m,byrow=TRUE) Etasq <- matrix(etas*etas,n-q,m) LL <- 0.5*((n-q)*(log((n-q)/(2*pi))-1-log(colSums(Etasq/Lambdas)))-colSums(log(Lambdas))) dLL <- 0.5*delta*((n-q)*colSums(Etasq/(Lambdas*Lambdas))/colSums(Etasq/Lambdas)-colSums(1/Lambdas)) optlogdelta <- vector(length=0) optLL <- vector(length=0) if ( dLL[1] < esp ) { optlogdelta <- append(optlogdelta, llim) optLL <- append(optLL, emma.delta.REML.LL.wo.Z(llim,eig.R$values,etas)) } if ( dLL[m-1] > 0-esp ) { optlogdelta <- append(optlogdelta, ulim) optLL <- append(optLL, emma.delta.REML.LL.wo.Z(ulim,eig.R$values,etas)) } for( i in 1:(m-1) ) { if ( ( dLL[i]*dLL[i+1] < 0 ) && ( dLL[i] > 0 ) && ( dLL[i+1] < 0 ) ) { r <- uniroot(emma.delta.REML.dLL.wo.Z, lower=logdelta[i], upper=logdelta[i+1], lambda=eig.R$values, etas=etas) optlogdelta <- append(optlogdelta, r$root) optLL <- append(optLL, emma.delta.REML.LL.wo.Z(r$root,eig.R$values, etas)) } } # optdelta <- exp(optlogdelta) } else { if ( is.null(eig.R) ) { eig.R <- emma.eigen.R.w.Z(Z,K,X) } etas <- crossprod(eig.R$vectors,y) etas.1 <- etas[1:(t-q)] etas.2 <- etas[(t-q+1):(n-q)] etas.2.sq <- sum(etas.2*etas.2) logdelta <- (0:ngrids)/ngrids*(ulim-llim)+llim m <- length(logdelta) delta <- exp(logdelta) Lambdas <- 
matrix(eig.R$values,t-q,m) + matrix(delta,t-q,m,byrow=TRUE) Etasq <- matrix(etas.1*etas.1,t-q,m) dLL <- 0.5*delta*((n-q)*(colSums(Etasq/(Lambdas*Lambdas))+etas.2.sq/(delta*delta))/(colSums(Etasq/Lambdas)+etas.2.sq/delta)-(colSums(1/Lambdas)+(n-t)/delta)) optlogdelta <- vector(length=0) optLL <- vector(length=0) if ( dLL[1] < esp ) { optlogdelta <- append(optlogdelta, llim) optLL <- append(optLL, emma.delta.REML.LL.w.Z(llim,eig.R$values,etas.1,n,t,etas.2.sq)) } if ( dLL[m-1] > 0-esp ) { optlogdelta <- append(optlogdelta, ulim) optLL <- append(optLL, emma.delta.REML.LL.w.Z(ulim,eig.R$values,etas.1,n,t,etas.2.sq)) } for( i in 1:(m-1) ) { if ( ( dLL[i]*dLL[i+1] < 0 ) && ( dLL[i] > 0 ) && ( dLL[i+1] < 0 ) ) { r <- uniroot(emma.delta.REML.dLL.w.Z, lower=logdelta[i], upper=logdelta[i+1], lambda=eig.R$values, etas.1=etas.1, n=n, t1=t, etas.2.sq = etas.2.sq ) optlogdelta <- append(optlogdelta, r$root) optLL <- append(optLL, emma.delta.REML.LL.w.Z(r$root,eig.R$values, etas.1, n, t, etas.2.sq )) } } # optdelta <- exp(optlogdelta) } maxdelta <- exp(optlogdelta[which.max(optLL)]) maxLL <- max(optLL) if ( is.null(Z) ) { maxva <- sum(etas*etas/(eig.R$values+maxdelta))/(n-q) } else { maxva <- (sum(etas.1*etas.1/(eig.R$values+maxdelta))+etas.2.sq/maxdelta)/(n-q) } maxve <- maxva*maxdelta return (list(REML=maxLL,delta=maxdelta,ve=maxve,vg=maxva)) } emma.ML.LRT <- function(ys, xs, K, Z=NULL, X0 = NULL, ngrids=100, llim=-10, ulim=10, esp=1e-10, ponly = FALSE) { if ( is.null(dim(ys)) || ncol(ys) == 1 ) { ys <- matrix(ys,1,length(ys)) } if ( is.null(dim(xs)) || ncol(xs) == 1 ) { xs <- matrix(xs,1,length(xs)) } if ( is.null(X0) ) { X0 <- matrix(1,ncol(ys),1) } g <- nrow(ys) n <- ncol(ys) m <- nrow(xs) t <- ncol(xs) q0 <- ncol(X0) q1 <- q0 + 1 if ( !ponly ) { ML1s <- matrix(nrow=m,ncol=g) ML0s <- matrix(nrow=m,ncol=g) vgs <- matrix(nrow=m,ncol=g) ves <- matrix(nrow=m,ncol=g) } stats <- matrix(nrow=m,ncol=g) ps <- matrix(nrow=m,ncol=g) ML0 <- vector(length=g) stopifnot(nrow(K) == t) stopifnot(ncol(K) == t) stopifnot(nrow(X0) == n) if ( sum(is.na(ys)) == 0 ) { eig.L <- emma.eigen.L(Z,K) eig.R0 <- emma.eigen.R(Z,K,X0) for(i in 1:g) { ML0[i] <- emma.MLE(ys[i,],X0,K,Z,ngrids,llim,ulim,esp,eig.L,eig.R0)$ML } x.prev <- vector(length=0) for(i in 1:m) { vids <- !is.na(xs[i,]) nv <- sum(vids) xv <- xs[i,vids] if ( ( mean(xv) <= 0 ) || ( mean(xv) >= 1 ) ) { if (!ponly) { stats[i,] <- rep(NA,g) vgs[i,] <- rep(NA,g) ves[i,] <- rep(NA,g) ML1s[i,] <- rep(NA,g) ML0s[i,] <- rep(NA,g) } ps[i,] = rep(1,g) } else if ( identical(x.prev, xv) ) { if ( !ponly ) { stats[i,] <- stats[i-1,] vgs[i,] <- vgs[i-1,] ves[i,] <- ves[i-1,] ML1s[i,] <- ML1s[i-1,] ML0s[i,] <- ML0s[i-1,] } ps[i,] <- ps[i-1,] } else { if ( is.null(Z) ) { X <- cbind(X0[vids,,drop=FALSE],xs[i,vids]) eig.R1 = emma.eigen.R.wo.Z(K[vids,vids],X) } else { vrows <- as.logical(rowSums(Z[,vids])) nr <- sum(vrows) X <- cbind(X0[vrows,,drop=FALSE],Z[vrows,vids]%*%t(xs[i,vids,drop=FALSE])) eig.R1 = emma.eigen.R.w.Z(Z[vrows,vids],K[vids,vids],X) } for(j in 1:g) { if ( nv == t ) { MLE <- emma.MLE(ys[j,],X,K,Z,ngrids,llim,ulim,esp,eig.L,eig.R1) # MLE <- emma.MLE(ys[j,],X,K,Z,ngrids,llim,ulim,esp,eig.L,eig.R1) if (!ponly) { ML1s[i,j] <- MLE$ML vgs[i,j] <- MLE$vg ves[i,j] <- MLE$ve } stats[i,j] <- 2*(MLE$ML-ML0[j]) } else { if ( is.null(Z) ) { eig.L0 <- emma.eigen.L.wo.Z(K[vids,vids]) MLE0 <- emma.MLE(ys[j,vids],X0[vids,,drop=FALSE],K[vids,vids],NULL,ngrids,llim,ulim,esp,eig.L0) MLE1 <- emma.MLE(ys[j,vids],X,K[vids,vids],NULL,ngrids,llim,ulim,esp,eig.L0) } else { if ( nr == n ) { MLE1 
<- emma.MLE(ys[j,],X,K,Z,ngrids,llim,ulim,esp,eig.L) } else { eig.L0 <- emma.eigen.L.w.Z(Z[vrows,vids],K[vids,vids]) MLE0 <- emma.MLE(ys[j,vrows],X0[vrows,,drop=FALSE],K[vids,vids],Z[vrows,vids],ngrids,llim,ulim,esp,eig.L0) MLE1 <- emma.MLE(ys[j,vrows],X,K[vids,vids],Z[vrows,vids],ngrids,llim,ulim,esp,eig.L0) } } if (!ponly) { ML1s[i,j] <- MLE1$ML ML0s[i,j] <- MLE0$ML vgs[i,j] <- MLE1$vg ves[i,j] <- MLE1$ve } stats[i,j] <- 2*(MLE1$ML-MLE0$ML) } } if ( ( nv == t ) && ( !ponly ) ) { ML0s[i,] <- ML0 } ps[i,] <- pchisq(stats[i,],1,lower.tail=FALSE) } } } else { eig.L <- emma.eigen.L(Z,K) eig.R0 <- emma.eigen.R(Z,K,X0) for(i in 1:g) { vrows <- !is.na(ys[i,]) if ( is.null(Z) ) { ML0[i] <- emma.MLE(ys[i,vrows],X0[vrows,,drop=FALSE],K[vrows,vrows],NULL,ngrids,llim,ulim,esp)$ML } else { vids <- colSums(Z[vrows,]>0) ML0[i] <- emma.MLE(ys[i,vrows],X0[vrows,,drop=FALSE],K[vids,vids],Z[vrows,vids],ngrids,llim,ulim,esp)$ML } } x.prev <- vector(length=0) for(i in 1:m) { vids <- !is.na(xs[i,]) nv <- sum(vids) xv <- xs[i,vids] if ( ( mean(xv) <= 0 ) || ( mean(xv) >= 1 ) ) { if (!ponly) { stats[i,] <- rep(NA,g) vgs[i,] <- rep(NA,g) ves[i,] <- rep(NA,g) ML1s[i,] <- rep(NA,g) ML0s[,i] <- rep(NA,g) } ps[i,] = rep(1,g) } else if ( identical(x.prev, xv) ) { if ( !ponly ) { stats[i,] <- stats[i-1,] vgs[i,] <- vgs[i-1,] ves[i,] <- ves[i-1,] ML1s[i,] <- ML1s[i-1,] } ps[i,] = ps[i-1,] } else { if ( is.null(Z) ) { X <- cbind(X0,xs[i,]) if ( nv == t ) { eig.R1 = emma.eigen.R.wo.Z(K,X) } } else { vrows <- as.logical(rowSums(Z[,vids])) X <- cbind(X0,Z[,vids,drop=FALSE]%*%t(xs[i,vids,drop=FALSE])) if ( nv == t ) { eig.R1 = emma.eigen.R.w.Z(Z,K,X) } } for(j in 1:g) { # print(j) vrows <- !is.na(ys[j,]) if ( nv == t ) { nr <- sum(vrows) if ( is.null(Z) ) { if ( nr == n ) { MLE <- emma.MLE(ys[j,],X,K,NULL,ngrids,llim,ulim,esp,eig.L,eig.R1) } else { MLE <- emma.MLE(ys[j,vrows],X[vrows,],K[vrows,vrows],NULL,ngrids,llim,ulim,esp) } } else { if ( nr == n ) { MLE <- emma.MLE(ys[j,],X,K,Z,ngrids,llim,ulim,esp,eig.L,eig.R1) } else { vtids <- as.logical(colSums(Z[vrows,,drop=FALSE])) MLE <- emma.MLE(ys[j,vrows],X[vrows,],K[vtids,vtids],Z[vrows,vtids],ngrids,llim,ulim,esp) } } if (!ponly) { ML1s[i,j] <- MLE$ML vgs[i,j] <- MLE$vg ves[i,j] <- MLE$ve } stats[i,j] <- 2*(MLE$ML-ML0[j]) } else { if ( is.null(Z) ) { vtids <- vrows & vids eig.L0 <- emma.eigen.L(NULL,K[vtids,vtids]) MLE0 <- emma.MLE(ys[j,vtids],X0[vtids,,drop=FALSE],K[vtids,vtids],NULL,ngrids,llim,ulim,esp,eig.L0) MLE1 <- emma.MLE(ys[j,vtids],X[vtids,],K[vtids,vtids],NULL,ngrids,llim,ulim,esp,eig.L0) } else { vtids <- as.logical(colSums(Z[vrows,])) & vids vtrows <- vrows & as.logical(rowSums(Z[,vids])) eig.L0 <- emma.eigen.L(Z[vtrows,vtids],K[vtids,vtids]) MLE0 <- emma.MLE(ys[j,vtrows],X0[vtrows,,drop=FALSE],K[vtids,vtids],Z[vtrows,vtids],ngrids,llim,ulim,esp,eig.L0) MLE1 <- emma.MLE(ys[j,vtrows],X[vtrows,],K[vtids,vtids],Z[vtrows,vtids],ngrids,llim,ulim,esp,eig.L0) } if (!ponly) { ML1s[i,j] <- MLE1$ML vgs[i,j] <- MLE1$vg ves[i,j] <- MLE1$ve ML0s[i,j] <- MLE0$ML } stats[i,j] <- 2*(MLE1$ML-MLE0$ML) } } if ( ( nv == t ) && ( !ponly ) ) { ML0s[i,] <- ML0 } ps[i,] <- pchisq(stats[i,],1,lower.tail=FALSE) } } } if ( ponly ) { return (ps) } else { return (list(ps=ps,ML1s=ML1s,ML0s=ML0s,stats=stats,vgs=vgs,ves=ves)) } } emma.REML.t <- function(ys, xs, K, Z=NULL, X0 = NULL, ngrids=100, llim=-10, ulim=10, esp=1e-10, ponly = FALSE) { if ( is.null(dim(ys)) || ncol(ys) == 1 ) { ys <- matrix(ys,1,length(ys)) } if ( is.null(dim(xs)) || ncol(xs) == 1 ) { xs <- matrix(xs,1,length(xs)) } if ( 
is.null(X0) ) { X0 <- matrix(1,ncol(ys),1) } g <- nrow(ys) n <- ncol(ys) m <- nrow(xs) t <- ncol(xs) q0 <- ncol(X0) q1 <- q0 + 1 stopifnot(nrow(K) == t) stopifnot(ncol(K) == t) stopifnot(nrow(X0) == n) if ( !ponly ) { REMLs <- matrix(nrow=m,ncol=g) vgs <- matrix(nrow=m,ncol=g) ves <- matrix(nrow=m,ncol=g) } dfs <- matrix(nrow=m,ncol=g) stats <- matrix(nrow=m,ncol=g) ps <- matrix(nrow=m,ncol=g) if ( sum(is.na(ys)) == 0 ) { eig.L <- emma.eigen.L(Z,K) x.prev <- vector(length=0) for(i in 1:m) { vids <- !is.na(xs[i,]) nv <- sum(vids) xv <- xs[i,vids] if ( ( mean(xv) <= 0 ) || ( mean(xv) >= 1 ) ) { if ( !ponly ) { vgs[i,] <- rep(NA,g) ves[i,] <- rep(NA,g) dfs[i,] <- rep(NA,g) REMLs[i,] <- rep(NA,g) stats[i,] <- rep(NA,g) } ps[i,] = rep(1,g) } else if ( identical(x.prev, xv) ) { if ( !ponly ) { vgs[i,] <- vgs[i-1,] ves[i,] <- ves[i-1,] dfs[i,] <- dfs[i-1,] REMLs[i,] <- REMLs[i-1,] stats[i,] <- stats[i-1,] } ps[i,] <- ps[i-1,] } else { if ( is.null(Z) ) { X <- cbind(X0[vids,,drop=FALSE],xs[i,vids]) eig.R1 = emma.eigen.R.wo.Z(K[vids,vids],X) } else { vrows <- as.logical(rowSums(Z[,vids])) X <- cbind(X0[vrows,,drop=FALSE],Z[vrows,vids,drop=FALSE]%*%t(xs[i,vids,drop=FALSE])) eig.R1 = emma.eigen.R.w.Z(Z[vrows,vids],K[vids,vids],X) } for(j in 1:g) { if ( nv == t ) { REMLE <- emma.REMLE(ys[j,],X,K,Z,ngrids,llim,ulim,esp,eig.R1) if ( is.null(Z) ) { U <- eig.L$vectors * matrix(sqrt(1/(eig.L$values+REMLE$delta)),t,t,byrow=TRUE) dfs[i,j] <- nv - q1 } else { U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),n-t)),n,n,byrow=TRUE) dfs[i,j] <- n - q1 } yt <- crossprod(U,ys[j,]) Xt <- crossprod(U,X) iXX <- solve(crossprod(Xt,Xt)) beta <- iXX%*%crossprod(Xt,yt) if ( !ponly ) { vgs[i,j] <- REMLE$vg ves[i,j] <- REMLE$ve REMLs[i,j] <- REMLE$REML } stats[i,j] <- beta[q1]/sqrt(iXX[q1,q1]*REMLE$vg) } else { if ( is.null(Z) ) { eig.L0 <- emma.eigen.L.wo.Z(K[vids,vids]) nr <- sum(vids) yv <- ys[j,vids] REMLE <- emma.REMLE(yv,X,K[vids,vids,drop=FALSE],NULL,ngrids,llim,ulim,esp,eig.R1) U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values+REMLE$delta)),nr,nr,byrow=TRUE) dfs[i,j] <- nr - q1 } else { eig.L0 <- emma.eigen.L.w.Z(Z[vrows,vids,drop=FALSE],K[vids,vids]) yv <- ys[j,vrows] nr <- sum(vrows) tv <- sum(vids) REMLE <- emma.REMLE(yv,X,K[vids,vids,drop=FALSE],Z[vrows,vids,drop=FALSE],ngrids,llim,ulim,esp,eig.R1) U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),nr-tv)),nr,nr,byrow=TRUE) dfs[i,j] <- nr - q1 } yt <- crossprod(U,yv) Xt <- crossprod(U,X) iXX <- solve(crossprod(Xt,Xt)) beta <- iXX%*%crossprod(Xt,yt) if (!ponly) { vgs[i,j] <- REMLE$vg ves[i,j] <- REMLE$ve REMLs[i,j] <- REMLE$REML } stats[i,j] <- beta[q1]/sqrt(iXX[q1,q1]*REMLE$vg) } } ps[i,] <- 2*pt(abs(stats[i,]),dfs[i,],lower.tail=FALSE) } } } else { eig.L <- emma.eigen.L(Z,K) eig.R0 <- emma.eigen.R(Z,K,X0) x.prev <- vector(length=0) for(i in 1:m) { vids <- !is.na(xs[i,]) nv <- sum(vids) xv <- xs[i,vids] if ( ( mean(xv) <= 0 ) || ( mean(xv) >= 1 ) ) { if (!ponly) { vgs[i,] <- rep(NA,g) ves[i,] <- rep(NA,g) REMLs[i,] <- rep(NA,g) dfs[i,] <- rep(NA,g) } ps[i,] = rep(1,g) } else if ( identical(x.prev, xv) ) { if ( !ponly ) { stats[i,] <- stats[i-1,] vgs[i,] <- vgs[i-1,] ves[i,] <- ves[i-1,] REMLs[i,] <- REMLs[i-1,] dfs[i,] <- dfs[i-1,] } ps[i,] = ps[i-1,] } else { if ( is.null(Z) ) { X <- cbind(X0,xs[i,]) if ( nv == t ) { eig.R1 = emma.eigen.R.wo.Z(K,X) } } else { vrows <- as.logical(rowSums(Z[,vids,drop=FALSE])) X <- cbind(X0,Z[,vids,drop=FALSE]%*%t(xs[i,vids,drop=FALSE])) if ( nv == t 
) { eig.R1 = emma.eigen.R.w.Z(Z,K,X) } } for(j in 1:g) { vrows <- !is.na(ys[j,]) if ( nv == t ) { yv <- ys[j,vrows] nr <- sum(vrows) if ( is.null(Z) ) { if ( nr == n ) { REMLE <- emma.REMLE(yv,X,K,NULL,ngrids,llim,ulim,esp,eig.R1) U <- eig.L$vectors * matrix(sqrt(1/(eig.L$values+REMLE$delta)),n,n,byrow=TRUE) } else { eig.L0 <- emma.eigen.L.wo.Z(K[vrows,vrows,drop=FALSE]) REMLE <- emma.REMLE(yv,X[vrows,,drop=FALSE],K[vrows,vrows,drop=FALSE],NULL,ngrids,llim,ulim,esp) U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values+REMLE$delta)),nr,nr,byrow=TRUE) } dfs[i,j] <- nr-q1 } else { if ( nr == n ) { REMLE <- emma.REMLE(yv,X,K,Z,ngrids,llim,ulim,esp,eig.R1) U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),n-t)),n,n,byrow=TRUE) } else { vtids <- as.logical(colSums(Z[vrows,,drop=FALSE])) eig.L0 <- emma.eigen.L.w.Z(Z[vrows,vtids,drop=FALSE],K[vtids,vtids,drop=FALSE]) REMLE <- emma.REMLE(yv,X[vrows,,drop=FALSE],K[vtids,vtids,drop=FALSE],Z[vrows,vtids,drop=FALSE],ngrids,llim,ulim,esp) U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),nr-sum(vtids))),nr,nr,byrow=TRUE) } dfs[i,j] <- nr-q1 } yt <- crossprod(U,yv) Xt <- crossprod(U,X[vrows,,drop=FALSE]) iXX <- solve(crossprod(Xt,Xt)) beta <- iXX%*%crossprod(Xt,yt) if ( !ponly ) { vgs[i,j] <- REMLE$vg ves[i,j] <- REMLE$ve REMLs[i,j] <- REMLE$REML } stats[i,j] <- beta[q1]/sqrt(iXX[q1,q1]*REMLE$vg) } else { if ( is.null(Z) ) { vtids <- vrows & vids eig.L0 <- emma.eigen.L.wo.Z(K[vtids,vtids,drop=FALSE]) yv <- ys[j,vtids] nr <- sum(vtids) REMLE <- emma.REMLE(yv,X[vtids,,drop=FALSE],K[vtids,vtids,drop=FALSE],NULL,ngrids,llim,ulim,esp) U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values+REMLE$delta)),nr,nr,byrow=TRUE) Xt <- crossprod(U,X[vtids,,drop=FALSE]) dfs[i,j] <- nr-q1 } else { vtids <- as.logical(colSums(Z[vrows,,drop=FALSE])) & vids vtrows <- vrows & as.logical(rowSums(Z[,vids,drop=FALSE])) eig.L0 <- emma.eigen.L.w.Z(Z[vtrows,vtids,drop=FALSE],K[vtids,vtids,drop=FALSE]) yv <- ys[j,vtrows] nr <- sum(vtrows) REMLE <- emma.REMLE(yv,X[vtrows,,drop=FALSE],K[vtids,vtids,drop=FALSE],Z[vtrows,vtids,drop=FALSE],ngrids,llim,ulim,esp) U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),nr-sum(vtids))),nr,nr,byrow=TRUE) Xt <- crossprod(U,X[vtrows,,drop=FALSE]) dfs[i,j] <- nr-q1 } yt <- crossprod(U,yv) iXX <- solve(crossprod(Xt,Xt)) beta <- iXX%*%crossprod(Xt,yt) if ( !ponly ) { vgs[i,j] <- REMLE$vg ves[i,j] <- REMLE$ve REMLs[i,j] <- REMLE$REML } stats[i,j] <- beta[q1]/sqrt(iXX[q1,q1]*REMLE$vg) } } ps[i,] <- 2*pt(abs(stats[i,]),dfs[i,],lower.tail=FALSE) } } } if ( ponly ) { return (ps) } else { return (list(ps=ps,REMLs=REMLs,stats=stats,dfs=dfs,vgs=vgs,ves=ves)) } } `GAPIT.emma.REMLE` <- function(y, X, K, Z=NULL, ngrids=100, llim=-10, ulim=10, esp=1e-10, eig.L = NULL, eig.R = NULL) { # Authors: Hyun Min Kang # Modified (only one line) by Zhiwu Zhang to handle non-defined LL ("NaN") by replacing it with the worst LL. 
# Last update: June 8, 2011 ############################################################################################## n <- length(y) t <- nrow(K) q <- ncol(X) # stopifnot(nrow(K) == t) stopifnot(ncol(K) == t) stopifnot(nrow(X) == n) if( det(crossprod(X,X)) == 0 ) { warning("X is singular") return (list(REML=0,delta=0,ve=0,vg=0)) } if(is.null(Z) ) { if(is.null(eig.R) ) { eig.R <- emma.eigen.R.wo.Z(K,X) } etas <- crossprod(eig.R$vectors,y) logdelta <- (0:ngrids)/ngrids*(ulim-llim)+llim m <- length(logdelta) delta <- exp(logdelta) Lambdas <- matrix(eig.R$values,n-q,m) + matrix(delta,n-q,m,byrow=TRUE) Etasq <- matrix(etas*etas,n-q,m) LL <- 0.5*((n-q)*(log((n-q)/(2*pi))-1-log(colSums(Etasq/Lambdas)))-colSums(log(Lambdas))) dLL <- 0.5*delta*((n-q)*colSums(Etasq/(Lambdas*Lambdas))/colSums(Etasq/Lambdas)-colSums(1/Lambdas)) optlogdelta <- vector(length=0) optLL <- vector(length=0) if( dLL[1] < esp ) { optlogdelta <- append(optlogdelta, llim) optLL <- append(optLL, emma.delta.REML.LL.wo.Z(llim,eig.R$values,etas)) } if( dLL[m-1] > 0-esp ) { optlogdelta <- append(optlogdelta, ulim) optLL <- append(optLL, emma.delta.REML.LL.wo.Z(ulim,eig.R$values,etas)) } for(i in 1:(m-1) ) { if( ( dLL[i]*dLL[i+1] < 0 ) && ( dLL[i] > 0 ) && ( dLL[i+1] < 0 ) ) { r <- uniroot(emma.delta.REML.dLL.wo.Z, lower=logdelta[i], upper=logdelta[i+1], lambda=eig.R$values, etas=etas) optlogdelta <- append(optlogdelta, r$root) optLL <- append(optLL, emma.delta.REML.LL.wo.Z(r$root,eig.R$values, etas)) } } # optdelta <- exp(optlogdelta) } else { if(is.null(eig.R) ) { eig.R <- emma.eigen.R.w.Z(Z,K,X) } etas <- crossprod(eig.R$vectors,y) etas.1 <- etas[1:(t-q)] etas.2 <- etas[(t-q+1):(n-q)] etas.2.sq <- sum(etas.2*etas.2) logdelta <- (0:ngrids)/ngrids*(ulim-llim)+llim m <- length(logdelta) delta <- exp(logdelta) Lambdas <- matrix(eig.R$values,t-q,m) + matrix(delta,t-q,m,byrow=TRUE) Etasq <- matrix(etas.1*etas.1,t-q,m) dLL <- 0.5*delta*((n-q)*(colSums(Etasq/(Lambdas*Lambdas))+etas.2.sq/(delta*delta))/(colSums(Etasq/Lambdas)+etas.2.sq/delta)-(colSums(1/Lambdas)+(n-t)/delta)) optlogdelta <- vector(length=0) optLL <- vector(length=0) if( dLL[1] < esp ) { optlogdelta <- append(optlogdelta, llim) optLL <- append(optLL, emma.delta.REML.LL.w.Z(llim,eig.R$values,etas.1,n,t,etas.2.sq)) } if( dLL[m-1] > 0-esp ) { optlogdelta <- append(optlogdelta, ulim) optLL <- append(optLL, emma.delta.REML.LL.w.Z(ulim,eig.R$values,etas.1,n,t,etas.2.sq)) } for(i in 1:(m-1) ) { if( ( dLL[i]*dLL[i+1] < 0 ) && ( dLL[i] > 0 ) && ( dLL[i+1] < 0 ) ) { r <- uniroot(emma.delta.REML.dLL.w.Z, lower=logdelta[i], upper=logdelta[i+1], lambda=eig.R$values, etas.1=etas.1, n=n, t1=t, etas.2.sq = etas.2.sq ) optlogdelta <- append(optlogdelta, r$root) optLL <- append(optLL, emma.delta.REML.LL.w.Z(r$root,eig.R$values, etas.1, n, t, etas.2.sq )) } } # optdelta <- exp(optlogdelta) } maxdelta <- exp(optlogdelta[which.max(optLL)]) #handler of grids with NaN log optLL=GAPIT.replaceNaN(optLL) maxLL <- max(optLL) if(is.null(Z) ) { maxva <- sum(etas*etas/(eig.R$values+maxdelta))/(n-q) } else { maxva <- (sum(etas.1*etas.1/(eig.R$values+maxdelta))+etas.2.sq/maxdelta)/(n-q) } maxve <- maxva*maxdelta return (list(REML=maxLL,delta=maxdelta,ve=maxve,vg=maxva)) } #============================================================================================= `GAPIT.get.LL` <- cmpfun(function(pheno,geno=NULL,snp.pool,X0=NULL){ # evaluation of the maximum likelihood #Input: ys, xs, vg, delta, Z, X0, snp.pool #Output: LL #Authors: Qishan Wang, Feng Tian and Zhiwu Zhang #Last update: April 16, 
2012 ################################################################################ #print("GAPIT.get.LL started") #print("dimension of pheno, snpool and X0") #print(dim(pheno)) #print(length(pheno)) #print(dim(snp.pool)) #print(length(snp.pool)) #print(dim(X0)) #print(length(X0)) y=pheno p=0 deltaExpStart = -5 deltaExpEnd = 5 snp.pool=snp.pool[,] if(!is.null(snp.pool)&&var(snp.pool)==0){ deltaExpStart = 100 deltaExpEnd = deltaExpStart #print("deltaExp change here") } if(is.null(X0)) { X0 = matrix(1, nrow(snp.pool), 1) } #snp.test=as.numeric(geno[,1]) #X <- cbind(X0, snp.test) X=X0 #########SVD of X K.X.svd= svd(snp.pool,LINPACK=TRUE)######rivised by Jiabo Wang 2016.1.8 # snp.pool=NA problem occurred #####rivised 2012.4.15 by qishan wang d=K.X.svd$d d=d[d>1e-08] d=d^2 U1=K.X.svd$u U1=U1[,1:length(d)] ##rivised 2012.4.15 by qishan wang #handler of single snp if(is.null(dim(U1))) U1=matrix(U1,ncol=1) ################### n=nrow(U1) #I= diag(1,nrow(U1)) #xiaolei removed, this costs lots of memory U1TX=crossprod(U1,X) U1TY=crossprod(U1,y) yU1TY<- y-U1%*%U1TY XU1TX<- X-U1%*%U1TX ### i is out of bracket #xiaolei rewrite following 4 lines IU = -tcrossprod(U1,U1) diag(IU) = rep(1,n) + diag(IU) #IUU=(I-tcrossprod(U1,U1)) IUX=crossprod(IU,X ) IUY=crossprod(IU,y) #Iteration on the range of delta (-5 to 5 in glog scale) for (m in seq(deltaExpStart,deltaExpEnd,by=0.1)) { p=p+1 delta<- exp(m) #----------------------------calculate beta------------------------------------- #######get beta compnents 1 beta1=0 for(i in 1:length(d)){ one=matrix(U1TX[i,], nrow=1) beta=crossprod(one,(one/(d[i]+delta))) #This is not real beta, confusing beta1= beta1+beta } #######get beta components 2 beta2=0 for(i in 1:nrow(U1)){ one=matrix(IUX[i,], nrow=1) dim(one) beta=crossprod(one,one) beta2= beta2+beta } beta2<-beta2/delta #######get b3 beta3=0 for(i in 1:length(d)){ one1=matrix(U1TX[i,], nrow=1) one2=matrix(U1TY[i,], nrow=1) beta=crossprod(one1,(one2/(d[i]+delta))) #This is not real beta, confusing beta3= beta3+beta } ###########get beta4 beta4=0 for(i in 1:nrow(U1)){ one1=matrix(IUX[i,], nrow=1) one2=matrix(IUY[i,], nrow=1) beta=crossprod(one1,one2) #This is not real beta, confusing beta4= beta4+beta } beta4<-beta4/delta #######get final beta #zw1=solve(beta1+beta2) zw1 <- try(solve(beta1+beta2),silent=TRUE) if(inherits(zw1, "try-error")){ zw1 <- ginv(beta1+beta2) } #zw1=ginv(beta1+beta2) zw2=(beta3+beta4) beta=crossprod(zw1,zw2) #This is the real beta #----------------------------calculate LL--------------------------------------- ####part 1 part11<-n*log(2*3.14) part12<-0 for(i in 1:length(d)){ part12_pre=log(d[i]+delta) part12= part12+part12_pre } part13<- (nrow(U1)-length(d))*log(delta) part1<- -1/2*(part11+part12+part13) ###### part2 part21<-nrow(U1) ######part221 part221=0 for(i in 1:length(d)){ one1=matrix(U1TX[i,], nrow=1) one2=matrix(U1TY[i,], nrow=1) part221_pre=(one2-one1%*%beta)^2/(d[i]+delta) ###### beta contain covariate and snp %*% part221= part221+part221_pre } ######part222 part222=0 for(i in 1:n){ one1=matrix(XU1TX[i,], nrow=1) one2=matrix(yU1TY[i,], nrow=1) part222_pre=((one2-one1%*%beta)^2)/delta part222= part222+part222_pre } part22<-n*log((1/n)*(part221+part222)) part2<- -1/2*(part21+part22) ################# likihood LL<-part1+part2 part1<-0 part2<-0 #-----------------------Save the optimum--------------------------------------- if(p==1){ beta.save=beta delta.save=delta LL.save=LL }else{ if(LL>LL.save){ beta.save=beta delta.save=delta LL.save=LL } } } # end of Iteration on the range of delta (-5 
to 5 in log scale)
#--------------------update with the optimum------------------------------------
beta=beta.save
delta=delta.save
LL=LL.save
names(delta)=NULL
names(LL)=NULL
#--------------------calculating Va and Ve-------------------------------------
#sigma_a1
#U1TX=crossprod(U1,X)#xiaolei removed, it is re-calculated
#U1TY=crossprod(U1,y)#xiaolei removed, it is re-calculated
sigma_a1=0
for(i in 1:length(d)){
one1=matrix(U1TX[i,], nrow=1)
one2=matrix(U1TY[i,], nrow=1)
sigma_a1_pre=(one2-one1%*%beta)^2/(d[i]+delta)
sigma_a1= sigma_a1+sigma_a1_pre
}
### sigma_a2
#xiaolei removed following 3 lines
#IU=I-tcrossprod(U1,U1) #This needs to be done only once
#IUX=crossprod(IU,X)
#IUY=crossprod(IU,y)
sigma_a2=0
for(i in 1:nrow(U1)){
one1=matrix(IUX[i,], nrow=1)
one2=matrix(IUY[i,], nrow=1)
sigma_a2_pre<-(one2-one1%*%beta)^2
sigma_a2= sigma_a2+sigma_a2_pre
}
sigma_a2<-sigma_a2/delta
sigma_a<- 1/n*(sigma_a1+sigma_a2)
sigma_e<-delta*sigma_a
return(list(beta=beta, delta=delta, LL=LL, vg=sigma_a,ve=sigma_e))
} )#end of cmpfun(
#=============================================================================================
`GAPIT.kinship.VanRaden` <- function(snps,hasInbred=TRUE) {
# Object: To calculate the kinship matrix using the method of VanRaden (2008, J. Dairy Sci. 91:4414-4423)
# Input: snps is a matrix with n individual rows and m SNP columns
# Output: n by n relationship matrix
# Authors: Zhiwu Zhang
# Last update: March 2, 2016
##############################################################################################
print("Calculating kinship with VanRaden method...")
#Remove invariants
fa=colSums(snps)/(2*nrow(snps))
index.non=fa>=1| fa<=0
snps=snps[,!index.non]
nSNP=ncol(snps)
nInd=nrow(snps)
n=nInd
##allele frequency of second allele
p=colSums(snps)/(2*nInd)
P=2*(p-.5) #Difference from .5, multiplied by 2
snps=snps-1 #Change from 0/1/2 coding to -1/0/1 coding
print("Subtracting P...")
Z=t(snps)-P #operation on matrix and vector goes in direction of column
print("Getting X'X...")
#K=tcrossprod((snps), (snps))
K=crossprod((Z), (Z)) #Thanks to Peng Zheng, Meng Huang and Jiafa Chen for finding the problem
print("Adjusting...")
adj=2*sum(p*(1-p))
K=K/adj
print("Calculating kinship with VanRaden method: done")
return(K)
}
#=============================================================================================
`GAPIT.kinship.ZHANG` <- function(snps,hasInbred=TRUE) {
# Object: To calculate the ZHANG (Zones Harbored Adjustments of Negligent Genetic) relationship
# Authors: Zhiwu Zhang
# Last update: October 25, 2014
##############################################################################################
print("Calculating ZHANG relationship defined by Zhiwu Zhang...")
#Remove invariants
fa=colSums(snps)/(2*nrow(snps))
index.non=fa>=1| fa<=0
snps=snps[,!index.non]
het=1-abs(snps-1)
ind.sum=rowSums(het)
fi=ind.sum/(2*ncol(snps))
inbreeding=1-min(fi)
nSNP=ncol(snps)
nInd=nrow(snps)
n=nInd
snpMean= apply(snps,2,mean) #get mean for each snp
print("Subtracting mean...")
snps=t(snps)-snpMean #operation on matrix and vector goes in direction of column
print("Getting X'X...")
#K=tcrossprod((snps), (snps))
K=crossprod((snps), (snps))
if(is.na(K[1,1])) stop ("GAPIT says: Missing data is not allowed for numerical genotype data")
print("Adjusting...")
#Extract diagonals
i =1:n
j=(i-1)*n
index=i+j
d=K[index]
DL=min(d)
DU=max(d)
floor=min(K)
#Set range between 0 and 2
top=1+inbreeding
K=top*(K-floor)/(DU-floor)
Dmin=top*(DL-floor)/(DU-floor)
#Adjust based on expected minimum diagonal (1)
if(Dmin<1) {
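#The rescaling above maps the minimum of K to 0 and the largest diagonal to top = 1 + inbreeding;
#this branch then shifts the diagonal so its minimum matches the expected minimum diagonal of 1
#and rescales the off-diagonals by 1/Dmin. A small numeric illustration with hypothetical values
#(top=1, Dmin=0.8): a diagonal entry d becomes (d-0.8+1)/((1+1-0.8)*0.5) and every off-diagonal
#entry is multiplied by 1/0.8.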
print("Adjustment by the minimum diagonal") K[index]=(K[index]-Dmin+1)/((top+1-Dmin)*.5) K[-index]=K[-index]*(1/Dmin) } #Limiting the maximum offdiagonal to the top Omax=max(K[-index]) if(Omax>top){ print("Adjustment by the minimum off diagonal") K[-index]=K[-index]*(top/Omax) } print("Calculating kinship with Zhang method: done") return(K) } #============================================================================================= `GAPIT.kinship.loiselle` <- function(snps, method="additive", use="all") { # Object: To calculate the kinship matrix using the method of Loiselle et al. (1995) # Authors: Alex Lipka and Hyun Min Kang # Last update: May 31, 2011 ############################################################################################## #Number of SNP types that are 0s n0 <- sum(snps==0,na.rm=TRUE) #Number of heterozygote SNP types nh <- sum(snps==0.5,na.rm=TRUE) #Number of SNP types that are 1s n1 <- sum(snps==1,na.rm=TRUE) #Number of SNP types that are missing nNA <- sum(is.na(snps)) #Self explanatory dim(snps)[1]*dim(snps)[2] #stopifnot(n0+nh+n1+nNA == length(snps)) #Note that the two lines in if(method == "dominant") and if(method == "recessive") are found in #if(method == "additive"). Worry about this only if you have heterozygotes, which you do not. if( method == "dominant" ) { flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps)) snps[!is.na(snps) && (snps == 0.5)] <- flags[!is.na(snps) && (snps == 0.5)] } else if( method == "recessive" ) { flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps)) snps[!is.na(snps) && (snps == 0.5)] <- flags[!is.na(snps) && (snps == 0.5)] } else if( ( method == "additive" ) && ( nh > 0 ) ) { dsnps <- snps rsnps <- snps flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps)) dsnps[!is.na(snps) && (snps==0.5)] <- flags[is.na(snps) && (snps==0.5)] flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps)) rsnps[!is.na(snps) && (snps==0.5)] <- flags[is.na(snps) && (snps==0.5)] snps <- rbind(dsnps,rsnps) } #mafs is a (# SNPs)x(# lines) matrix. The columns of mafs are identical, and the ij^th element is the average #allele frequency for the SNP in the i^th row. #if(use == "all") imputes missing SNP type values with the expected (average) allele frequency. if( use == "all" ) { mafs <- matrix(rowMeans(snps,na.rm=TRUE),nrow(snps),ncol(snps)) snps[is.na(snps)] <- mafs[is.na(snps)] } else if( use == "complete.obs" ) { mafs <- matrix(rowMeans(snps,na.rm=TRUE),nrow(snps),ncol(snps)) snps <- snps[rowSums(is.na(snps))==0,] } mafs_comp <- 1-mafs snps_comp <- 1-snps n <- ncol(snps) K <- matrix(nrow=n,ncol=n) diag(K) <- 1 #Create the k term on page 1422 of Loiselle et al. 
(1995) missing <- rep(NA, dim(snps)[1]) for(i in 1:dim(snps)[1]) { missing[i] <- sum(is.na(snps[i,])) } for(i in 1:(n-1)) { for(j in (i+1):n) { Num_First_Term_1 <- (snps[,i]-mafs[,i])*(snps[,j]-mafs[,j]) Num_First_Term_2 <- (snps_comp[,i]-mafs_comp[,i])*(snps_comp[,j]-mafs_comp[,j]) First_Term <- sum(Num_First_Term_1)+sum(Num_First_Term_2) Num_Second_Term_1 <- mafs[,i]*(1-mafs[,i]) Num_Second_Term_2 <- mafs_comp[,i]*(1-mafs_comp[,i]) Num_Second_Term_Bias_Correction <- 1/((2*n)-missing - 1) Num_Second_Term <- Num_Second_Term_1 + Num_Second_Term_2 Second_Term <- sum(Num_Second_Term*Num_Second_Term_Bias_Correction) Third_Term <- sum(Num_Second_Term) f <- (First_Term + Second_Term)/Third_Term K[i,j] <- f if(K[i,j]<0) K[i,j]=0 K[j,i] <- K[i,j] } } return(K) } #============================================================================================= `GAPIT.kinship.separation` <- function(PCs=NULL,EV=NULL,nPCs=0 ){ #Object: To calculate kinship from PCS # PCs: the principal component as columns and individual as rows, the first column is taxa # EV: Eigen values # nPCs: the number of front PCs excluded to calculate kinship #Output: kinship #Authors: Huihui Li and Zhiwu Zhang #Last update: April 17, 2012 ############################################################################################## print("Calling GAPIT.kinship.separation") Total.number.PCs=ncol(PCs) n=nrow(PCs) print(Total.number.PCs) print(n) #Choose Total.number.PCs-nPCs PCs and EV to calculate K sep.PCs=PCs[, (nPCs+2):(Total.number.PCs)] #first column is taxa sep.EV=EV[(nPCs+1):Total.number.PCs] Weighted.sep.EV=sep.EV/sum(sep.EV) #X=t(t(sep.PCs)*Weighted.sep.EV) X=sep.PCs XMean= apply(X,2,mean) X=as.matrix(X-XMean) K=tcrossprod((X), (X)) #Extract diagonals i =1:n j=(i-1)*n index=i+j d=K[index] DL=min(d) DU=max(d) floor=min(K) K=(K-floor)/(DL-floor) MD=(DU-floor)/(DL-floor) if(is.na(K[1,1])) stop ("GAPIT says: Missing data is not allowed for numerical genotype data") if(MD>2)K[index]=K[index]/(MD-1)+1 print("GAPIT.kinship.separation called succesfuly") return (K) } #============================================================================================= if(!require(gplots)) install.packages("gplots") if(!require(LDheatmap)) install.packages("LDheatmap") if(!require(genetics)) install.packages("genetics") if(!require(ape)) install.packages("ape") if(!require(compiler)) install.packages("compiler") if(!require(EMMREML)) install.packages("EMMREML") if(!require(scatterplot3d)) install.packages("scatterplot3d") if(!'multtest'%in% installed.packages()[,"Package"]){ if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("multtest") BiocManager::install("snpStats") } ############################################################################################################################################## ###MLMM - Multi-Locus Mixed Model ###SET OF FUNCTIONS TO CARRY GWAS CORRECTING FOR POPULATION STRUCTURE WHILE INCLUDING COFACTORS THROUGH A STEPWISE-REGRESSION APPROACH ####### # ##note: require EMMA #library(emma) #source('emma.r') # ##REQUIRED DATA & FORMAT # #PHENOTYPE - Y: a vector of length m, with names(Y)=individual names #GENOTYPE - X: a n by m matrix, where n=number of individuals, m=number of SNPs, with rownames(X)=individual names, and colnames(X)=SNP names #KINSHIP - K: a n by n matrix, with rownames(K)=colnames(K)=individual names #each of these data being sorted in the same way, according to the individual name # ##FOR PLOTING THE GWAS RESULTS #SNP INFORMATION - 
snp_info: a data frame having at least 3 columns: # - 1 named 'SNP', with SNP names (same as colnames(X)), # - 1 named 'Chr', with the chromosome number to which belong each SNP # - 1 named 'Pos', with the position of the SNP onto the chromosome it belongs to. ####### # ##FUNCTIONS USE #save this file somewhere on your computer and source it! #source('path/mlmm.r') # ###FORWARD + BACKWARD ANALYSES #mygwas<-mlmm(Y,X,K,nbchunks,maxsteps) #X,Y,K as described above #nbchunks: an integer defining the number of chunks of X to run the analysis, allows to decrease the memory usage ==> minimum=2, increase it if you do not have enough memory #maxsteps: maximum number of steps desired in the forward approach. The forward approach breaks automatically once the pseudo-heritability is close to 0, # however to avoid doing too many steps in case the pseudo-heritability does not reach a value close to 0, this parameter is also used. # It's value must be specified as an integer >= 3 # ###RESULTS # ##STEPWISE TABLE #mygwas$step_table # ##PLOTS # ##PLOTS FORM THE FORWARD TABLE #plot_step_table(mygwas,type=c('h2','maxpval','BIC','extBIC')) # ##RSS PLOT #plot_step_RSS(mygwas) # ##GWAS MANHATTAN PLOTS # #FORWARD STEPS #plot_fwd_GWAS(mygwas,step,snp_info,pval_filt) #step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor) #snp_info as described above #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot # #OPTIMAL MODELS #Automatic identification of the optimal models within the forwrad-backward models according to the extendedBIC or multiple-bonferonni criteria # #plot_opt_GWAS(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt) #snp_info as described above #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot # ##GWAS MANHATTAN PLOT ZOOMED IN A REGION OF INTEREST #plot_fwd_region(mygwas,step,snp_info,pval_filt,chrom,pos1,pos2) #step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor) #snp_info as described above #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot #chrom is an integer specifying the chromosome on which the region of interest is #pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info # #plot_opt_region(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt,chrom,pos1,pos2) #snp_info as described above #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot #chrom is an integer specifying the chromosome on which the region of interest is #pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info # ##QQPLOTS of pvalues #qqplot_fwd_GWAS(mygwas,nsteps) #nsteps=maximum number of forward steps to be displayed # #qqplot_opt_GWAS(mygwas,opt=c('extBIC','mbonf')) # ############################################################################################################################################## mlmm<-function(Y,X,K,nbchunks,maxsteps,thresh = NULL) { n<-length(Y) m<-ncol(X) stopifnot(ncol(K) == n) stopifnot(nrow(K) == n) stopifnot(nrow(X) == n) stopifnot(nbchunks >= 2) stopifnot(maxsteps >= 3) #INTERCEPT Xo<-rep(1,n) #K MATRIX NORMALISATION K_norm<-(n-1)/sum((diag(n)-matrix(1,n,n)/n)*K)*K rm(K) #step 0 : NULL MODEL cof_fwd<-list() cof_fwd[[1]]<-as.matrix(Xo) colnames(cof_fwd[[1]])<-'Xo' 
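#Note on the quantities used below: K_norm above is rescaled so that the (Gower-centred) trace of K
#equals n-1, which puts the polygenic variance on a unit scale. Pseudo-heritability at each step is
#vg/(vg+ve) from the emma.REMLE fit below; the forward scan stops once it drops below 0.01
#(see the break condition in the FORWARD loop).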
mod_fwd<-list() mod_fwd[[1]]<-emma.REMLE(Y,cof_fwd[[1]],K_norm) herit_fwd<-list() herit_fwd[[1]]<-mod_fwd[[1]]$vg/(mod_fwd[[1]]$vg+mod_fwd[[1]]$ve) RSSf<-list() RSSf[[1]]<-'NA' RSS_H0<-list() RSS_H0[[1]]<-'NA' df1<-1 df2<-list() df2[[1]]<-'NA' Ftest<-list() Ftest[[1]]<-'NA' pval<-list() pval[[1]]<-'NA' fwd_lm<-list() cat('null model done! pseudo-h=',round(herit_fwd[[1]],3),'\n') #step 1 : EMMAX M<-solve(chol(mod_fwd[[1]]$vg*K_norm+mod_fwd[[1]]$ve*diag(n))) Y_t<-crossprod(M,Y) cof_fwd_t<-crossprod(M,cof_fwd[[1]]) fwd_lm[[1]]<-summary(lm(Y_t~0+cof_fwd_t)) Res_H0<-fwd_lm[[1]]$residuals Q_<-qr.Q(qr(cof_fwd_t)) RSS<-list() for (j in 1:(nbchunks-1)) { X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[1]])])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t)} X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[1]])])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof_fwd[[1]])-1))]) RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t,j) RSSf[[2]]<-unlist(RSS) RSS_H0[[2]]<-sum(Res_H0^2) df2[[2]]<-n-df1-ncol(cof_fwd[[1]]) Ftest[[2]]<-(rep(RSS_H0[[2]],length(RSSf[[2]]))/RSSf[[2]]-1)*df2[[2]]/df1 pval[[2]]<-pf(Ftest[[2]],df1,df2[[2]],lower.tail=FALSE) cof_fwd[[2]]<-cbind(cof_fwd[[1]],X[,colnames(X) %in% names(which(RSSf[[2]]==min(RSSf[[2]]))[1])]) colnames(cof_fwd[[2]])<-c(colnames(cof_fwd[[1]]),names(which(RSSf[[2]]==min(RSSf[[2]]))[1])) mod_fwd[[2]]<-emma.REMLE(Y,cof_fwd[[2]],K_norm) herit_fwd[[2]]<-mod_fwd[[2]]$vg/(mod_fwd[[2]]$vg+mod_fwd[[2]]$ve) rm(M,Y_t,cof_fwd_t,Res_H0,Q_,RSS) cat('step 1 done! pseudo-h=',round(herit_fwd[[2]],3),'\n') #FORWARD for (i in 3:(maxsteps)) { if (herit_fwd[[i-2]] < 0.01) break else { M<-solve(chol(mod_fwd[[i-1]]$vg*K_norm+mod_fwd[[i-1]]$ve*diag(n))) Y_t<-crossprod(M,Y) cof_fwd_t<-crossprod(M,cof_fwd[[i-1]]) fwd_lm[[i-1]]<-summary(lm(Y_t~0+cof_fwd_t)) Res_H0<-fwd_lm[[i-1]]$residuals Q_ <- qr.Q(qr(cof_fwd_t)) RSS<-list() for (j in 1:(nbchunks-1)) { X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[i-1]])])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t)} X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[i-1]])])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof_fwd[[i-1]])-1))]) RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t,j) RSSf[[i]]<-unlist(RSS) RSS_H0[[i]]<-sum(Res_H0^2) df2[[i]]<-n-df1-ncol(cof_fwd[[i-1]]) Ftest[[i]]<-(rep(RSS_H0[[i]],length(RSSf[[i]]))/RSSf[[i]]-1)*df2[[i]]/df1 pval[[i]]<-pf(Ftest[[i]],df1,df2[[i]],lower.tail=FALSE) cof_fwd[[i]]<-cbind(cof_fwd[[i-1]],X[,colnames(X) %in% names(which(RSSf[[i]]==min(RSSf[[i]]))[1])]) colnames(cof_fwd[[i]])<-c(colnames(cof_fwd[[i-1]]),names(which(RSSf[[i]]==min(RSSf[[i]]))[1])) mod_fwd[[i]]<-emma.REMLE(Y,cof_fwd[[i]],K_norm) herit_fwd[[i]]<-mod_fwd[[i]]$vg/(mod_fwd[[i]]$vg+mod_fwd[[i]]$ve) rm(M,Y_t,cof_fwd_t,Res_H0,Q_,RSS)} cat('step ',i-1,' done! 
pseudo-h=',round(herit_fwd[[i]],3),'\n')} rm(i) ##gls at last forward step M<-solve(chol(mod_fwd[[length(mod_fwd)]]$vg*K_norm+mod_fwd[[length(mod_fwd)]]$ve*diag(n))) Y_t<-crossprod(M,Y) cof_fwd_t<-crossprod(M,cof_fwd[[length(mod_fwd)]]) fwd_lm[[length(mod_fwd)]]<-summary(lm(Y_t~0+cof_fwd_t)) Res_H0<-fwd_lm[[length(mod_fwd)]]$residuals Q_ <- qr.Q(qr(cof_fwd_t)) RSS<-list() for (j in 1:(nbchunks-1)) { X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[length(mod_fwd)]])])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t)} X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[length(mod_fwd)]])])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof_fwd[[length(mod_fwd)]])-1))]) RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t,j) RSSf[[length(mod_fwd)+1]]<-unlist(RSS) RSS_H0[[length(mod_fwd)+1]]<-sum(Res_H0^2) df2[[length(mod_fwd)+1]]<-n-df1-ncol(cof_fwd[[length(mod_fwd)]]) Ftest[[length(mod_fwd)+1]]<-(rep(RSS_H0[[length(mod_fwd)+1]],length(RSSf[[length(mod_fwd)+1]]))/RSSf[[length(mod_fwd)+1]]-1)*df2[[length(mod_fwd)+1]]/df1 pval[[length(mod_fwd)+1]]<-pf(Ftest[[length(mod_fwd)+1]],df1,df2[[length(mod_fwd)+1]],lower.tail=FALSE) rm(M,Y_t,cof_fwd_t,Res_H0,Q_,RSS) ##get max pval at each forward step max_pval_fwd<-vector(mode="numeric",length=length(fwd_lm)) max_pval_fwd[1]<-0 for (i in 2:length(fwd_lm)) {max_pval_fwd[i]<-max(fwd_lm[[i]]$coef[2:i,4])} rm(i) ##get the number of parameters & Loglikelihood from ML at each step mod_fwd_LL<-list() mod_fwd_LL[[1]]<-list(nfixed=ncol(cof_fwd[[1]]),LL=emma.MLE(Y,cof_fwd[[1]],K_norm)$ML) for (i in 2:length(cof_fwd)) {mod_fwd_LL[[i]]<-list(nfixed=ncol(cof_fwd[[i]]),LL=emma.MLE(Y,cof_fwd[[i]],K_norm)$ML)} rm(i) cat('backward analysis','\n') ##BACKWARD (1st step == last fwd step) dropcof_bwd<-list() cof_bwd<-list() mod_bwd <- list() bwd_lm<-list() herit_bwd<-list() dropcof_bwd[[1]]<-'NA' cof_bwd[[1]]<-as.matrix(cof_fwd[[length(mod_fwd)]][,!colnames(cof_fwd[[length(mod_fwd)]]) %in% dropcof_bwd[[1]]]) colnames(cof_bwd[[1]])<-colnames(cof_fwd[[length(mod_fwd)]])[!colnames(cof_fwd[[length(mod_fwd)]]) %in% dropcof_bwd[[1]]] mod_bwd[[1]]<-emma.REMLE(Y,cof_bwd[[1]],K_norm) herit_bwd[[1]]<-mod_bwd[[1]]$vg/(mod_bwd[[1]]$vg+mod_bwd[[1]]$ve) M<-solve(chol(mod_bwd[[1]]$vg*K_norm+mod_bwd[[1]]$ve*diag(n))) Y_t<-crossprod(M,Y) cof_bwd_t<-crossprod(M,cof_bwd[[1]]) bwd_lm[[1]]<-summary(lm(Y_t~0+cof_bwd_t)) rm(M,Y_t,cof_bwd_t) for (i in 2:length(mod_fwd)) { dropcof_bwd[[i]]<-(colnames(cof_bwd[[i-1]])[2:ncol(cof_bwd[[i-1]])])[which(abs(bwd_lm[[i-1]]$coef[2:nrow(bwd_lm[[i-1]]$coef),3])==min(abs(bwd_lm[[i-1]]$coef[2:nrow(bwd_lm[[i-1]]$coef),3])))] cof_bwd[[i]]<-as.matrix(cof_bwd[[i-1]][,!colnames(cof_bwd[[i-1]]) %in% dropcof_bwd[[i]]]) colnames(cof_bwd[[i]])<-colnames(cof_bwd[[i-1]])[!colnames(cof_bwd[[i-1]]) %in% dropcof_bwd[[i]]] mod_bwd[[i]]<-emma.REMLE(Y,cof_bwd[[i]],K_norm) herit_bwd[[i]]<-mod_bwd[[i]]$vg/(mod_bwd[[i]]$vg+mod_bwd[[i]]$ve) M<-solve(chol(mod_bwd[[i]]$vg*K_norm+mod_bwd[[i]]$ve*diag(n))) Y_t<-crossprod(M,Y) cof_bwd_t<-crossprod(M,cof_bwd[[i]]) bwd_lm[[i]]<-summary(lm(Y_t~0+cof_bwd_t)) rm(M,Y_t,cof_bwd_t)} rm(i) ##get max pval at each backward step max_pval_bwd<-vector(mode="numeric",length=length(bwd_lm)) for (i in 1:(length(bwd_lm)-1)) {max_pval_bwd[i]<-max(bwd_lm[[i]]$coef[2:(length(bwd_lm)+1-i),4])} max_pval_bwd[length(bwd_lm)]<-0 ##get the number of 
parameters & Loglikelihood from ML at each step mod_bwd_LL<-list() mod_bwd_LL[[1]]<-list(nfixed=ncol(cof_bwd[[1]]),LL=emma.MLE(Y,cof_bwd[[1]],K_norm)$ML) for (i in 2:length(cof_bwd)) {mod_bwd_LL[[i]]<-list(nfixed=ncol(cof_bwd[[i]]),LL=emma.MLE(Y,cof_bwd[[i]],K_norm)$ML)} rm(i) cat('creating output','\n') ##Forward Table: Fwd + Bwd Tables #Compute parameters for model criteria BIC<-function(x){-2*x$LL+(x$nfixed+1)*log(n)} extBIC<-function(x){BIC(x)+2*lchoose(m,x$nfixed-1)} fwd_table<-data.frame(step=ncol(cof_fwd[[1]])-1,step_=paste('fwd',ncol(cof_fwd[[1]])-1,sep=''),cof='NA',ncof=ncol(cof_fwd[[1]])-1,h2=herit_fwd[[1]] ,maxpval=max_pval_fwd[1],BIC=BIC(mod_fwd_LL[[1]]),extBIC=extBIC(mod_fwd_LL[[1]])) for (i in 2:(length(mod_fwd))) {fwd_table<-rbind(fwd_table, data.frame(step=ncol(cof_fwd[[i]])-1,step_=paste('fwd',ncol(cof_fwd[[i]])-1,sep=''),cof=paste('+',colnames(cof_fwd[[i]])[i],sep=''),ncof=ncol(cof_fwd[[i]])-1,h2=herit_fwd[[i]] ,maxpval=max_pval_fwd[i],BIC=BIC(mod_fwd_LL[[i]]),extBIC=extBIC(mod_fwd_LL[[i]])))} rm(i) bwd_table<-data.frame(step=length(mod_fwd),step_=paste('bwd',0,sep=''),cof=paste('-',dropcof_bwd[[1]],sep=''),ncof=ncol(cof_bwd[[1]])-1,h2=herit_bwd[[1]] ,maxpval=max_pval_bwd[1],BIC=BIC(mod_bwd_LL[[1]]),extBIC=extBIC(mod_bwd_LL[[1]])) for (i in 2:(length(mod_bwd))) {bwd_table<-rbind(bwd_table, data.frame(step=length(mod_fwd)+i-1,step_=paste('bwd',i-1,sep=''),cof=paste('-',dropcof_bwd[[i]],sep=''),ncof=ncol(cof_bwd[[i]])-1,h2=herit_bwd[[i]] ,maxpval=max_pval_bwd[i],BIC=BIC(mod_bwd_LL[[i]]),extBIC=extBIC(mod_bwd_LL[[i]])))} rm(i,BIC,extBIC,max_pval_fwd,max_pval_bwd,dropcof_bwd) fwdbwd_table<-rbind(fwd_table,bwd_table) #RSS for plot mod_fwd_RSS<-vector() mod_fwd_RSS[1]<-sum((Y-cof_fwd[[1]]%*%fwd_lm[[1]]$coef[,1])^2) for (i in 2:length(mod_fwd)) {mod_fwd_RSS[i]<-sum((Y-cof_fwd[[i]]%*%fwd_lm[[i]]$coef[,1])^2)} mod_bwd_RSS<-vector() mod_bwd_RSS[1]<-sum((Y-cof_bwd[[1]]%*%bwd_lm[[1]]$coef[,1])^2) for (i in 2:length(mod_bwd)) {mod_bwd_RSS[i]<-sum((Y-cof_bwd[[i]]%*%bwd_lm[[i]]$coef[,1])^2)} expl_RSS<-c(1-sapply(mod_fwd_RSS,function(x){x/mod_fwd_RSS[1]}),1-sapply(mod_bwd_RSS,function(x){x/mod_bwd_RSS[length(mod_bwd_RSS)]})) h2_RSS<-c(unlist(herit_fwd),unlist(herit_bwd))*(1-expl_RSS) unexpl_RSS<-1-expl_RSS-h2_RSS plot_RSS<-t(apply(cbind(expl_RSS,h2_RSS,unexpl_RSS),1,cumsum)) #GLS pvals at each step pval_step<-list() pval_step[[1]]<-list(out=data.frame("SNP"=colnames(X),"pval"=pval[[2]]),"cof"=NA, "coef"=fwd_lm[[1]]$coef) for (i in 2:(length(mod_fwd))) {pval_step[[i]]<-list(out=rbind(data.frame(SNP=colnames(cof_fwd[[i]])[-1],'pval'=fwd_lm[[i]]$coef[2:i,4]), data.frame(SNP=colnames(X)[-which(colnames(X) %in% colnames(cof_fwd[[i]]))],'pval'=pval[[i+1]])),"cof"=colnames(cof_fwd[[i]])[-1], "coef"=fwd_lm[[i]]$coef)} #GLS pvals for best models according to extBIC and mbonf opt_extBIC<-fwdbwd_table[which(fwdbwd_table$extBIC==min(fwdbwd_table$extBIC))[1],] opt_mbonf<-(fwdbwd_table[which(fwdbwd_table$maxpval<=0.05/m),])[which(fwdbwd_table[which(fwdbwd_table$maxpval<=0.05/m),]$ncof==max(fwdbwd_table[which(fwdbwd_table$maxpval<=0.05/m),]$ncof))[1],] if(! 
is.null(thresh)){ opt_thresh<-(fwdbwd_table[which(fwdbwd_table$maxpval<=thresh),])[which(fwdbwd_table[which(fwdbwd_table$maxpval<=thresh),]$ncof==max(fwdbwd_table[which(fwdbwd_table$maxpval<=thresh),]$ncof))[1],] } bestmodel_pvals<-function(model) {if(substr(model$step_,start=0,stop=3)=='fwd') { pval_step[[as.integer(substring(model$step_,first=4))+1]]} else if (substr(model$step_,start=0,stop=3)=='bwd') { cof<-cof_bwd[[as.integer(substring(model$step_,first=4))+1]] mixedmod<-emma.REMLE(Y,cof,K_norm) M<-solve(chol(mixedmod$vg*K_norm+mixedmod$ve*diag(n))) Y_t<-crossprod(M,Y) cof_t<-crossprod(M,cof) GLS_lm<-summary(lm(Y_t~0+cof_t)) Res_H0<-GLS_lm$residuals Q_ <- qr.Q(qr(cof_t)) RSS<-list() for (j in 1:(nbchunks-1)) { X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof)])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t)} X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof)])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof)-1))]) RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t,j) RSSf<-unlist(RSS) RSS_H0<-sum(Res_H0^2) df2<-n-df1-ncol(cof) Ftest<-(rep(RSS_H0,length(RSSf))/RSSf-1)*df2/df1 pval<-pf(Ftest,df1,df2,lower.tail=FALSE) list('out'=rbind(data.frame(SNP=colnames(cof)[-1],'pval'=GLS_lm$coef[2:(ncol(cof)),4]), data.frame('SNP'=colnames(X)[-which(colnames(X) %in% colnames(cof))],'pval'=pval)), 'cof'=colnames(cof)[-1], 'coef'=GLS_lm$coef)} else {cat('error \n')}} opt_extBIC_out<-bestmodel_pvals(opt_extBIC) opt_mbonf_out<-bestmodel_pvals(opt_mbonf) if(! is.null(thresh)){ opt_thresh_out<-bestmodel_pvals(opt_thresh) } output <- list(step_table=fwdbwd_table,pval_step=pval_step,RSSout=plot_RSS,bonf_thresh=-log10(0.05/m),opt_extBIC=opt_extBIC_out,opt_mbonf=opt_mbonf_out) if(! is.null(thresh)){ output$thresh <- -log10(thresh) output$opt_thresh <- opt_thresh_out } return(output) } ############################################################################################################################################## ###MLMM_COF - Multi-Locus Mixed Model ###SET OF FUNCTIONS TO CARRY GWAS CORRECTING FOR POPULATION STRUCTURE WHILE INCLUDING COFACTORS THROUGH A STEPWISE-REGRESSION APPROACH ####### # ##note: require EMMA #library(emma) #source('emma.r') # ##REQUIRED DATA & FORMAT # #PHENOTYPE - Y: a vector of length m, with names(Y)=individual names #GENOTYPE - X: a n by m matrix, where n=number of individuals, m=number of SNPs, with rownames(X)=individual names, and colnames(X)=SNP names #KINSHIP - K: a n by n matrix, with rownames(K)=colnames(K)=individual names #COVARIANCE MATRIX - cofs: a n by p matrix, where n=number of individuals, p=number of covariates in the matrix (e.g. PC axes) #each of these data being sorted in the same way, according to the individual name # ##FOR PLOTING THE GWAS RESULTS #SNP INFORMATION - snp_info: a data frame having at least 3 columns: # - 1 named 'SNP', with SNP names (same as colnames(X)), # - 1 named 'Chr', with the chromosome number to which belong each SNP # - 1 named 'Pos', with the position of the SNP onto the chromosome it belongs to. ####### # ##FUNCTIONS USE #save this file somewhere on your computer and source it! 
#source('path/mlmm.r') # ###FORWARD + BACKWARD ANALYSES #mygwas<-mlmm_cof(Y,X,K,nbchunks,maxsteps) #X,Y,K as described above #nbchunks: an integer defining the number of chunks of X to run the analysis, allows to decrease the memory usage ==> minimum=2, increase it if you do not have enough memory #maxsteps: maximum number of steps desired in the forward approach. The forward approach breaks automatically once the pseudo-heritability is close to 0, # however to avoid doing too many steps in case the pseudo-heritability does not reach a value close to 0, this parameter is also used. # It's value must be specified as an integer >= 3 # ###RESULTS # ##STEPWISE TABLE #mygwas$step_table # ##PLOTS # ##PLOTS FORM THE FORWARD TABLE #plot_step_table(mygwas,type=c('h2','maxpval','BIC','extBIC')) # ##RSS PLOT #plot_step_RSS(mygwas) # ##GWAS MANHATTAN PLOTS # #FORWARD STEPS #plot_fwd_GWAS(mygwas,step,snp_info,pval_filt) #step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor) #snp_info as described above #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot # #OPTIMAL MODELS #Automatic identification of the optimal models within the forwrad-backward models according to the extendedBIC or multiple-bonferonni criteria # #plot_opt_GWAS(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt) #snp_info as described above #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot # ##GWAS MANHATTAN PLOT ZOOMED IN A REGION OF INTEREST #plot_fwd_region(mygwas,step,snp_info,pval_filt,chrom,pos1,pos2) #step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor) #snp_info as described above #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot #chrom is an integer specifying the chromosome on which the region of interest is #pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info # #plot_opt_region(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt,chrom,pos1,pos2) #snp_info as described above #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot #chrom is an integer specifying the chromosome on which the region of interest is #pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info # ##QQPLOTS of pvalues #qqplot_fwd_GWAS(mygwas,nsteps) #nsteps=maximum number of forward steps to be displayed # #qqplot_opt_GWAS(mygwas,opt=c('extBIC','mbonf')) # ############################################################################################################################################## mlmm_cof<-function(Y,X,cofs,K,nbchunks,maxsteps,thresh = NULL) { n<-length(Y) m<-ncol(X) stopifnot(ncol(K) == n) stopifnot(nrow(K) == n) stopifnot(nrow(X) == n) stopifnot(nrow(cofs) == n) stopifnot(nbchunks >= 2) stopifnot(maxsteps >= 3) #INTERCEPT Xo<-rep(1,n) #K MATRIX NORMALISATION K_norm<-(n-1)/sum((diag(n)-matrix(1,n,n)/n)*K)*K rm(K) #step 0 : NULL MODEL fix_cofs<-cbind(Xo,cofs) rm(cofs) addcof_fwd<-list() addcof_fwd[[1]]<-'NA' cof_fwd<-list() cof_fwd[[1]]<-as.matrix(X[,colnames(X) %in% addcof_fwd[[1]]]) mod_fwd<-list() mod_fwd[[1]]<-emma.REMLE(Y,cbind(fix_cofs,cof_fwd[[1]]),K_norm) herit_fwd<-list() herit_fwd[[1]]<-mod_fwd[[1]]$vg/(mod_fwd[[1]]$vg+mod_fwd[[1]]$ve) RSSf<-list() RSSf[[1]]<-'NA' RSS_H0<-list() RSS_H0[[1]]<-'NA' df1<-1 
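#df1 is the numerator degrees of freedom of the per-marker F test (one SNP tested at a time);
#df2, set at each step below, is the denominator df: n minus df1 and the fixed effects already in
#the model (intercept and covariates in fix_cofs plus the selected SNP cofactors). The statistic is
#computed from residual sums of squares as F = (RSS_H0/RSS_SNP - 1)*df2/df1, as in the code below.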
df2<-list() df2[[1]]<-'NA' Ftest<-list() Ftest[[1]]<-'NA' pval<-list() pval[[1]]<-'NA' fwd_lm<-list() cat('null model done! pseudo-h=',round(herit_fwd[[1]],3),'\n') #step 1 : EMMAX M<-solve(chol(mod_fwd[[1]]$vg*K_norm+mod_fwd[[1]]$ve*diag(n))) Y_t<-crossprod(M,Y) cof_fwd_t<-crossprod(M,cbind(fix_cofs,cof_fwd[[1]])) fwd_lm[[1]]<-summary(lm(Y_t~0+cof_fwd_t)) Res_H0<-fwd_lm[[1]]$residuals Q_<-qr.Q(qr(cof_fwd_t)) RSS<-list() for (j in 1:(nbchunks-1)) { X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% addcof_fwd[[1]]])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t)} X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% addcof_fwd[[1]]])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof_fwd[[1]])))]) RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t,j) RSSf[[2]]<-unlist(RSS) RSS_H0[[2]]<-sum(Res_H0^2) df2[[2]]<-n-df1-ncol(fix_cofs)-ncol(cof_fwd[[1]]) Ftest[[2]]<-(rep(RSS_H0[[2]],length(RSSf[[2]]))/RSSf[[2]]-1)*df2[[2]]/df1 pval[[2]]<-pf(Ftest[[2]],df1,df2[[2]],lower.tail=FALSE) addcof_fwd[[2]]<-names(which(RSSf[[2]]==min(RSSf[[2]]))[1]) cof_fwd[[2]]<-cbind(cof_fwd[[1]],X[,colnames(X) %in% addcof_fwd[[2]]]) colnames(cof_fwd[[2]])[ncol(cof_fwd[[2]])]<-addcof_fwd[[2]] mod_fwd[[2]]<-emma.REMLE(Y,cbind(fix_cofs,cof_fwd[[2]]),K_norm) herit_fwd[[2]]<-mod_fwd[[2]]$vg/(mod_fwd[[2]]$vg+mod_fwd[[2]]$ve) rm(M,Y_t,cof_fwd_t,Res_H0,Q_,RSS) cat('step 1 done! pseudo-h=',round(herit_fwd[[2]],3),'\n') #FORWARD for (i in 3:(maxsteps)) { if (herit_fwd[[i-2]] < 0.01) break else { M<-solve(chol(mod_fwd[[i-1]]$vg*K_norm+mod_fwd[[i-1]]$ve*diag(n))) Y_t<-crossprod(M,Y) cof_fwd_t<-crossprod(M,cbind(fix_cofs,cof_fwd[[i-1]])) fwd_lm[[i-1]]<-summary(lm(Y_t~0+cof_fwd_t)) Res_H0<-fwd_lm[[i-1]]$residuals Q_ <- qr.Q(qr(cof_fwd_t)) RSS<-list() for (j in 1:(nbchunks-1)) { X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[i-1]])])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t)} X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[i-1]])])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof_fwd[[i-1]])))]) RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t,j) RSSf[[i]]<-unlist(RSS) RSS_H0[[i]]<-sum(Res_H0^2) df2[[i]]<-n-df1-ncol(fix_cofs)-ncol(cof_fwd[[i-1]]) Ftest[[i]]<-(rep(RSS_H0[[i]],length(RSSf[[i]]))/RSSf[[i]]-1)*df2[[i]]/df1 pval[[i]]<-pf(Ftest[[i]],df1,df2[[i]],lower.tail=FALSE) addcof_fwd[[i]]<-names(which(RSSf[[i]]==min(RSSf[[i]]))[1]) cof_fwd[[i]]<-cbind(cof_fwd[[i-1]],X[,colnames(X) %in% addcof_fwd[[i]]]) colnames(cof_fwd[[i]])[ncol(cof_fwd[[i]])]<-addcof_fwd[[i]] mod_fwd[[i]]<-emma.REMLE(Y,cbind(fix_cofs,cof_fwd[[i]]),K_norm) herit_fwd[[i]]<-mod_fwd[[i]]$vg/(mod_fwd[[i]]$vg+mod_fwd[[i]]$ve) rm(M,Y_t,cof_fwd_t,Res_H0,Q_,RSS)} cat('step ',i-1,' done! 
pseudo-h=',round(herit_fwd[[i]],3),'\n')} rm(i) ##gls at last forward step M<-solve(chol(mod_fwd[[length(mod_fwd)]]$vg*K_norm+mod_fwd[[length(mod_fwd)]]$ve*diag(n))) Y_t<-crossprod(M,Y) cof_fwd_t<-crossprod(M,cbind(fix_cofs,cof_fwd[[length(mod_fwd)]])) fwd_lm[[length(mod_fwd)]]<-summary(lm(Y_t~0+cof_fwd_t)) Res_H0<-fwd_lm[[length(mod_fwd)]]$residuals Q_ <- qr.Q(qr(cof_fwd_t)) RSS<-list() for (j in 1:(nbchunks-1)) { X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[length(mod_fwd)]])])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t)} X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[length(mod_fwd)]])])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof_fwd[[length(mod_fwd)]])))]) RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t,j) RSSf[[length(mod_fwd)+1]]<-unlist(RSS) RSS_H0[[length(mod_fwd)+1]]<-sum(Res_H0^2) df2[[length(mod_fwd)+1]]<-n-df1-ncol(fix_cofs)-ncol(cof_fwd[[length(mod_fwd)]]) Ftest[[length(mod_fwd)+1]]<-(rep(RSS_H0[[length(mod_fwd)+1]],length(RSSf[[length(mod_fwd)+1]]))/RSSf[[length(mod_fwd)+1]]-1)*df2[[length(mod_fwd)+1]]/df1 pval[[length(mod_fwd)+1]]<-pf(Ftest[[length(mod_fwd)+1]],df1,df2[[length(mod_fwd)+1]],lower.tail=FALSE) rm(M,Y_t,cof_fwd_t,Res_H0,Q_,RSS) ##get max pval at each forward step max_pval_fwd<-vector(mode="numeric",length=length(fwd_lm)) max_pval_fwd[1]<-0 for (i in 2:length(fwd_lm)) {max_pval_fwd[i]<-max(fwd_lm[[i]]$coef[(ncol(fix_cofs)+1):(ncol(fix_cofs)+ncol(cof_fwd[[i]])),4])} rm(i) ##get the number of parameters & Loglikelihood from ML at each step mod_fwd_LL<-list() mod_fwd_LL[[1]]<-list(nfixed=ncol(cbind(fix_cofs,cof_fwd[[1]])),LL=emma.MLE(Y,cbind(fix_cofs,cof_fwd[[1]]),K_norm)$ML) for (i in 2:length(cof_fwd)) {mod_fwd_LL[[i]]<-list(nfixed=ncol(cbind(fix_cofs,cof_fwd[[i]])),LL=emma.MLE(Y,cbind(fix_cofs,cof_fwd[[i]]),K_norm)$ML)} rm(i) cat('backward analysis','\n') ##BACKWARD (1st step == last fwd step) dropcof_bwd<-list() cof_bwd<-list() mod_bwd <- list() bwd_lm<-list() herit_bwd<-list() dropcof_bwd[[1]]<-'NA' cof_bwd[[1]]<-as.matrix(cof_fwd[[length(mod_fwd)]][,!colnames(cof_fwd[[length(mod_fwd)]]) %in% dropcof_bwd[[1]]]) colnames(cof_bwd[[1]])<-colnames(cof_fwd[[length(mod_fwd)]])[!colnames(cof_fwd[[length(mod_fwd)]]) %in% dropcof_bwd[[1]]] mod_bwd[[1]]<-emma.REMLE(Y,cbind(fix_cofs,cof_bwd[[1]]),K_norm) herit_bwd[[1]]<-mod_bwd[[1]]$vg/(mod_bwd[[1]]$vg+mod_bwd[[1]]$ve) M<-solve(chol(mod_bwd[[1]]$vg*K_norm+mod_bwd[[1]]$ve*diag(n))) Y_t<-crossprod(M,Y) cof_bwd_t<-crossprod(M,cbind(fix_cofs,cof_bwd[[1]])) bwd_lm[[1]]<-summary(lm(Y_t~0+cof_bwd_t)) rm(M,Y_t,cof_bwd_t) for (i in 2:length(mod_fwd)) { dropcof_bwd[[i]]<-colnames(cof_bwd[[i-1]])[which(abs(bwd_lm[[i-1]]$coef[(ncol(fix_cofs)+1):nrow(bwd_lm[[i-1]]$coef),3])==min(abs(bwd_lm[[i-1]]$coef[(ncol(fix_cofs)+1):nrow(bwd_lm[[i-1]]$coef),3])))] cof_bwd[[i]]<-as.matrix(cof_bwd[[i-1]][,!colnames(cof_bwd[[i-1]]) %in% dropcof_bwd[[i]]]) colnames(cof_bwd[[i]])<-colnames(cof_bwd[[i-1]])[!colnames(cof_bwd[[i-1]]) %in% dropcof_bwd[[i]]] mod_bwd[[i]]<-emma.REMLE(Y,cbind(fix_cofs,cof_bwd[[i]]),K_norm) herit_bwd[[i]]<-mod_bwd[[i]]$vg/(mod_bwd[[i]]$vg+mod_bwd[[i]]$ve) M<-solve(chol(mod_bwd[[i]]$vg*K_norm+mod_bwd[[i]]$ve*diag(n))) Y_t<-crossprod(M,Y) cof_bwd_t<-crossprod(M,cbind(fix_cofs,cof_bwd[[i]])) bwd_lm[[i]]<-summary(lm(Y_t~0+cof_bwd_t)) rm(M,Y_t,cof_bwd_t)} rm(i) ##get max pval at each backward 
step max_pval_bwd<-vector(mode="numeric",length=length(bwd_lm)) for (i in 1:(length(bwd_lm)-1)) {max_pval_bwd[i]<-max(bwd_lm[[i]]$coef[(ncol(fix_cofs)+1):(ncol(fix_cofs)+ncol(cof_bwd[[i]])),4])} max_pval_bwd[length(bwd_lm)]<-0 ##get the number of parameters & Loglikelihood from ML at each step mod_bwd_LL<-list() mod_bwd_LL[[1]]<-list(nfixed=ncol(cbind(fix_cofs,cof_bwd[[1]])),LL=emma.MLE(Y,cbind(fix_cofs,cof_bwd[[1]]),K_norm)$ML) for (i in 2:length(cof_bwd)) {mod_bwd_LL[[i]]<-list(nfixed=ncol(cbind(fix_cofs,cof_bwd[[i]])),LL=emma.MLE(Y,cbind(fix_cofs,cof_bwd[[i]]),K_norm)$ML)} rm(i) cat('creating output','\n') ##Forward Table: Fwd + Bwd Tables #Compute parameters for model criteria BIC<-function(x){-2*x$LL+(x$nfixed+1)*log(n)} extBIC<-function(x){BIC(x)+2*lchoose(m,x$nfixed-1)} fwd_table<-data.frame(step=ncol(cof_fwd[[1]]),step_=paste('fwd',ncol(cof_fwd[[1]]),sep=''),cof=paste('+',addcof_fwd[[1]],sep=''),ncof=ncol(cof_fwd[[1]]),h2=herit_fwd[[1]] ,maxpval=max_pval_fwd[1],BIC=BIC(mod_fwd_LL[[1]]),extBIC=extBIC(mod_fwd_LL[[1]])) for (i in 2:(length(mod_fwd))) {fwd_table<-rbind(fwd_table, data.frame(step=ncol(cof_fwd[[i]]),step_=paste('fwd',ncol(cof_fwd[[i]]),sep=''),cof=paste('+',addcof_fwd[[i]],sep=''),ncof=ncol(cof_fwd[[i]]),h2=herit_fwd[[i]] ,maxpval=max_pval_fwd[i],BIC=BIC(mod_fwd_LL[[i]]),extBIC=extBIC(mod_fwd_LL[[i]])))} rm(i) bwd_table<-data.frame(step=length(mod_fwd),step_=paste('bwd',0,sep=''),cof=paste('-',dropcof_bwd[[1]],sep=''),ncof=ncol(cof_bwd[[1]]),h2=herit_bwd[[1]] ,maxpval=max_pval_bwd[1],BIC=BIC(mod_bwd_LL[[1]]),extBIC=extBIC(mod_bwd_LL[[1]])) for (i in 2:(length(mod_bwd))) {bwd_table<-rbind(bwd_table, data.frame(step=length(mod_fwd)+i-1,step_=paste('bwd',i-1,sep=''),cof=paste('-',dropcof_bwd[[i]],sep=''),ncof=ncol(cof_bwd[[i]]),h2=herit_bwd[[i]] ,maxpval=max_pval_bwd[i],BIC=BIC(mod_bwd_LL[[i]]),extBIC=extBIC(mod_bwd_LL[[i]])))} rm(i,BIC,extBIC,max_pval_fwd,max_pval_bwd,dropcof_bwd) fwdbwd_table<-rbind(fwd_table,bwd_table) #RSS for plot #null model only with intercept null<-emma.REMLE(Y,as.matrix(Xo),K_norm) M<-solve(chol(null$vg*K_norm+null$ve*diag(n))) Y_t<-crossprod(M,Y) Xo_t<-crossprod(M,as.matrix(Xo)) null_lm<-summary(lm(Y_t~0+Xo_t)) rm(null,M,Y_t,Xo_t) RSS_null<-sum((Y-as.matrix(Xo)%*%null_lm$coef[,1])^2) mod_fwd_RSS<-vector() mod_fwd_RSS[1]<-sum((Y-cbind(fix_cofs,cof_fwd[[1]])%*%fwd_lm[[1]]$coef[,1])^2) for (i in 2:length(mod_fwd)) {mod_fwd_RSS[i]<-sum((Y-cbind(fix_cofs,cof_fwd[[i]])%*%fwd_lm[[i]]$coef[,1])^2)} mod_bwd_RSS<-vector() mod_bwd_RSS[1]<-sum((Y-cbind(fix_cofs,cof_bwd[[1]])%*%bwd_lm[[1]]$coef[,1])^2) for (i in 2:length(mod_bwd)) {mod_bwd_RSS[i]<-sum((Y-cbind(fix_cofs,cof_bwd[[i]])%*%bwd_lm[[i]]$coef[,1])^2)} expl_RSS<-c(1-sapply(mod_fwd_RSS,function(x){x/RSS_null}),1-sapply(mod_bwd_RSS,function(x){x/RSS_null})) fix_cofs_RSS<-rep(expl_RSS[1],length(expl_RSS)) cofs_RSS<-expl_RSS-fix_cofs_RSS h2_RSS<-c(unlist(herit_fwd),unlist(herit_bwd))*(1-expl_RSS) unexpl_RSS<-1-expl_RSS-h2_RSS plot_RSS<-t(apply(cbind(fix_cofs_RSS,cofs_RSS,h2_RSS,unexpl_RSS),1,cumsum)) #GLS pvals at each step pval_step<-list() pval_step[[1]]<-list(out=data.frame('SNP'=names(pval[[2]]),'pval'=pval[[2]]),cof=addcof_fwd[[1]], "coef"=fwd_lm[[1]]$coef) for (i in 2:(length(mod_fwd))) { pval_step[[i]]<-list('out'=rbind(data.frame('SNP'=colnames(cof_fwd[[i]]),'pval'=fwd_lm[[i]]$coef[(ncol(fix_cofs)+1):(ncol(fix_cofs)+ncol(cof_fwd[[i]])),4]), data.frame('SNP'=names(pval[[i+1]]),'pval'=pval[[i+1]])), 'cof'=colnames(cof_fwd[[i]]), 'coef'=fwd_lm[[i]]$coef) } #GLS pvals for best models according to 
extBIC and mbonf opt_extBIC<-fwdbwd_table[which(fwdbwd_table$extBIC==min(fwdbwd_table$extBIC))[1],] opt_mbonf<-(fwdbwd_table[which(fwdbwd_table$maxpval<=0.05/m),])[which(fwdbwd_table[which(fwdbwd_table$maxpval<=0.05/m),]$ncof==max(fwdbwd_table[which(fwdbwd_table$maxpval<=0.05/m),]$ncof))[1],] if(! is.null(thresh)){ opt_thresh<-(fwdbwd_table[which(fwdbwd_table$maxpval<=thresh),])[which(fwdbwd_table[which(fwdbwd_table$maxpval<=thresh),]$ncof==max(fwdbwd_table[which(fwdbwd_table$maxpval<=thresh),]$ncof))[1],] } bestmodel_pvals<-function(model) {if(substr(model$step_,start=0,stop=3)=='fwd') { pval_step[[as.integer(substring(model$step_,first=4))+1]]} else if (substr(model$step_,start=0,stop=3)=='bwd') { cof<-cof_bwd[[as.integer(substring(model$step_,first=4))+1]] mixedmod<-emma.REMLE(Y,cbind(fix_cofs,cof),K_norm) M<-solve(chol(mixedmod$vg*K_norm+mixedmod$ve*diag(n))) Y_t<-crossprod(M,Y) cof_t<-crossprod(M,cbind(fix_cofs,cof)) GLS_lm<-summary(lm(Y_t~0+cof_t)) Res_H0<-GLS_lm$residuals Q_ <- qr.Q(qr(cof_t)) RSS<-list() for (j in 1:(nbchunks-1)) { X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof)])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t)} X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof)])[,((j)*round(m/nbchunks)+1):(m-ncol(cof))]) RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t,j) RSSf<-unlist(RSS) RSS_H0<-sum(Res_H0^2) df2<-n-df1-ncol(fix_cofs)-ncol(cof) Ftest<-(rep(RSS_H0,length(RSSf))/RSSf-1)*df2/df1 pval<-pf(Ftest,df1,df2,lower.tail=FALSE) list('out'=rbind(data.frame(SNP=colnames(cof),'pval'=GLS_lm$coef[(ncol(fix_cofs)+1):(ncol(fix_cofs)+ncol(cof)),4]), data.frame('SNP'=names(pval),'pval'=pval)), 'cof'=colnames(cof), 'coef'=GLS_lm$coef)} else {cat('error \n')}} opt_extBIC_out<-bestmodel_pvals(opt_extBIC) opt_mbonf_out<-bestmodel_pvals(opt_mbonf) if(! is.null(thresh)){ opt_thresh_out<-bestmodel_pvals(opt_thresh) } output <- list(step_table=fwdbwd_table,pval_step=pval_step,RSSout=plot_RSS,bonf_thresh=-log10(0.05/m),opt_extBIC=opt_extBIC_out,opt_mbonf=opt_mbonf_out) if(! 
is.null(thresh)){ output$thresh <- -log10(thresh) output$opt_thresh <- opt_thresh_out } return(output) } `GAPIT.replaceNaN` <- function(LL) { #handler of grids with NaN log #Authors: Zhiwu Zhang # Last update: may 12, 2011 ############################################################################################## #handler of grids with NaN log index=(LL=="NaN") if(length(index)>0) theMin=min(LL[!index]) if(length(index)<1) theMin="NaN" LL[index]=theMin return(LL) } #============================================================================================= `GAPIT2` <- function(Y=NULL,G=NULL,GD=NULL,GM=NULL,KI=NULL,Z=NULL,CV=NULL,CV.Inheritance=NULL,GP=NULL,GK=NULL, group.from=1000000 ,group.to=1000000,group.by=10,DPP=100000, kinship.cluster="average", kinship.group='Mean',kinship.algorithm="VanRaden", bin.from=10000,bin.to=10000,bin.by=10000,inclosure.from=10,inclosure.to=10,inclosure.by=10, SNP.P3D=TRUE,SNP.effect="Add",SNP.impute="Middle",PCA.total=0, PCA.col=NULL,PCA.3d=FALSE, SNP.fraction = 1, seed = 123, BINS = 20,SNP.test=TRUE, SNP.MAF=0,FDR.Rate = 1, SNP.FDR=1,SNP.permutation=FALSE,SNP.CV=NULL,SNP.robust="GLM", file.from=1, file.to=1, file.total=NULL, file.fragment = 99999,file.path=NULL, file.G=NULL, file.Ext.G=NULL,file.GD=NULL, file.GM=NULL, file.Ext.GD=NULL,file.Ext.GM=NULL, ngrid = 100, llim = -10, ulim = 10, esp = 1e-10, LD.chromosome=NULL,LD.location=NULL,LD.range=NULL, sangwich.top=NULL,sangwich.bottom=NULL,QC=TRUE,GTindex=NULL,LD=0.1, NJtree.group=NULL,NJtree.type=c("fan","unrooted"),plot.bin=10^5, file.output=TRUE,cutOff=0.01, Model.selection = FALSE,output.numerical = FALSE, output.hapmap = FALSE, Create.indicator = FALSE, QTN=NULL, QTN.round=1,QTN.limit=0, QTN.update=TRUE, QTN.method="Penalty", Major.allele.zero = FALSE, method.GLM="fast.lm",method.sub="reward",method.sub.final="reward",method.bin="static", bin.size=c(1000000),bin.selection=c(10,20,50,100,200,500,1000), memo="",Prior=NULL,ncpus=1,maxLoop=3,threshold.output=.01, WS=c(1e0,1e3,1e4,1e5,1e6,1e7),alpha=c(.01,.05,.1,.2,.3,.4,.5,.6,.7,.8,.9,1),maxOut=100,QTN.position=NULL,CG=NULL, converge=1,iteration.output=FALSE,acceleration=0,iteration.method="accum",PCA.View.output=TRUE,Geno.View.output=TRUE, plot.style="Oceanic",SUPER_GD=NULL,SUPER_GS=FALSE){ #Object: To perform GWAS and GPS (Genomic Prediction/Selection) #Designed by Zhiwu Zhang #Writen by Alex Lipka, Feng Tian ,You Tang and Zhiwu Zhang #Last update: Oct 23, 2015 by Jiabo Wang add REML threshold and SUPER GK ############################################################################################## print("--------------------- Welcome to GAPIT ----------------------------") echo=TRUE #GAPIT.Version=GAPIT.0000() Timmer=GAPIT.Timmer(Infor="GAPIT") Memory=GAPIT.Memory(Infor="GAPIT") #Genotype processing and calculation Kin and PC #First call to genotype to setup genotype data storage_PCA.total<-PCA.total #if(PCA.total>0){ #if(PCA.total<=3){PCA.total=4} #} #BUS algorithm #if(kinship.algorithm=="FARM-CPU") return (GAPIT.BUS(Y=Y,GDP=GD,GM=GM,CV=CV, # method.GLM=method.GLM,method.sub=method.sub,method.sub.final=method.sub.final,method.bin=method.bin, # bin.size=bin.size,bin.selection=bin.selection,file.output=file.output, # cutOff=cutOff,DPP=DPP,memo=memo,Prior=Prior,ncpus=ncpus,maxLoop=maxLoop, # kinship.algorithm=kinship.algorithm,GP=GP,threshold.output=threshold.output, # WS=WS,alpha=alpha,maxOut=maxOut,QTN.position=QTN.position,converge=converge, # iteration.output=iteration.output,acceleration=acceleration,iteration.method=iteration.method)) 
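#The genotype call below returns a list that is unpacked immediately afterwards: kinship (KI),
#principal components (PC), numeric genotypes and SNP map (GD, GI), taxa names (GT), HapMap
#genotypes (G), plus the Timmer/Memory profiling records.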
myGenotype<-GAPIT.Genotype(G=G,GD=GD,GM=GM,KI=KI,kinship.algorithm=kinship.algorithm,PCA.total=PCA.total,SNP.fraction=SNP.fraction,SNP.test=SNP.test,
file.path=file.path,file.from=file.from, file.to=file.to, file.total=file.total, file.fragment = file.fragment, file.G=file.G, file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM,
SNP.MAF=SNP.MAF,FDR.Rate = FDR.Rate,SNP.FDR=SNP.FDR,SNP.effect=SNP.effect,SNP.impute=SNP.impute,NJtree.group=NJtree.group,NJtree.type=NJtree.type,
LD.chromosome=LD.chromosome,LD.location=LD.location,LD.range=LD.range,
GP=GP,GK=GK,bin.size=NULL,inclosure.size=NULL,
Timmer = Timmer,Memory=Memory,
sangwich.top=sangwich.top,sangwich.bottom=sangwich.bottom,GTindex=NULL,file.output=file.output,
Create.indicator = Create.indicator, Major.allele.zero = Major.allele.zero,Geno.View.output=Geno.View.output,PCA.col=PCA.col,PCA.3d=PCA.3d)
Timmer=myGenotype$Timmer
Memory=myGenotype$Memory
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Genotype for all")
Memory=GAPIT.Memory(Memory=Memory,Infor="Genotype for all")
KI=myGenotype$KI
PC=myGenotype$PC
genoFormat=myGenotype$genoFormat
hasGenotype=myGenotype$hasGenotype
byFile=myGenotype$byFile
fullGD=myGenotype$fullGD
GD=myGenotype$GD
GI=myGenotype$GI
GT=myGenotype$GT
G=myGenotype$G
chor_taxa=myGenotype$chor_taxa
#print(dim(GD))
#print(dim(GI))
rownames(GD)=GT
colnames(GD)=GI[,1]
if(output.numerical) write.table(GD, "GAPIT.Genotype.Numerical.txt", quote = FALSE, sep = "\t", row.names = TRUE,col.names = NA)
if(output.hapmap) write.table(myGenotype$G, "GAPIT.Genotype.hmp.txt", quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE)
#In case of null Y and null GP, return genotype only
if(is.null(Y) & is.null(GP)) return (list(GWAS=NULL,GPS=NULL,Pred=NULL,compression=NULL,kinship.optimum=NULL,kinship=myGenotype$KI,PCA=myGenotype$PC,GD=data.frame(cbind(as.data.frame(GT),as.data.frame(GD))),GI=GI,G=myGenotype$G))
#In case of null Y, return genotype only
if(is.null(Y)) return (list(GWAS=NULL,GPS=NULL,Pred=NULL,compression=NULL,kinship.optimum=NULL,kinship=myGenotype$KI,PCA=myGenotype$PC,GD=data.frame(cbind(as.data.frame(GT),as.data.frame(GD))),GI=GI,G=myGenotype$G))
rm(myGenotype)
gc()
PCA.total<-storage_PCA.total
print("--------------------Processing traits----------------------------------")
if(!is.null(Y)){
print("Phenotype provided!")
if(ncol(Y)<2) stop ("Phenotype should have taxa name and one trait at least. Please correct phenotype file!")
for (trait in 2: ncol(Y))  {
traitname=colnames(Y)[trait]
###Statistical distributions of phenotype
if(!is.null(Y) & file.output)ViewPhenotype<-GAPIT.Phenotype.View(myY=Y[,c(1,trait)],traitname=traitname,memo=memo)
###Correlation between phenotype and principal components
if(!is.null(Y)&!is.null(PC) & file.output & PCA.total>0 & PCA.View.output){
myPPV<-GAPIT.Phenotype.PCA.View( PC=PC, myY=Y[,c(1,trait)] )
}
#print(SNP.fraction)
#print("!!!!")
#print(GT)
print(paste("Processing trait: ",traitname,sep=""))
if(!is.null(memo)) traitname=paste(memo,".",traitname,sep="")
#print("!!!!")
#print(dim(Z))
#print(dim(KI))
#print(group.from)
#print(group.to)
gapitMain <- GAPIT.Main(Y=Y[,c(1,trait)],G=G,GD=GD,GM=GI,KI=KI,Z=Z,CV=CV,CV.Inheritance=CV.Inheritance,GP=GP,GK=GK,SNP.P3D=SNP.P3D,kinship.algorithm=kinship.algorithm,
bin.from=bin.from,bin.to=bin.to,bin.by=bin.by,inclosure.from=inclosure.from,inclosure.to=inclosure.to,inclosure.by=inclosure.by,
group.from=group.from,group.to=group.to,group.by=group.by,kinship.cluster=kinship.cluster,kinship.group=kinship.group,name.of.trait=traitname,
file.path=file.path,file.from=file.from, file.to=file.to, file.total=file.total, file.fragment = file.fragment, file.G=file.G,file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM,
SNP.MAF= SNP.MAF,FDR.Rate = FDR.Rate,SNP.FDR=SNP.FDR,SNP.effect=SNP.effect,SNP.impute=SNP.impute,PCA.total=PCA.total,GAPIT.Version=GAPIT.Version,
GT=GT, SNP.fraction = SNP.fraction, seed = seed, BINS = BINS,SNP.test=SNP.test,DPP=DPP,
SNP.permutation=SNP.permutation,NJtree.group=NJtree.group,NJtree.type=NJtree.type,plot.bin=plot.bin,
LD.chromosome=LD.chromosome,LD.location=LD.location,LD.range=LD.range,SNP.CV=SNP.CV,SNP.robust=SNP.robust,
genoFormat=genoFormat,hasGenotype=hasGenotype,byFile=byFile,fullGD=fullGD,PC=PC,GI=GI,Timmer = Timmer, Memory = Memory,
sangwich.top=sangwich.top,sangwich.bottom=sangwich.bottom,QC=QC,GTindex=GTindex,LD=LD,file.output=file.output,cutOff=cutOff,
Model.selection = Model.selection, Create.indicator = Create.indicator,
QTN=QTN, QTN.round=QTN.round,QTN.limit=QTN.limit, QTN.update=QTN.update, QTN.method=QTN.method, Major.allele.zero=Major.allele.zero,
QTN.position=QTN.position,plot.style=plot.style,SUPER_GS=SUPER_GS,CG=CG,chor_taxa=chor_taxa)
}# end of loop on trait
if(ncol(Y)>2 & file.output) {
Timmer=gapitMain$Timmer
Memory=gapitMain$Memory
file=paste("GAPIT.", "All",".Timming.csv" ,sep = "")
write.table(Timmer, file, quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)
file=paste("GAPIT.", "All",".Memory.Stage.csv" ,sep = "")
write.table(Memory, file, quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)
}
if(ncol(Y)==2) {
if (!SUPER_GS){
#Evaluate Power vs FDR and type I error
myPower=NULL
if(!is.null(gapitMain$GWAS))myPower=GAPIT.Power(WS=WS, alpha=alpha, maxOut=maxOut,seqQTN=QTN.position,GM=GM,GWAS=gapitMain$GWAS)
h2= as.matrix(as.numeric(as.vector(gapitMain$Compression[,5]))/(as.numeric(as.vector(gapitMain$Compression[,5]))+as.numeric(as.vector(gapitMain$Compression[,6]))),length(gapitMain$Compression[,6]),1)
colnames(h2)=c("Heritability")
print("GAPIT accomplished successfully for single trait. Results are saved. 
GWAS are returned!") print("It is OK to see this: 'There were 50 or more warnings (use warnings() to see the first 50)'") return (list(QTN=gapitMain$QTN,GWAS=gapitMain$GWAS,h2=gapitMain$h2,Pred=gapitMain$Pred,compression=as.data.frame(cbind(gapitMain$Compression,h2)), kinship.optimum=gapitMain$kinship.optimum,kinship=gapitMain$kinship,PCA=gapitMain$PC, FDR=myPower$FDR,Power=myPower$Power,Power.Alpha=myPower$Power.Alpha,alpha=myPower$alpha,SUPER_GD=gapitMain$SUPER_GD,P=gapitMain$P,effect.snp=gapitMain$effect.snp,effect.cv=gapitMain$effect.cv)) }else{ h2= as.matrix(as.numeric(as.vector(gapitMain$Compression[,5]))/(as.numeric(as.vector(gapitMain$Compression[,5]))+as.numeric(as.vector(gapitMain$Compression[,6]))),length(gapitMain$Compression[,6]),1) colnames(h2)=c("Heritability") print("GAPIT accomplished successfully for single trait. Results are saved. GPS are returned!") print("It is OK to see this: 'There were 50 or more warnings (use warnings() to see the first 50)'") return (list(QTN=gapitMain$QTN,GWAS=gapitMain$GWAS,h2=gapitMain$h2,Pred=gapitMain$Pred,compression=as.data.frame(cbind(gapitMain$Compression,h2)), kinship.optimum=gapitMain$kinship.optimum,kinship=gapitMain$kinship,PCA=gapitMain$PC, SUPER_GD=gapitMain$SUPER_GD,P=gapitMain$P,effect.snp=gapitMain$effect.snp,effect.cv=gapitMain$effect.cv)) } }else{ print("GAPIT accomplished successfully for multiple traits. Results are saved") print("It is OK to see this: 'There were 50 or more warnings (use warnings() to see the first 50)'") return (list(QTN=NULL,GWAS=NULL,h2=NULL,Pred=NULL,compression=NULL,kinship.optimum=NULL,kinship=gapitMain$KI,PCA=gapitMain$PC,P=gapitMain$P,effect.snp=gapitMain$effect.snp,effect.cv=gapitMain$effect.cv)) } }# end ofdetecting null Y } #end of GAPIT function
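#=============================================================================================
#Usage sketch (hypothetical): a minimal call of the GAPIT2 function defined above with numeric
#genotype data. The file names are placeholders for the user's own phenotype (taxa plus trait
#columns), numeric genotype (taxa in the first column) and SNP map (SNP, Chromosome, Position)
#tables; adjust paths, PCA.total and the compression settings (group.from/to/by) to your data.
#
#myY  <- read.table("my_traits.txt", header = TRUE)
#myGD <- read.table("my_genotype_numeric.txt", header = TRUE)
#myGM <- read.table("my_SNP_map.txt", header = TRUE)
#myGAPIT <- GAPIT2(Y = myY, GD = myGD, GM = myGM,
#                  PCA.total = 3,
#                  group.from = 1, group.to = nrow(myY), group.by = 10)
#head(myGAPIT$GWAS)     #GWAS results (returned when Y holds a single trait)
#myGAPIT$compression    #compression profile with estimated heritability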