## Bayesian Alphabet for Genomic Selection (BAGS)
# Based on the source code originally developed by Rohan Fernando (http://taurus.ansci.iastate.edu/wiki/projects)
# Modified by Yao Zhou, July 2015
# Revised by Zhiwu Zhang (April 24, 2016)
#
# Input
# X: numeric genotype with individuals as rows and markers as columns (n by m)
# y: phenotype, a single column (n by 1)
# pi: default .99; options are 0 (Bayes A), 1 (Bayes Cpi), and between 0 and 1 (Bayes B)
# burn.in: number of burn-in MCMC iterations (default 100)
# burn.out: number of MCMC iterations kept after burn-in (default 100)
# vara: prior additive genetic variance (default 1/20)
# mean2pq: mean of 2p(1-p) across markers, used to scale the marker-effect variance (default 0.5)
# nua: prior degrees of freedom of the scaled inverse chi-square on marker variances (default 4)
# numMHIter: number of Metropolis-Hastings steps per locus per iteration (default 1)
# recording: T or F, whether to return the MCMC samples
#
# Output
# effect: estimated marker effects (m by 1)
# mean, Ve: estimated intercept and residual variance
# pi, Va: estimated pi and common marker variance (Bayes Cpi only)
# mcmc.b, mcmc.v, mcmc.p: MCMC samples of marker effects, marker variances, and (Mean, pi, Va, Ve); NULL unless recording = T

`BAGS` <- function(X = NULL, y = NULL, pi = .99, burn.in = 100, burn.out = 100,
                   vara = 1/20.0, mean2pq = 0.5, nua = 4, numMHIter = 1, recording = F){
  cat("Welcome to BAGS (Bayesian Alphabet for Genomic Selection)\n")
  if(pi == 1) model = "Cpi"
  if(pi <  1) model = "AB"
  iteration = burn.in + burn.out
  nmarkers  = ncol(X)      # number of markers
  numiter   = iteration    # number of iterations
  nrecords  = length(y)
  x = as.matrix(cbind(1, X))   # design matrix: intercept plus markers
  y = as.matrix(y)
  if(pi == 0) pi = 1e-10
  if(pi == 1) pi = .99
  logPi      = log(pi)
  logPiComp  = log(1 - pi)
  varEffects = vara/(nmarkers*(1 - pi)*mean2pq)   # prior variance of a marker effect
  b      = array(0.0, nmarkers + 1)
  meanb  = b
  meanVe = 0
  b[1]   = mean(y)
  var    = array(0.0, nmarkers)
  ppa    = array(0.0, nmarkers)
  ycorr  = y - x %*% b   # adjust y
  mcmc.b = NULL
  mcmc.p = NULL
  mcmc.v = NULL
  if(recording){
    mcmc.b = matrix(NA, numiter, nmarkers)   # storage
    mcmc.v = matrix(NA, numiter, nmarkers)   # storage
    mcmc.p = matrix(NA, numiter, 4)          # storage
    colnames(mcmc.p) = c("Mean", "pi", "Va", "Ve")
  }

  # prior scale parameters
  scaleb = 0.5*vara/(nmarkers*(1 - pi)*mean2pq)
  consta = 2.0*log(scaleb*2.0)
  scalec = varEffects*(nua - 2)/nua   ## only used in Bayes Cpi

  # initial values
  piMean = 0.0; meanVar = 0.0; probDelta1 = 0

  cat("Gibbs sampling (MCMC)...\n")
  for (iter in 1:numiter){
    if(iter %% 10 == 0) cat("iteration ", iter, " number of loci in model = ", nLoci, "\n")

    # sample vare
    vare = (t(ycorr) %*% ycorr)/rchisq(1, nrecords + 3)   # sampling residual variance
    if(iter > burn.in) meanVe = meanVe + vare

    # sample intercept
    ycorr  = ycorr + x[,1]*b[1]
    rhs    = sum(ycorr)/vare
    invLhs = 1.0/(nrecords/vare)
    mean   = rhs*invLhs
    b[1]   = rnorm(1, mean, sqrt(invLhs))   # sampling intercept
    ycorr  = ycorr - x[,1]*b[1]
    if(iter > burn.in) meanb[1] = meanb[1] + b[1]

    # sample delta and effect for each locus
    nLoci = 0
    for (locus in 1:nmarkers){
      ycorr = ycorr + x[,1+locus]*b[1+locus]
      rhs   = t(x[,1+locus]) %*% ycorr
      xpx   = t(x[,1+locus]) %*% x[,1+locus]
      v0    = xpx*vare
      v1    = (xpx^2*varEffects + xpx*vare)
      v2    = xpx*vare

      # Bayes Cpi: inclusion probability under a common marker variance
      if(model == "Cpi"){
        logDelta0  = -0.5*(log(v0) + rhs^2/v0) + logPi
        logDelta1  = -0.5*(log(v1) + rhs^2/v1) + logPiComp
        probDelta1 = 1.0/(1.0 + exp(logDelta0 - logDelta1))
        var[locus] = varEffects
        # Handler of invariant markers
        if(is.nan(probDelta1)) probDelta1 = 0
        u = runif(1)
      }

      # Bayes AB: Metropolis-Hastings sampling of the locus-specific variance
      #=============================================================================================
      if(model == "AB"){
        logDataNullModel = -0.5*(log(v2) + rhs^2/v2)
        if (var[locus] > 0.0){
          logDataOld  = -0.5*(log(v1) + rhs^2/v1)
          sv          = scaleb
          logPriorOld = consta - 3.0*log(var[locus]) - sv*4/(2.0*var[locus]) + logPiComp
        } else {
          logDataOld  = logDataNullModel
          logPriorOld = logPi
        }
        logPosteriorOld = logDataOld + logPriorOld
        for (mhiter in 1:numMHIter){
          u = runif(1)
          varCandidate = 0
          if (u > 0.5){
            if (var[locus] > 0){
              varCandidate = var[locus]*2/rchisq(1, 4)   # Sampling genetic variance
            } else {
              varCandidate = scaleb*4/rchisq(1, 4)       # Sampling genetic variance
            }
          }
          if (varCandidate > 0.0){
            v1 = (xpx^2*varCandidate + xpx*vare)
            logDataNew      = -0.5*(log(v1) + rhs^2/v1)
            sv              = scaleb
            logPriorNew     = consta - 3.0*log(varCandidate) - sv*4/(2.0*varCandidate) + logPiComp
            logPosteriorNew = logDataNew + logPriorNew
            if(var[locus] > 0){
              sv = varCandidate*0.5
              logProposalOld = 2.0*log(sv*2.0) - 3.0*log(var[locus]) - sv*4/(2.0*var[locus])
              sv = var[locus]*0.5
              logProposalNew = 2.0*log(sv*2.0) - 3.0*log(varCandidate) - sv*4/(2.0*varCandidate)
            } else {
              logProposalOld = 0.0
              sv = scaleb
              logProposalNew = consta - 3.0*log(varCandidate) - sv*4/(2.0*varCandidate)
            }
          } else {
            logDataNew      = logDataNullModel
            logPriorNew     = logPi
            logPosteriorNew = logDataNew + logPriorNew
            if (var[locus] > 0){
              sv = scaleb
              logProposalOld = consta - 3.0*log(var[locus]) - sv*4/(2.0*var[locus])
              logProposalNew = 0.0
            } else {
              logProposalOld = 0.0
              logProposalNew = 0.0
            }
          }
          acceptProb = (exp(logPosteriorNew + logProposalOld - logPosteriorOld - logProposalNew))[1,1]
          if(is.nan(acceptProb)) acceptProb = 0
          u = runif(1)
          if(u < acceptProb){
            var[locus]      = varCandidate
            logPosteriorOld = logPosteriorNew
          }
        }   # End of mhiter loop
      }   # end of model=="AB"
      #=============================================================================================

      # sample the effect of a locus that is in the model; otherwise set it to zero
      if((model == "Cpi" & u < probDelta1) | (model == "AB" & var[locus] > 0)){
        nLoci  = nLoci + 1
        lhs    = xpx/vare + 1.0/var[locus]
        invLhs = 1.0/lhs
        mean   = invLhs*rhs/vare
        b[1+locus] = rnorm(1, mean, sqrt(invLhs))   # Sampling gi
        ycorr = ycorr - x[,1+locus]*b[1+locus]
        if(iter > burn.in) meanb[1+locus] = meanb[1+locus] + b[1+locus]
        ppa[locus] = ppa[locus] + 1
      } else {
        b[1+locus] = 0.0; var[locus] = 0.0
      }
    }   # end of loop on markers

    if (model == "Cpi"){
      # sample common variance
      countLoci = 0
      sum = 0.0
      for (locus in 1:nmarkers){
        if(var[locus] > 0.0){
          countLoci = countLoci + 1
          sum = sum + b[1+locus]^2
        }
      }
      varEffects = (scalec*nua + sum)/rchisq(1, nua + countLoci)   # sampling common variance of gi
      if(iter > burn.in) meanVar = meanVar + varEffects
      # sample Pi
      aa = nmarkers - countLoci + 1
      bb = countLoci + 1
      pi = rbeta(1, aa, bb)   # sampling Pi
      if(iter > burn.in) piMean = piMean + pi
      scalec = (nua - 2)/nua*vara/((1 - pi)*nmarkers*mean2pq)
      logPi     = log(pi)
      logPiComp = log(1 - pi)
    }   # End of Cpi

    varEffects.out = varEffects
    if(model == "AB"){
      varEffects.out = 0
      piMean = pi
    }
    if(recording){
      mcmc.b[iter,] = b[-1]
      mcmc.v[iter,] = var
      mcmc.p[iter,] = c(b[1], pi, varEffects.out, vare)
    }
  }   # end of iteration on iter

  piMean  = piMean/numiter
  meanVar = meanVar/numiter
  meanVe  = meanVe/numiter
  meanb   = meanb/numiter
  ppa     = ppa/numiter
  effect  = meanb[-1]
  return(list(effect = effect, var = var, mean = meanb[1], pi = piMean, Va = meanVar,
              Ve = as.numeric(meanVe), mcmc.p = mcmc.p, mcmc.b = mcmc.b, mcmc.v = mcmc.v))
}
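#=============================================================================================
# Usage sketch: a minimal example on simulated data. The object names myGD and myY and the
# simulation settings below are illustrative placeholders, not part of the original code;
# replace them with your own n-by-m numeric genotype matrix and n-by-1 phenotype.
# With pi = 0.95 the call below runs Bayes B; pi = 1 would run Bayes Cpi and pi = 0 Bayes A.
#
# set.seed(99164)
# n <- 200; m <- 500
# myGD <- matrix(sample(0:2, n * m, replace = TRUE), n, m)   # simulated genotypes coded 0/1/2
# qtn  <- sample(m, 10)                                      # positions of 10 simulated QTNs
# myY  <- myGD[, qtn] %*% rnorm(10) + rnorm(n)               # phenotype = QTN effects + noise
# fit  <- BAGS(X = myGD, y = myY, pi = 0.95, burn.in = 100, burn.out = 100, recording = TRUE)
# gebv <- myGD %*% fit$effect + fit$mean                     # genomic predictions
# plot(fit$mcmc.p[, "Ve"], type = "l")                       # trace plot of residual variance
#=============================================================================================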