##############################################################################################
GAPIT <- function(Y=NULL,G=NULL,GD=NULL,GM=NULL,KI=NULL,Z=NULL,CV=NULL,emmaXp3d=TRUE,
                groupFrom=1 ,groupTo=1000000,groupBy=10,CA="average", KT='Mean',kinMethod=NULL,
                model="Add",numPCs=0, Ratio = 1, seed = 123, BINS = 20,GPSonly=FALSE,DPP=50000,                
                numFiles=0, num.read = 1024,dataPath=NULL, 
                GFile=NULL, GFileExt=NULL,
                GDFile=NULL, GMFile=NULL, GDFileExt=NULL,GMFileExt=NULL, 
                mafRate=0,FDR.Rate = 1, FDR.Filter.Rate=1,
                ngrid = 100, llim = -10, ulim = 10, esp = 1e-10){
GAPIT.Version="1.31 (fragmentation withing files)"
#Object: To perform GWAS and GPS (Genomic Prediction/Selection)
#Output: See GAPIT.Main
#Authors: Zhiwu Zhang
#Last update: August 10, 2011 
##############################################################################################
#Input defination
#Y 		#This is phenotype data

#Genotype data can appear as either hapmap or numerical, but not both
#G		#This is genotype data in format of hapmap
#GD		#They  pair of genotype (data and map)with genotype data in numerical format
#GM	

#Genotype can be loaded as above genotype data. The other option is by file names	
#numFiles	#Number of Genotype files
#dataPath	#The location of genotype files
#GFile	#Common file name (before the numerical part) for hapmap format
#GFileExt	#Common file extention (after the numerical part)

#GDFile	#Common file name (before the numerical part) for genotype data
#GMFile=	#Common file name (before the numerical part) for genotype map
#GDFileExt	#Common file extention (after the numerical part) for genotype data
#GMFileExt	#Common file extention (after the numerical part)for genotype map		

#KI		#This is kinship data, set it to NULL in case that geneotype files are used for estimation
#CV		#This is the covariate variables of fixed effects, such as population structure
#Z		#This is the customized Z matrix

#groupFrom	#Lower bound for number of group
#groupTo	#Upper bound for number of group
#groupBy	#rang between 1 and number of individuals, smaller the finner optimization 
#CA		#clustering method: "average", "complete", "ward", "single", "mcquitty", "median","centroid". Example: CA=c("complete","average")
#KT		#Group kinship tppe:  "Mean", "Max", "Min", "Median". Example: KT=c("Mean","Max")
#emmaXp3d	#This is the option to use P3D (TRUE) or not (FALSE)

#model	#Genetic model for SNP effect: "Add" or "Dom"
#mafRate	#The SNP below this rate will be removed from reports
#FDR.Rate 	#Alex will add explainatino
#kinMethod	#The method to estimate kinship from genotype files. Options are "EMMA", "Loiselle"
#numPCs=	#Numer of PCS derived from genotype as population structure
#DPP=10000 #Dots per plot (DPP) for Manhattan and QQ plots 

#--------------------------------------------------------------------------------------------------------------------<

if(!is.null(Y)){
if (ncol(Y)<2)  stop ("Phenotype should have taxa name and one trait at least. Please correct phenotype file!")
for (trait in 2: ncol(Y))  {

gapitMain <- GAPIT.Main(Y=Y[,c(1,trait)],G=G,GD=GD,GM=GM,KI=KI,Z=Z,CV=CV,emmaXp3d=emmaXp3d,kinMethod=kinMethod,
				              groupFrom=groupFrom,groupTo=groupTo,groupBy=groupBy,CA=CA,KT=KT,name.of.trait = colnames(Y)[trait],
                        dataPath=dataPath,numFiles=numFiles, num.read = num.read, GFile=GFile,GFileExt=GFileExt,GDFile=GDFile, GMFile=GMFile, GDFileExt=GDFileExt,GMFileExt=GMFileExt, 
                        mafRate = mafRate,FDR.Rate = FDR.Rate,FDR.Filter.Rate=FDR.Filter.Rate,model=model,numPCs=numPCs,GAPIT.Version=GAPIT.Version,
                        GT=NULL, Ratio = Ratio, seed = seed, BINS = BINS,GPSonly=GPSonly,DPP=DPP)  
                 
#return (list(KI=gapitMain$KI,PC=gapitMain$PC))

}# end of loop on trait
}# end ofdetecting null Y

#Calculating kinship or PC in case that phenotype is not provided
if(is.null(Y)){
Timmer=GAPIT.Timmer(Infor="Kinsip and PC only")
Memory=GAPIT.Memory(Infor="Kinsip and PC only")

myGenotype<-GAPIT.Genotype(G=G,GD=GD,GM=GM,KI=KI,kinMethod=kinMethod,numPCs=numPCs,
                dataPath=dataPath,numFiles=numFiles, num.read = num.read, GFile=GFile, GFileExt=GFileExt,GDFile=GDFile, GMFile=GMFile, GDFileExt=GDFileExt,GMFileExt=GMFileExt,
                mafRate=mafRate,FDR.Rate = FDR.Rate,FDR.Filter.Rate=FDR.Filter.Rate,model="Add", seed = seed, Ratio = Ratio)

Timmer=myGenotype$Timmer
Memory=myGenotype$Memory
          
return (list(KI=myGenotype$KI,PC=myGenotype$PC))
}
print("GAPIT accomplished successfully!")
return(gapitMain)
}  #end of GAPIT function


##############################################################################################
GAPIT.Main <- function(Y,G=NULL,GD=NULL,GM=NULL,KI=NULL,Z=NULL,CV=NULL,emmaXp3d=TRUE,
                groupFrom=1000000 ,groupTo=1,groupBy=10,CA="average", KT='Mean',kinMethod=NULL,DPP=50000,
               	ngrid = 100, llin = -10, ulim = 10, esp = 1e-10,
                dataPath=NULL,numFiles=NULL, num.read = 1024, GFile=NULL, GFileExt=NULL,GDFile=NULL, GMFile=NULL, GDFileExt=NULL,GMFileExt=NULL,
                mafRate=0,FDR.Rate=1,FDR.Filter.Rate=1,model="Add",numPCs=0,  GAPIT.Version=GAPIT.Version,
                name.of.trait, GT = NULL, Ratio = 1, seed = 123, BINS = 20,GPSonly=FALSE){
#Object: To perform GWAS and GPS (Genomic Prediction/Selection)
#Output: GWAS table (text file), QQ plot (PDF), Manhattan plot (PDF), genomic prediction (text file), and
#        genetic and residual variance components
#Authors: Zhiwu Zhang
# Last update: may 12, 2011
##############################################################################################

Timmer=GAPIT.Timmer(Infor="GAPIT (GWAS and GS in R)")
Memory=GAPIT.Memory(Infor="GAPIT (GWAS and GS in R)")

#Genotype processing and calculation Kin and PC
print("Procesing genotype...")
myGenotype<-GAPIT.Genotype(G=G,GD=GD,GM=GM,KI=KI,kinMethod=kinMethod,numPCs=numPCs,Ratio=Ratio,GPSonly=GPSonly,
                dataPath=dataPath,numFiles=numFiles, num.read = num.read, GFile=GFile, GFileExt=GFileExt,GDFile=GDFile, GMFile=GMFile, GDFileExt=GDFileExt,GMFileExt=GMFileExt,
                mafRate=mafRate,FDR.Rate = FDR.Rate,FDR.Filter.Rate=FDR.Filter.Rate,model="Add")

print("Genotype called from main function")                
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Kin and PCA")
Memory=GAPIT.Memory(Memory=Memory,Infor="Kin and PCA")

genoFormat=myGenotype$genoFormat
KI=myGenotype$KI
PC=myGenotype$PC
hasGenotype=myGenotype$hasGenotype
byFile=myGenotype$byFile
fullGD=myGenotype$fullGD
GD=myGenotype$GD
GI=myGenotype$GI
GT=myGenotype$GT

#Examine status of datasets
print("Examing data...")
if(is.null(Y)) stop ("GAPIT says: Phenotype must exist.")
if(is.null(KI)&missing(GD)) stop ("GAPIT says: Kinship is required. As genotype is not provided, kinship can not be created.")

#When GT and GD are missing, force to have fake ones (creating them from Y),GI is not required in this case
if(is.null(GD) & is.null(GT)) {
	GT=as.matrix(Y[,1])
	GD=matrix(1,nrow(Y),1)	
  GI=as.data.frame(matrix(0,1,3) )
  colnames(GI)=c("SNP","Chromosome","Position")
}


#merge CV with PC
if(numPCs>0&!is.null(CV))CV=GAPIT.CVMergePC(CV,PC)
if(numPCs>0&is.null(CV))CV=PC

#Create Z as identity matrix from Y if it is not provided
if(is.null(Z)){
taxa=as.character(Y[,1])
Z=as.data.frame(diag(1,nrow(Y)))
Z=rbind(taxa,Z)
taxa=c('Taxa',as.character(taxa))
Z=cbind(taxa,Z)
}

#Add the part of non proportion in Z matrix
if(!is.null(Z)&nrow(Z)-1<nrow(Y)){
Z=GAPIT.ZmatrixFormation(Z=Z,Y=Y)
}

#Create CV with all 1's if it is not provided
if(is.null(CV)){
CV=Y[,1:2]
CV[,2]=1
colnames(CV)=c("taxa","overall")
}

#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="ZMatrix")
#Memory=GAPIT.Memory(Memory=Memory,Infor="ZMatrix")

#Data quality control
print("Quality control...")

qc <- GAPIT.QC(Y=Y,KI=KI, GT=GT,CV=CV,Z=Z)

print("Preparing optimization...")
GTindex=qc$GTindex
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="QC")
Memory=GAPIT.Memory(Memory=Memory,Infor="QC")

#Hanler of group boundry
if(groupFrom>groupTo) stop("GAPIT says: groupTo should  be larger than groupFrom. Please correct them!")

if(is.null(CV) | (!is.null(CV)& groupTo<ncol(CV))) {
#The minimum of group is number of columns in CV
  groupFrom=1
  groupTo=1
  warning("The upper bound of groups (groupTo) is not sufficient. both boundries were set to a and GLM is performed!")
}

if(!is.null(CV)& groupFrom<1) {
  groupFrom=1 #minimum of group is number of columns in CV
  warning("The lower bound of groups should be 1 at least. It was set to 1!")
}

if(groupTo>nrow(qc$KI)) {
  groupTo=nrow(qc$KI) #maximum of group is number of rows in KI
	if(groupFrom>nrow(qc$KI)) groupFrom=nrow(qc$KI)
  warning("The upper bound of groups is too high. It was set to the size of kinship!")
}

#Optimization for group number, cluster algorithm and kinship type
GROUP=seq(groupTo,groupFrom,by=-groupBy)#The reverse order is to make sure to include full model
if(missing("CA")) CA=c("ward", "single", "complete", "average", "mcquitty", "median", "centroid")
if(missing("KT")) KT=c("Mean", "Max", "Min", "Median")
numSetting=length(GROUP)*length(CA)*length(KT)

#Reform Y, GD and CV into EMMA format
ys=as.matrix(qc$Y[2])
X0=as.matrix(qc$CV[,-1])

#Initial
REMLs.best=10E20
ca.best=CA[1]
group.best=GROUP[1]
kt.best=KT[1]
count=0
compresion.KT<- list(NULL)
compresion.CA<- list(NULL)
compresion.GROUP<- list(NULL)
compresion.REML<- list(NULL)
compresion.VG<- list(NULL)
compresion.VE<- list(NULL)
comp=matrix(,numSetting,6)

#add indicator of overall mean
if(min(X0[,1])!=max(X0[,1])) X0 <- cbind(1, X0) #do not add overall mean if X0 has it already at first column

#--------------------------------------------------------------------------------------------------------------------#
print("Compressing..." )
print(paste("The total combinations: ", numSetting,sep=""))
#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="DataProcessing")
#Memory=GAPIT.Memory(Memory=Memory,Infor="DataProcessing")

#Loop to optimize cluster algorithm, group number and kinship type
for (ca in CA){
for (group in GROUP){
for (kt in KT){

#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 1")
#Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 1")
count=count+1

if(group<ncol(X0)+1) group=1 # the emma function (emma.delta.REML.dLL.w.Z) does not allow K has dim less then CV. turn to GLM (group=1)

cp <- GAPIT.Compress(KI=qc$KI,CA=ca,KT=kt,GN=group,Timmer=Timmer,Memory=Memory)
Timmer=cp$Timmer
Memory=cp$Memory

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_cp")
Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2_cp")

bk <- GAPIT.Block(Z=qc$Z,GA=cp$GA,KG=cp$KG)
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_bk")
Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 bk")

zc <- GAPIT.ZmatrixCompress(Z=qc$Z,GAU =bk$GA)

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_zc")
Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 zc")

#Reform KW and Z into EMMA format

z0=as.matrix(zc$Z[,-1])
Z=matrix(as.numeric(z0),nrow=nrow(z0),ncol=ncol(z0))

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Prio PreP3D")
Memory=GAPIT.Memory(Memory=Memory,Infor="Prio PreP3D")

#Do not screen SNP unless existing genotype and one combination
if(numSetting==1 & hasGenotype){
 optOnly=FALSE
}else{
optOnly=TRUE
}
if(GPSonly) optOnly=TRUE

if(optOnly){
 colInclude=1
}else{
 colInclude=c(1:ncol(GD))
}

if(!optOnly) print("Compressing and Genome screening..." )

#Evaluating maximum likelohood
p3d <- GAPIT.EMMAxP3D(ys=ys,xs=as.matrix(as.data.frame(GD[GTindex,colInclude])),K = as.matrix(bk$KW) ,Z=Z,X0=X0,GI=GI,emmaXp3d=emmaXp3d,Timmer=Timmer,Memory=Memory,fullGD=fullGD,
			 dataPath=dataPath,numFiles=numFiles, num.read = num.read, byFile=byFile, GFile=GFile,GFileExt=GFileExt,GDFile=GDFile, GMFile=GMFile, GDFileExt=GDFileExt,GMFileExt=GMFileExt,
       GTindex=GTindex,genoFormat=genoFormat,optOnly=optOnly,model=model)

Timmer=p3d$Timmer
Memory=p3d$Memory

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Post PreP3D")
Memory=GAPIT.Memory(Memory=Memory,Infor="Post PreP3D")

#print("Cluster algorithm, kinship type, groups, VG, Ve and REML:")
print(paste(count, "of",numSetting,"--","Vg=",round(p3d$vgs,4), "VE=",round(p3d$ves,4),"-2LL=",round(p3d$REMLs,2), "  Clustering=",ca,"  Group number=", group ,"  Group kinship=",kt,sep = " "))

compresion.KT[[count]]=kt
compresion.CA[[count]]=ca
compresion.GROUP[[count]]=group
compresion.REML[[count]]=p3d$REMLs
compresion.VG[[count]]=p3d$vgs
compresion.VE[[count]]=p3d$ves

#recording compresion profile on array
comp[numSetting,1]=kt
comp[numSetting,2]=ca
comp[numSetting,3]=group
comp[numSetting,4]=p3d$REMLs
comp[numSetting,5]=p3d$vgs
comp[numSetting,6]=p3d$ves

#Recording the best combination
if(p3d$REMLs!="NaN" || numSetting==1)
{
  if(p3d$REMLs<REMLs.best || numSetting==1)
  {
    REMLs.best=p3d$REMLs
    ca.best=ca
    group.best=group
    kt.best=kt
    cp.best=cp
    bk.best=bk
    zc.best=zc
    p3d.best=p3d

  }
}


}#end of for (ca in CA)

#Skip the rest group in case group 1 is finished
if(group==1) break #To skip the rest group iteRations

}#end of for (group in GROUP)
}#end of for (kt in KT)
print(paste("Optimum: ",ca.best,kt.best,group.best,p3d.best$vgs, p3d.best$ves,p3d.best$REMLs ,sep = " "))
#--------------------------------------------------------------------------------------------------------------------#
#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Compression")
#Memory=GAPIT.Memory(Memory=Memory,Infor="Copmression")

if(numSetting==1)
{
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GWAS")
  Memory=GAPIT.Memory(Memory=Memory,Infor="GWAS")
}


#Collecting compression information
Compression= cbind(compresion.KT,compresion.CA,compresion.GROUP,compresion.REML,compresion.VG,compresion.VE)

#Perform GWAS with the optimum setting
#This section is omited if there is only one setting
if (numSetting>1 & hasGenotype & !GPSonly) {
  print("Genomic screening..." )
  #Reform KW and Z into EMMA format
  optOnly=FALSE
  z0=as.matrix(zc.best$Z[,-1])
  Z=matrix(as.numeric(z0),nrow=nrow(z0),ncol=ncol(z0))
  
   
  p3d <- GAPIT.EMMAxP3D(ys=ys,xs=as.matrix(as.data.frame(GD[GTindex,]))   ,K = as.matrix(bk.best$KW) ,Z=Z,X0=X0,GI=GI,emmaXp3d=emmaXp3d,Timmer=Timmer,Memory=Memory,fullGD=fullGD,
    			 dataPath=dataPath,numFiles=numFiles, num.read = num.read, byFile=byFile, GFile=GFile,GFileExt=GFileExt,GDFile=GDFile, GMFile=GMFile, GDFileExt=GDFileExt,GMFileExt=GMFileExt,
           GTindex=GTindex,genoFormat=genoFormat,optOnly=FALSE,model=model)  
    
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GWAS")
  Memory=GAPIT.Memory(Memory=Memory,Infor="GWAS")

}#end of if (numSetting>1 & hasGenotype & !GPSonly)

#Plotting optimum group kinship
if(nrow(bk.best$KW)<nrow(KI)){
pdf(paste("GAPIT.Kin.Optimum.pdf",sep=""), width = 12, height = 12)
par(mar = c(25,25,25,25))
heatmap.2(as.matrix(bk.best$KW),  cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none")
dev.off()
}



#Merge GWAS resultss from files to update ps,maf and nobs in p3d
if(byFile&!fullGD)
{
print("Loading GWAS results from file...")
for (file in 1:numFiles)
{

#Initicalization
frag=1
numSNP=num.read

while(numSNP==num.read) {     #this is problematic if the read end at the last line  

#Initicalization GI to detect reading empty line
#theGI=NULL
#theP=NULL
#theMAF=NULL
#thenobs=NULL

#reload results from files
print(paste("Current file ",file,"Fragment: ",frag))

theGI <- try(read.table(paste("GAPIT.TMP.GI.",file,".",frag,".txt",sep=""), head = TRUE)   ,silent=TRUE)
theP <- try(read.table(paste("GAPIT.TMP.ps.",file,".",frag,".txt",sep=""), head = FALSE)   ,silent=TRUE)
theMAF <- try(read.table(paste("GAPIT.TMP.maf.",file,".",frag,".txt",sep=""), head = FALSE),silent=TRUE)
thenobs <- try(read.table(paste("GAPIT.TMP.nobs.",file,".",frag,".txt",sep=""),head= FALSE),silent=TRUE)


#if(inherits(theGI, "try-error"))  {
if(nrow(theGI)<1){
  numSNP=0
  #print("This fragment is empty.")
}else{

#print("Records loaded for this fragment.")
  numSNP=nrow(theGI)  
  colnames(theP)="P"
  colnames(theMAF )="MAF"
  colnames(thenobs )="nobs"
  colnames(theGI) = colnames(GI)
 
#Merge results  
  if(file==1 & frag==1){
    GI=theGI  
    allP=theP
    allMAF=theMAF
    allnobs=thenobs
  }else{
    allP=as.data.frame(rbind(as.matrix(allP),as.matrix(theP))  )
    allMAF=as.data.frame(rbind(as.matrix(allMAF),as.matrix(theMAF)) )
    allnobs=as.data.frame(rbind(as.matrix(allnobs),as.matrix(thenobs)))
    GI=as.data.frame(rbind(as.matrix(GI),as.matrix(theGI)))
  }

}#end of  if(inherits(theGI, "try-error")) (else section)

#setup for next fragment
frag=frag+1   #Progress to next fragment 

}#end of loop on fragment: while(numSNP==num.read)
}#end of loop on file

#update p3d with components from files
  p3d$ps=allP
  p3d$maf=allMAF
  p3d$nobs=allnobs
  print("GWAS results loaded from all files succesfully!")
} #end of if(byFile)

#--------------------------------------------------------------------------------------------------------------------#
#Final report
print("Generating summary" )

#genomic prediction
print("Genomic Breeding Values (GBV) ..." )
if(length(bk.best$KW)>ncol(X0)) {
  gs <- GAPIT.GS(KW=bk.best$KW,KO=bk.best$KO,KWO=bk.best$KWO,GAU=bk.best$GAU,UW=cbind(p3d.best$BLUP,p3d.best$PEV))
}
if(length(bk.best$KW)>ncol(X0)) {
  write.table(gs$BLUP, paste("GAPIT.", name.of.trait,".BLUP.csv" ,sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)
}
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GPS")
Memory=GAPIT.Memory(Memory=Memory,Infor="GPS")

#Make heatmap for distribution of BLUP and PEV
print("GBV and accuracy distribution..." )
if(length(bk.best$KW)>ncol(X0)) {
  GAPIT.GS.Visualization(gsBLUP = gs$BLUP, BINS=BINS,name.of.trait = name.of.trait)
}

#Make a plot Summarzing the Compression Results, if more than one "compression level" has been assessed
print("Compression portfolios..." )
GAPIT.Compression.Visualization(Compression = Compression, name.of.trait = name.of.trait)

#Export GWAS results
if(hasGenotype )
{

  print("Filtering SNPs with MAF..." )
	index=p3d$maf>=mafRate	     
	PWI.Filtered=cbind(GI,p3d$ps,p3d$maf,p3d$nobs)[index,]
	colnames(PWI.Filtered)=c("SNP","Chromosome","Position ","P.value", "maf", "nobs")
  	     
		     
  print("SNPs filtered with MAF")
  
  if(!is.null(PWI.Filtered))
  {

  #Run the BH multiple correction procedure of the results
  #Create PWIP, which is a table of SNP Names, Chromosome, bp Position, Raw P-values, FDR Adjusted P-values
  print("Calculating FDR..." )
  PWIP <- GAPIT.Perform.BH.FDR.Multiple.Correction.Procedure(PWI = PWI.Filtered, FDR.Rate = FDR.Rate, FDR.Procedure = "BH")

  #QQ plots
  print("QQ plot..." )
  GAPIT.QQ(P.values = PWIP$PWIP[,4], name.of.trait = name.of.trait,DPP=DPP)

  #Manhattan Plots
   print("Manhattan plot (Genomewise)..." )
  GAPIT.Manhattan(GI.MP = PWIP$PWIP[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Genomewise")
 print("Manhattan plot (Chromosomewise)..." )
  GAPIT.Manhattan(GI.MP = PWIP$PWIP[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Chromosomewise")

  #Association Table
  print("Association table..." )
  #GAPIT.Table(final.table = PWIP$PWIP, name.of.trait = name.of.trait,FDR.Filter.Rate=FDR.Filter.Rate)
  write.table(PWIP$PWIP[PWIP$PWIP[,7]<=FDR.Filter.Rate,], paste("GAPIT.", name.of.trait, ".GWAS.Results.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)



  
  } #end of if(!is.null(PWI.Filtered))
} #end of if(hasGenotype )

#Log
log=GAPIT.Log(Y=Y,KI=KI,Z=Z,CV=CV,emmaXp3d=emmaXp3d,
				groupFrom = groupFrom ,groupTo =groupTo ,groupBy = groupBy ,CA = CA, KT= KT,
                      	ngrid = ngrid , llin = llin , ulim = ulim , esp = esp ,name.of.trait = name.of.trait)
#Memory usage
#GAPIT.Memory.Object(name.of.trait=name.of.trait)

#Timming
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Report")
Memory=GAPIT.Memory(Memory=Memory,Infor="Report")

file=paste("GAPIT.", name.of.trait,".Timming.csv" ,sep = "")
write.table(Timmer, file, quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)

file=paste("GAPIT.", name.of.trait,".Memory.Stage.csv" ,sep = "")
write.table(Memory, file, quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)

print(paste(name.of.trait, "has been analyzed successfully!") )
print(paste("The results are saved in the directory of ", getwd()) )
print("---------------------------------------------------------------------------------------")

return (list(Timmer=Timmer,Compression=Compression,KI=KI,PC=PC))

}#The function GAPIT.Main ends here

##############################################################################################
GAPIT.EMMAxP3D <- function(ys,xs,K=NULL,Z=NULL,X0=NULL,GI=NULL,
		dataPath=NULL,numFiles=1, genoFormat="Hapmap", num.read=NULL,byFile=FALSE,fullGD=TRUE,Ratio=1,
    GFile=NULL,GFileExt=NULL,GTindex=NULL,GDFile=NULL, GMFile=NULL, GDFileExt=NULL,GMFileExt=NULL,
    emmaXp3d=TRUE,Timmer,Memory,optOnly=TRUE,model="Add",
    ngrids=100,llim=-10,ulim=10,esp=1e-10   ){
#Object: To esimate variance component by using EMMA algorithm and perform GWAS with P3D/EMMAx
#Output: ps, REMLs, stats, dfs, vgs, ves, BLUP,  BLUP_Plus_Mean, PEV
#Authors: Feng Tian, Alex Lipka and Zhiwu Zhang
# Last update: April 26, 2011
# Library used: EMMA (Kang et al, Genetics, Vol. 178, 1709-1723, March 2008)
# Note: This function was modified from the function of emma.REML.t from the library
##############################################################################################

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="P3D Start")
Memory=GAPIT.Memory(Memory=Memory,Infor="P3D Start")

#--------------------------------------------------------------------------------------------------------------------<
#Change data to matrix format if they are not
if (is.null(dim(ys)) || ncol(ys) == 1)  ys <- matrix(ys, 1, length(ys))
if (is.null(X0)) X0 <- matrix(1, ncol(ys), 1)

#handler of special Z and K
if(!is.null(Z)){ if(ncol(Z) == nrow(Z)) Z = NULL }
if(!is.null(K)) {if(length(K)<2) K = NULL}

#Extract dimension information
g <- nrow(ys) #number of traits
n <- ncol(ys) #number of observation

q0 <- ncol(X0)#number of fixed effects
q1 <- q0 + 1  #Nuber of fixed effect including SNP

nr=n
if(!is.null(K)) tv=ncol(K)

#decomposation without fixed effect
if(!is.null(K)) eig.L <- emma.eigen.L(Z, K) #this function handle both NULL Z and non-NULL Z matrix
#if(!is.null(K)) eig.L$values[which(eig.L$values<0)]=0  #Negative eigen values
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="eig.L")
Memory=GAPIT.Memory(Memory=Memory,Infor="eig.L")



#decomposation with fixed effect (SNP not included)
X <-  X0 #covariate variables such as population structure
if (!is.null(Z) & !is.null(K)) eig.R <- emma.eigen.R.w.Z(Z, K, X) #This will be used to get REstricted ML (REML)
if (is.null(Z)  & !is.null(K)) eig.R <- emma.eigen.R.wo.Z(   K, X) #This will be used to get REstricted ML (REML)
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="eig.R")
Memory=GAPIT.Memory(Memory=Memory,Infor="eig.R")





#-------------------------------------------------------------------------------------------------------------------->

#Loop on Traits
for (j in 1:g)
{

#--------------------------------------------------------------------------------------------------------------------<
if(!is.null(K)) REMLE <- emma.REMLE(ys[j,], X, K, Z, ngrids, llim, ulim, esp, eig.R)
if (!is.null(Z) & !is.null(K))  U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values + REMLE$delta)),rep(sqrt(1/REMLE$delta),nr - tv)),nr,((nr-tv)+length(eig.L$values)),byrow=TRUE)
if ( is.null(Z) & !is.null(K))  U <- eig.L$vectors * matrix(  sqrt(1/(eig.L$values + REMLE$delta)),nr,length(eig.L$values),byrow=TRUE)
x.prev <- vector(length = 0)
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="REML")
Memory=GAPIT.Memory(Memory=Memory,Infor="REML")
#-------------------------------------------------------------------------------------------------------------------->

#The cases that go though multiple file once
if(optOnly) numFiles=1 
if(fullGD)  numFiles=1 
if(!fullGD & !optOnly) print("Screening SNPs from file...")

#Add loop for genotype data files
for (file in 1:numFiles)
{
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="New Genotype file")
Memory=GAPIT.Memory(Memory=Memory,Infor="New Genotype file")


  
  frag=1
  numSNP=num.read
  myFRG=NULL
while(numSNP==num.read) {     #this is problematic if the read end at the last line  
  
  #force to skip the while loop if optOnly
  if(optOnly) numSNP=0
  
  #Determine the case of first file and first fragment: skip read file
  if(file==1 & frag==1){
    firstFileFirstFrag=TRUE
  }else{
    firstFileFirstFrag=FALSE  
  }
  
#In case of xs is not full GD, replace xs from file
  if(!fullGD & !optOnly & !firstFileFirstFrag )
  { 
#update xs for each file
    rm(xs)
    gc()
    print(paste("Current file: ",file," , Fragment: ",frag,sep=""))
    myFRG=GAPIT.Fragment( dataPath=dataPath,  numFiles=numFiles,GFile=GFile,GFileExt=GFileExt,
                          seed=seed,Ratio=Ratio,model=model,genoFormat=genoFormat, 
                          GDFile=GDFile,GDFileExt=GDFileExt,GMFile=GMFile,GMFileExt=GMFileExt,num.read=num.read,file=file,frag=frag) 

      if(is.null(myFRG$GD)){
        xs=NULL
      }else{
        xs=myFRG$GD[GTindex,] 
      }


        if(!is.null(myFRG$GI))    {
          colnames(myFRG$GI)=c("SNP","Chromosome","Position")
          GI=as.matrix(myFRG$GI)
        }


      if(!is.null(myFRG$GI))    {
        numSNP=ncol(myFRG$GD)
      }  else{
       numSNP=0
      }
      if(is.null(myFRG))numSNP=0  #force to end the while loop
  } # end of if(!fullGD)   

  if(fullGD)numSNP=0  #force to end the while loop  

#Skip REML if xs is from a empty fragment file    
if(!is.null(xs))  {
  
  if (is.null(dim(xs)) || nrow(xs) == 1)  xs <- matrix(xs, length(xs),1)
  m <- ncol(xs) #number of SNPs
  t <- nrow(xs) #number of individuals
  
  #allocate spaces for SNPs
  dfs <- matrix(nrow = m, ncol = g)
  stats <- matrix(nrow = m, ncol = g)
  ps <- matrix(nrow = m, ncol = g)
  nobs <- matrix(nrow = m, ncol = g)
  maf <- matrix(nrow = m, ncol = g)
  #print(paste("Memory allocated.",sep=""))
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Memory allocation")
  Memory=GAPIT.Memory(Memory=Memory,Infor="Memory allocation")
  
  if(optOnly)mloop=0
  if(!optOnly)mloop=m
   
  #Loop on SNPs
  #print(paste("Number of SNPs is ",mloop," in genotype file ",file, sep=""))
for (i in 0:mloop){
#print(i)
#--------------------------------------------------------------------------------------------------------------------<
    normalCase=TRUE
    if ((i>0)&(floor(i/1000)==i/1000))  print(paste("Genotype file: ", file,", SNP: ",i," ",sep=""))

    # To extract current snp. It save computation for next one in case they are identical
    if(i==0&file==1){
      #For the model without fitting SNP
      vids <- !is.na(ys[j,]) #### Feng changed
      xv <- ys[j, vids]*0+1      #### Feng changed
    }

    if(i>0){
      vids <- !is.na(xs[,i]) #### Feng changed
      xv <- xs[vids,i]      #### Feng changed
      vids.TRUE=which(vids==TRUE)
      vids.FALSE=which(vids==FALSE)
      ns=length(xv)
      ss=sum(xv)
      maf[i]=min(.5*ss/ns,1-.5*ss/ns)
	nobs[i]=ns
    }

    #Situation of no variation for SNP except the fisrt one(synthetic for EMMAx/P3D)
    if ((min(xv) ==max(xv) )&i>0)
    {
      dfs[i, ] <- rep(NA, g)
      stats[i, ] <- rep(NA, g)
      ps[i, ] = rep(1, g)
      normalCase=FALSE
    }else if(identical(x.prev, xv))     #Situation of the SNP is identical to previous
    {
    print("how does it get here????")
    print(length(i))
    print(i)
    print(dim(i))
    print(dim(x.prev))
    print(dim(xv))
    
      dfs[i, ] <- dfs[i - 1, ]
      stats[i, ] <- stats[i - 1, ]
      ps[i, ] <- ps[i - 1, ]
      normalCase=FALSE
    }
#-------------------------------------------------------------------------------------------------------------------->


    #Normal case
    if(normalCase)
    {

#--------------------------------------------------------------------------------------------------------------------<
      #nv <- sum(vids)
      yv <- ys[j, vids] #### Feng changed
      nr <- sum(vids) #### Feng changed
      if (!is.null(Z) & !is.null(K))
      {
        r<- ncol(Z) ####Feng, add a variable to indicate the number of random effect
        vran <- vids[1:r] ###Feng, add a variable to indicate random effects with nonmissing genotype
        tv <- sum(vran)  #### Feng changed
      }






#-------------------------------------------------------------------------------------------------------------------->

#--------------------------------------------------------------------------------------------------------------------<



      if(i>0) dfs[i, j] <- nr - q1
    	if(i>0) X <- cbind(X0[vids, , drop = FALSE], xs[vids,i])

       #Recalculate eig and REML if not using P3D  NOTE THIS USED TO BE BEFORE the two solid lines
      if(emmaXp3d==FALSE & !is.null(K))
      {
        if (!is.null(Z)) eig.R <- emma.eigen.R.w.Z(Z, K, X) #This will be used to get REstricted ML (REML)
        if (is.null(Z)) eig.R <- emma.eigen.R.wo.Z(   K, X) #This will be used to get REstricted ML (REML)
        if (!is.null(Z)) REMLE <- emma.REMLE(ys[j,], X, K, Z, ngrids, llim, ulim, esp, eig.R)
        if ( is.null(Z)) REMLE <- emma.REMLE(ys[j,], X, K, Z = NULL, ngrids, llim, ulim, esp, eig.R)
        if (!is.null(Z) & !is.null(K))  U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values + REMLE$delta)),rep(sqrt(1/REMLE$delta),nr - tv)),nr,((nr-tv)+length(eig.L$values)),byrow=TRUE)
        if ( is.null(Z) & !is.null(K))  U <- eig.L$vectors * matrix(  sqrt(1/(eig.L$values + REMLE$delta)),nr,length(eig.L$values),byrow=TRUE)
      }

      if(n==nr)
      {
        if(!is.null(K))
        {
            yt <- crossprod(U, yv)
            Xt <- crossprod(U, X)
        }else{
        yt=yv
        Xt=X
        }
        XX=crossprod(Xt, Xt)



        if(XX[1,1] == "NaN")
        {
          Xt[which(Xt=="NaN")]=0
          yt[which(yt=="NaN")]=0
          XX=crossprod(Xt, Xt)
        }
        XY=crossprod(Xt, yt)
      }

      #Missing SNP
      if(n>nr)
      {
       UU=crossprod(U,U)
       A11=UU[vids.TRUE,vids.TRUE]
       A12=UU[vids.TRUE,vids.FALSE]
       A21=UU[vids.FALSE,vids.TRUE]
       A22=UU[vids.FALSE,vids.FALSE]
       A22i=try(solve(A22) )
       if(inherits(A22i, "try-error")) A22i <- ginv(A22)

       F11=A11-A12%*%A22i%*%A21
       XX=crossprod(X,F11)%*%X
       XY=crossprod(X,F11)%*%yv
      }
      iXX <- try(solve(XX) )
      if(inherits(iXX, "try-error")) iXX <- ginv(crossprod(Xt, Xt))
      beta <- iXX %*% XY

#-------------------------------------------------------------------------------------------------------------------->

#--------------------------------------------------------------------------------------------------------------------<
      if(i==0 &file==1  & !is.null(K))
      {
        Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="ReducedModel")
	  Memory=GAPIT.Memory(Memory=Memory,Infor="ReducdModel")

        #Calculate the blup gammahat = vgKZprimeVinv*(Y-Xbetahat)
        vgs <- REMLE$vg
        ves <- REMLE$ve
        REMLs <- REMLE$REML

        XtimesBetaHat <- X %*% beta
        YminusXtimesBetaHat <- ys[j,]- XtimesBetaHat
        vgK <- REMLE$vg*K
        Dt <- crossprod(U, YminusXtimesBetaHat)
        if (!is.null(Z))  Zt <- crossprod(U, Z)
        if (is.null(Z)) Zt <- t(U)

        if(XX[1,1] == "NaN")
        {
        Dt[which(Dt=="NaN")]=0
        Zt[which(Zt=="NaN")]=0
        }

        BLUP <- K %*% crossprod(Zt, Dt) #Using K instead of vgK because using H=V/Vg

        grand.mean.vector <- rep(beta[1], length(BLUP))
        BLUP_Plus_Mean <- grand.mean.vector + BLUP
    	  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="BLUP")
        Memory=GAPIT.Memory(Memory=Memory,Infor="BLUP")

        #PEV
        C11=try(vgs*solve(crossprod(Xt,Xt)))
        if(inherits(C11, "try-error")) C11=vgs*ginv(crossprod(Xt,Xt))

        C21=-K%*%crossprod(Zt,Xt)%*%C11
        Kinv=try(solve(K)    )
        if(inherits(Kinv, "try-error")) Kinv=ginv(K)

        if(!is.null(Z)) term.0=crossprod(Z,Z)/ves
        if(is.null(Z)) term.0=diag(1/ves,nrow(K))

        term.1=try(solve(term.0+Kinv/vgs )  )
        if(inherits(term.1, "try-error")) term.1=ginv(term.0+Kinv/vgs )

        term.2=C21%*%crossprod(Xt,Zt)%*%K
        C22=(term.1-term.2 )
        PEV=as.matrix(diag(C22))
        BLUE=X%*%beta

    	  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PEV")
        Memory=GAPIT.Memory(Memory=Memory,Infor="PEV")

      }#end of if(i==0&file==1   & !is.null(K))
#-------------------------------------------------------------------------------------------------------------------->

#--------------------------------------------------------------------------------------------------------------------<
      if(i==0 &file==1  & is.null(K))
      {
        YY=crossprod(yt, yt)
        ves=(YY-crossprod(beta,XY))/(n-q0)
        r=yt-X%*%iXX%*%XY
        REMLs=-.5*(n-q0)*log(det(ves))                  -.5*n                   -.5*(n-q0)*log(2*pi)
#       REMLs=-.5*n*log(det(ves)) -.5*log(det(iXX)/ves) -.5*crossprod(r,r)/ves  -.5*(n-q0)*log(2*pi)
        vgs = 0
        BLUP = 0
        BLUP_Plus_Mean = NaN
        PEV = ves
       	BLUE=X%*%beta
      }

      #calculate t statistics and probabilty
      if(i > 0)
      {
        if(!is.null(K)) stats[i, j] <- beta[q1]/sqrt(iXX[q1, q1] *REMLE$vg)
        if(is.null(K)) stats[i, j] <- beta[q1]/sqrt(iXX[q1, q1] *ves)
        ps[i, ] <- 2 * pt(abs(stats[i, ]), dfs[i, ],lower.tail = FALSE)
      }
#-------------------------------------------------------------------------------------------------------------------->

    } # End of if(normalCase)
    x.prev=xv #update SNP

} # End of loop on SNPs

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Screening SNPs")
Memory=GAPIT.Memory(Memory=Memory,Infor="Screening SNPs")

#output p value for the genotype file
if(!fullGD)
{
  write.table(GI, paste("GAPIT.TMP.GI.",file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = TRUE)
  write.table(ps, paste("GAPIT.TMP.ps.",file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE)
  write.table(maf, paste("GAPIT.TMP.maf.",file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE)
  write.table(nobs, paste("GAPIT.TMP.nobs.",file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE)
  #rm(dfs,stats,ps,nobs,maf,GI)   #This cause problem on return
  #gc()
}


    frag=frag+1   #Progress to next fragment
    
} #end of if(!is.null(X))
    
} #end of repeat on fragment 



} # Ebd of loop on file
} # End of loop on traits

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GWAS done for this Trait")
Memory=GAPIT.Memory(Memory=Memory,Infor="GWAS done for this Trait")


return(list(ps = ps, REMLs = -2*REMLs, stats = stats, dfs = dfs,maf=maf,nobs = nobs,Timmer=Timmer,Memory=Memory,
        vgs = vgs, ves = ves, BLUP = BLUP, BLUP_Plus_Mean = BLUP_Plus_Mean,
        PEV = PEV, BLUE=BLUE))

#print("GAPIT.EMMAxP3D accomplished successfully!")
}#end of GAPIT.EMMAxP3D function

##############################################################################################
GAPIT.ZmatrixFormation <- function(Z,Y){
#Object: To expande the proportion Z to final Z
#Output: Z
#Authors: Zhiwu Zhang 
# Last update: April 22, 2011 
##############################################################################################

#split individuals in Y to the ones that are given Z and the one not
taxa.Z=as.matrix(Z[-1,1])
taxa.Y=as.matrix(Y[,1])
taxa.diff=setdiff(taxa.Y,taxa.Z)
taxa.I=as.matrix(taxa.Y[match(taxa.diff,taxa.Y,nomatch = 0)])
taxa.Z.col=as.matrix(Z[1,-1])

#Create final Z with zero block and identity block
Z0=matrix(data=0,nrow=nrow(taxa.Z),ncol=nrow(taxa.I))
Z1=diag(1,nrow(taxa.I))
ZC=as.matrix(rbind(Z0,Z1))

#To label rows and columns
label.row=rbind(as.matrix(Z[,1]),taxa.I)
label.col=t(taxa.I)

#update the zero block by the given Z matrix
position=t(as.matrix(match(taxa.Z.col,taxa.I,nomatch = 0)))
ZC[1:nrow(taxa.Z),position]=as.matrix(Z[-1,-1])

#habdler of parents do not have phenotype (colums of Z are not in taxa.I)
# To do list

#To form final Z matrix
dataPart=rbind(label.col,ZC)
Z=data.frame(cbind(label.row,dataPart))

#print("GAPIT.ZmatrixFormation accomplished successfully!")
return(Z)
}#The function GAPIT.ZmatrixFormation ends here

##############################################################################################
GAPIT.RemoveDuplicate <- function(Y){
#Object: NA
#Output: NA
#Authors: Zhiwu Zhang 
# Last update: Augus 30, 2011 
##############################################################################################
return (Y[match(unique(Y[,1]), Y[,1], nomatch = 0), ] )
}

##############################################################################################
GAPIT.QC <- function(Y,KI,GT,CV,Z){
#Object: to do data quality control
#Output: Y, KI, GD, CV, Z, flag
#Authors: Zhiwu Zhang and Alex Lipka 
# Last update: April 14, 2011 
##############################################################################################

#Remove duplicates 
print("Removing duplicates...")
Y=GAPIT.RemoveDuplicate(Y)
CV=GAPIT.RemoveDuplicate(CV)
Z=GAPIT.RemoveDuplicate(Z)


#Remove missing phenotype
print("Removing NaN...")
Y=Y[which(Y[,2]!="NaN"),]

# Remove duplicates: 
# GT row wise, Z column wise, and KI both direction.
if(exists("GT"))
{ 
taxa.kept=unique(GT[,1])
}

taxa.all=KI[,1]
taxa.uniqe=unique(taxa.all)
position=match(taxa.uniqe, taxa.all,nomatch = 0)
position.addition=cbind(1,t(1+position))
KI=KI[position,position.addition]

print("Maching Z with Kinship rowwise...")
if(exists("Z"))
{
taxa.all=as.matrix(Z[1,])
taxa.uniqe=intersect(taxa.all,taxa.all)
position=match(taxa.uniqe, taxa.all,nomatch = 0)
Z=Z[,position]
}


#Remove the columns of Z if they are not in KI/GT. KI/GT are allowed to have individuals not in Z
print("Maching Z with Kinship colwise...")
taxa.all=KI[,1]
taxa.kinship=unique(taxa.all)
taxa.Z=as.matrix(Z[1,])
#taxa.Z=colnames(Z) #This does not work for names starting with numerical or "-"
taxa.Z_K_common=intersect(taxa.kinship,taxa.Z)
Z <-cbind(Z[,1], Z[,match(taxa.Z_K_common, taxa.Z, nomatch = 0)])

#Remove the rows of Z if all the ellements sum to 0
print("Maching Z without origin...")
Z1=Z[-1,-1]
Z2=data.frame(Z1)
Z3=as.matrix(Z2)
Z4=as.numeric(Z3) #one dimemtion
Z5=matrix(data = Z4, nrow = nrow(Z1), ncol = ncol(Z1))
RS=rowSums(Z5)>0
#The above process could be simplified!
Z <- Z[c(TRUE,RS),]

#make individuals the same in Z, Y, GT and CV
print("Maching GT and CV...")
if(length(Z)<=1)stop("GAPIT says: Kinship is required or provide genotype to esitmate it!")


# get intersect of all the data
taxa=intersect(Z[-1,1],Y[,1])
if(exists("GT"))taxa=intersect(taxa,taxa.kept)
if(exists("CV"))taxa=intersect(taxa,CV[,1])
if(length(taxa)<=1)stop("GAPIT says: There is no individual ID matched to covariate. Please check!")

#keep the common ones
t=c(TRUE, Z[-1,1]%in%taxa)
Z <- Z[t,]
if(length(t)<=2)stop("GAPIT says: There is no individual ID matched among data. Please check!")

#Remove the columns of Z if all the ellements sum to 0
print("QC final process...")
Z1=Z[-1,-1]
Z2=data.frame(Z1)
Z3=as.matrix(Z2)
Z4=as.numeric(Z3) #one dimemtion
Z5=matrix(data = Z4, nrow = nrow(Z1), ncol = ncol(Z1))
CS=colSums(Z5)>0
#The above process could be simplified!
Z <- Z[,c(TRUE,CS)]


Y <- Y[Y[,1]%in%taxa,]
if(exists("GT")) taxa.kept=data.frame(taxa.kept[taxa.kept%in%taxa])
if(exists("CV")) CV=CV[CV[,1]%in%taxa,]

#get position of taxa.kept in GT
position=match(taxa.kept[,1], GT[,1],nomatch = 0)

#To sort Y, GT, CV and Z
Y=Y[order(Y[,1]),]
CV=CV[order(CV[,1]),]
order.taxa.kept=order(taxa.kept[,1])
Z=Z[c(1,1+order(Z[-1,1])),]
GTindex=position[order.taxa.kept]
flag=nrow(Y)==nrow(Z)-1&nrow(Y)==nrow(GT)&nrow(Y)==nrow(CV)

print("GAPIT.QC accomplished successfully!")
return(list(Y = Y, KI = KI, GT = GT, CV = CV, Z = Z, GTindex=GTindex, flag=flag))
}#The function GAPIT.QC ends here

##############################################################################################
GAPIT.Compress <- function(KI,CA = "average",KT = "Mean",GN=nrow(KI),Timmer,Memory){
#Object: To cluster individuals into groups based on kinship
#Output: GA, KG
#Authors: Alex Lipka and Zhiwu Zhang 
# Last update: April 14, 2011 
##############################################################################################

#For debug 
#GN=7

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP start") 
Memory=GAPIT.Memory(Memory=Memory,Infor="cp start")

# Extract the line names
line.names <- KI[,1]

# Remove the first column of the kinship matrix, which is the line names
KI <- KI[ ,-1]

# Convert kinship to distance
distance.matrix <- 2 - KI 
distance.matrix.as.dist <- as.dist(distance.matrix)

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP distance") 
Memory=GAPIT.Memory(Memory=Memory,Infor="cp distance")

#print(paste("The value of CA is ", CA, sep = ""))



# hclust() will perform the hiearchical cluster analysis
cluster.distance.matrix <- hclust(distance.matrix.as.dist, method = CA)


Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP cluster") 
Memory=GAPIT.Memory(Memory=Memory,Infor="cp cluster")

# Cutree will assign lines into k clusters
group.membership <- cutree(cluster.distance.matrix, k = GN) 

#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP cutree") 
#Memory=GAPIT.Memory(Memory=Memory,Infor="cp cutree")

#calculate group kinship
if(KT == "Mean"){
#This matrix opeRation is much faster than tapply function for  "Mean"
x=as.factor(group.membership)
#b = model.matrix(~x-1) 
n=max(as.numeric(as.vector(x)))
b=diag(n)[x,]

KG=t(b)%*%as.matrix(KI)%*%b
CT=t(b)%*%(0*as.matrix(KI)+1)%*%b
KG=as.matrix(KG/CT)
rownames(KG)=c(1:nrow(KG))
colnames(KG)=c(1:ncol(KG))

#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP calculation original")
#Memory=GAPIT.Memory(Memory=Memory,Infor="cp calculation original")
}

gm=as.factor(group.membership)
kv=as.numeric(as.matrix(KI))
kvr=rep(gm,ncol(KI))
kvc=as.numeric(t(matrix(kvr,nrow(KI),ncol(KI))))
kInCol=t(rbind(kv,kvr,kvc))




if(KT != "Mean"){
#if (KT == "Mean")
#    KG<- tapply(kInCol[,1], list(kInCol[,2], kInCol[,3]), mean)

if (KT == "Max")    
    KG <- tapply(kInCol[,1], list(kInCol[,2], kInCol[,3]), max)
if (KT == "Min")   
    KG <- tapply(kInCol[,1], list(kInCol[,2], kInCol[,3]), min)    
if (KT == "Median")  
    KG <- tapply(kInCol[,1], list(kInCol[,2], kInCol[,3]), median)  
} #this is end of brancing "Mean" and the rest
    
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP calculation") 
Memory=GAPIT.Memory(Memory=Memory,Infor="cp calculation")

# add line names 
GA <- data.frame(group.membership)
GA <- data.frame(cbind(as.character(line.names),as.numeric(group.membership) ))

#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP Final") 
#Memory=GAPIT.Memory(Memory=Memory,Infor="CP Final")

#write.table(KG, paste("KG_from_", KT, "_Method.txt"), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE)

#print("GAPIT.Compress accomplished successfully!")
return(list(GA=GA, KG=KG,Timmer=Timmer,Memory=Memory))
}#The function GAPIT.Compress ends here

##############################################################################################
GAPIT.Block <- function(Z,GA,KG){
#Object: To split a group kinship into two blocks containing individuals with and without phenotype
#Output: GAU,KW,KO,KWO
#Authors: Zhiwu Zhang and Alex Lipka 
# Last update: April 14, 2011 
##############################################################################################

# To separate group kiship into two blocks: with and without phenotype.
# A group goes to with phenotype as loog as it has one phenotyped individual.

#find position in group assignment (GA) for the individual associate with phenotype (specified by Z)
#taxa=unique(intersect(as.matrix(Z[1,-1]),GA[,1]))

taxa.Z=as.matrix(Z[1,-1])
taxa.GA=as.matrix(GA[,1])
position=taxa.GA%in%taxa.Z

#Initial block as 2
GAU=cbind(GA,2)

#Assign block as 1 if the individual has phenotype
GAU[position,3]=1

#Modify the non-phenotyped individuals if they in a group with phenotyped individuals
#To find the groups with phenotyped individuals
#update block assignment for all these groups
#get list of group that should be block 1

grp.12=as.matrix(unique(GAU[,2]))
grp.1=as.matrix(unique(GAU[which(GAU[,3]==1),2]))
grp.2= as.matrix(setdiff(grp.12,grp.1))
numWithout=length(grp.2)

order.1=1:length(grp.1)
order.2=1:length(grp.2)
if(numWithout >0) grpblock=as.matrix(rbind(cbind(grp.1,1,order.1), cbind(grp.2,2,order.2)))
if(numWithout==0) grpblock=as.matrix(      cbind(grp.1,1,order.1),                       )

order.block=order(as.matrix(GAU[,3]))
colnames(grpblock)=c("grp","block","ID")

GAU0 <- merge(GAU[order.block,-3], grpblock, by.x = "X2", by.y = "grp")
GAU=GAU0[,c(2,1,3,4)]

KW=KG[grp.1,grp.1]
KO=KG[grp.2,grp.2]
KWO=KG[grp.1,grp.2]

#write.table(GAU, "GAU.txt", quote = FALSE, sep = "\t", row.names = TRUE,col.names = TRUE)

#print("GAPIT.Block accomplished successfully!")
return(list(GAU=GAU,KW=KW,KO=KO,KWO=KWO))
}#The function GAPIT.Block ends here


##############################################################################################
GAPIT.ZmatrixCompress <- function(Z,GAU){
#Object: To assign the fraction of a individual belonging to a group
#Output: Z
#Authors: Zhiwu Zhang
# Last update: April 14, 2011 
##############################################################################################

#sort Z column wise
order.Z=order(as.matrix(Z[1,-1]))
Z1=Z[-1,-1]
Z1 <- Z1[,order.Z]

#Extraction of GAU coresponding to Z, sort GAU rowwise to mach columns of Z, and make design matrix
effect.Z=as.matrix(Z[1,-1])
effect.GAU=as.matrix(GAU[,1])
GAU0=GAU[effect.GAU%in%effect.Z,]
order.GAU=order(GAU0[,1])
GAU1 <- GAU0[order.GAU,]
id.1=GAU1[which(GAU1[,3]==1),4]
n=max(as.numeric(as.vector(id.1)))
x=as.numeric(as.matrix(GAU1[,4]))
DS=diag(n)[x,]

#write.table(b, "debug.txt", quote = FALSE, sep = "\t", row.names = TRUE,col.names = TRUE)
#write.table(GAU1, "debug2.txt", quote = FALSE, sep = "\t", row.names = TRUE,col.names = TRUE)

#Z matrix from individual to group
Z1.numeric <- as.numeric(as.matrix(Z1))
Z1.matrix <- matrix(Z1.numeric, nrow = nrow(Z1), ncol = ncol(Z1)) 
Z2=Z1.matrix%*%DS

Z3=data.frame(cbind(as.character(Z[-1,1]),Z2))
Z=Z3[order(Z3[,1]),]


#write.table(DS, "debug.txt", quote = FALSE, sep = "\t", row.names = TRUE,col.names = TRUE)

#print("GAPIT.ZmatrixCompress accomplished successfully!")
return(list(Z=Z))
}#The function GAPIT.ZmatrixCompress ends here

##############################################################################################
GAPIT.GS <- function(KW,KO,KWO,GAU,UW){
#Object: to derive BLUP for the individuals without phenotype
#Output: BLUP
#Authors: Zhiwu Zhang 
# Last update: April 17, 2011 
##############################################################################################

UO=try(t(KWO)%*%solve(KW)%*%UW)
if(inherits(UO, "try-error")) UO=t(KWO)%*%ginv(KW)%*%UW

n=ncol(UW) #get number of columns, add additional for individual name

#Assign BLUP of group to its individuals
BLUP=data.frame(as.matrix(GAU[,1:4]))

BLUP.W=BLUP[which(GAU[,3]==1),]
order.W=order(as.numeric(as.matrix(BLUP.W[,4])))
ID.W=as.numeric(as.matrix(BLUP.W[order.W,4]))
n.W=max(ID.W)
DS.W=diag(n.W)[ID.W,]
ind.W=DS.W%*%UW
all.W=cbind(BLUP.W[order.W,],ind.W)
all=all.W

BLUP.O=BLUP[which(GAU[,3]==2),]
if(nrow(BLUP.O)>0){
order.O=order(as.numeric(as.matrix(BLUP.O[,4])))
ID.O=as.numeric(as.matrix(BLUP.O[order.O,4]))
n.O=max(ID.O)
DS.O=diag(n.O)[ID.O,]
ind.O=DS.O%*%UO
all.O=cbind(BLUP.O[order.O,],ind.O)
all=rbind(all.W,all.O)
}

colnames(all)=c("Taxa", "Group", "RefInf","ID","BLUP","PEV")
#write.table(index.W, "debug.txt", quote = FALSE, sep = "\t", row.names = TRUE,col.names = TRUE)

#print("GAPIT.GS accomplished successfully!")
return(list(BLUP=all))
}#The function GAPIT.GS ends here

##############################################################################################
GAPIT.Numericalization1.Add <- function(x){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################

x[x=="X"]="N"
x[x=="K"]="Z" #K (for GT genotype)is is replaced by Z to ensure heterozygose has the largest value

n=length(x)
lev=levels(as.factor(x))
lev=setdiff(lev,"N")
len=length(lev)
#print(lev)

#1 or less status
if(len<=1)x=0

#2 status
if(len==2)x=ifelse(x=="N",1,ifelse(x==lev[1],0,2))

#3 status
if(len==3)x=ifelse(x=="N",1,ifelse(x==lev[1],0,ifelse(x==lev[3],1,2)))

# more than 3 status
if(len> 3){
x=0
warning("Existing none biallelic SNP!")
}

return(matrix(x,n,1))
}#end of GAPIT.Numericalization1.Add function

##############################################################################################
GAPIT.Numericalization1.Dom <- function(x){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################

x[x=="X"]="N"


n=length(x)
lev=levels(as.factor(x))
lev=setdiff(lev,"N")
len=length(lev)
#print(lev)


# 2 or 3 status
x=ifelse((len<2|len>3),0,ifelse(x=="N",.5,ifelse((x=="R"|x=="Y"|x=="S"|x=="W"|x=="K"|x=="M"),0,1)))

return(matrix(x,length(x),1))
}#end of GAPIT.Numericalization1.Dom function


##############################################################################################
GAPIT.Numericalization2.Add <- function(x){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################
x[x=="N"]="NN"
x[x=="X"]="NN"
n=length(x)
lev=levels(as.factor(x))
lev=setdiff(lev,"NN")
len=length(lev)
#print(lev)


#x1=ifelse(x=="NN",NA,ifelse(x==lev[1],0,ifelse(x==lev[2],1,2)))

#1 or less status
if(len<=1)x=0

#2 status
if(len==2)x=ifelse(x=="NN",1,ifelse(x==lev[1],0,2))

#3 status
if(len==3)x=ifelse(x=="NN",1,ifelse(x==lev[1],0,ifelse(x==lev[2],1,2)))

# more than 3 status
if(len> 3){
x=0
warning("Existing none biallelic SNP!")
}

return(matrix(x,n,1))
}#end of GAPIT.Numericalization2.Add function

##############################################################################################
GAPIT.Numericalization2.Dom <- function(x){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################

lev=levels(as.factor(x))
lev=setdiff(lev,"NN")
len=length(lev)
#print(lev)

#x1=ifelse(x=="NN",NA,ifelse(x==lev[1],0,ifelse(x==lev[2],1,2)))

#1 or less status
if(len<=1)x1=0

# more than 3 status
if(len> 3){
x1=0
warning("Existing none biallelic SNP!")
}

#2 status
if(len==2|len==3)x1=ifelse(x=="NN",1,ifelse(substr(x,1,1)==substr(x,2,2),0,2))

return(matrix(x1,length(x),1))
}#end of GAPIT.Numericalization2.Dom function

##############################################################################################
GAPIT.HapMap <- function(G,model="Add",heading=TRUE){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################

#print(paste("Converting hampmap format to numerical under model of ", model,sep=""))
#gc()
#GAPIT.Memory.Object(name.of.trait="HapMap.Start")

#GT=data.frame(G[1,-(1:11)])
if(heading){
GT= t(G[1,-(1:11)])
GI= G[-1,c(1,3,4)]
}else{
GT=NULL
GI= G[,c(1,3,4)]
}

bit=nchar(as.character(G[2,12])) #to determine number of bits of genotype
#print(paste("Number of bits for genotype: ", bit))

if(bit==2){
if(heading){
if(model=="Add")GD= apply(G[-1,-(1:11)],1,GAPIT.Numericalization2.Add)
if(model!="Add")GD= apply(G[-1,-(1:11)],1,GAPIT.Numericalization2.Dom)
}else{
if(model=="Add")GD= apply(G[,-(1:11)],1,GAPIT.Numericalization2.Add)
if(model!="Add")GD= apply(G[,-(1:11)],1,GAPIT.Numericalization2.Dom)
}
}

if(bit==1){
if(heading){
if(model=="Add")GD= apply(G[-1,-(1:11)],1,GAPIT.Numericalization1.Add)
if(model!="Add")GD= apply(G[-1,-(1:11)],1,GAPIT.Numericalization1.Dom)
}else{
if(model=="Add")GD= apply(G[,-(1:11)],1,GAPIT.Numericalization1.Add)
if(model!="Add")GD= apply(G[,-(1:11)],1,GAPIT.Numericalization1.Dom)
}
}

if(heading)colnames(GT)="taxa"
colnames(GI)=c("SNP","Chromosome","Position")

#GAPIT.Memory.Object(name.of.trait="HapMap.Finished")
#print(paste("Succesfuly finished converting hampmap which has bits of ", bit,sep=""))
return(list(GT=GT,GD=GD,GI=GI))
}#end of GAPIT.HapMap function


##############################################################################################
GAPIT.CVMergePC<- function(X,Y){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################

#Z=X+Y

Z <- merge(X, Y, by.x = colnames(X)[1], by.y = colnames(Y)[1])

return(Z)
}#end of GAPIT.CVMergePCfunction



##############################################################################################
GAPIT.Memory <- function(Memory =NULL,Infor){
#Object: To report memory usage
#Output: Memory 
#Authors: Zhiwu Zhang
# Last update: June 6, 2011 
##############################################################################################
gc()
size <- memory.size()
#print(paste("Memory usage: ",size," for", Infor))
if(is.null(Memory)) {
Increased=0
Memory =cbind(Infor,size ,Increased)
}else{
Increased=0
Memory.current=cbind(Infor,size ,Increased)
Memory=rbind(Memory,Memory.current)
Memory[nrow(Memory),3]=as.numeric(as.matrix(Memory[nrow(Memory),2]))-as.numeric(as.matrix(Memory[nrow(Memory)-1,2]))
}

return (Memory)
}#end of GAPIT.Memory function

##############################################################################################
GAPIT.Memory.Object <- function(name.of.trait="Trait"){
# Object: To report memoery usage
# Authors: Heuristic Andrew
# http://heuristically.wordpress.com/2010/01/04/r-memory-usage-statistics-variable/
# Modified by Zhiwu Zhang
# Last update: may 29, 2011 
##############################################################################################
  
# print aggregate memory usage statistics 
print(paste('R is using', memory.size(), 'MB out of limit', memory.limit(), 'MB')) 
  
# create function to return matrix of memory consumption 
object.sizes <- function() 
{ 
    return(rev(sort(sapply(ls(envir=.GlobalEnv), function (object.name) 
        object.size(get(object.name)))))) 
} 

# export file in table format 
memory=object.sizes() 
file=paste("GAPIT.", name.of.trait,".Memory.Object.csv" ,sep = "")
write.table(memory, file, quote = FALSE, sep = ",", row.names = TRUE,col.names = TRUE)


# export file in PDF format 
pdf(paste("GAPIT.", name.of.trait,".Memory.Object.pdf" ,sep = ""))
# draw bar plot 
barplot(object.sizes(), 
    main="Memory usage by object", ylab="Bytes", xlab="Variable name", 
    col=heat.colors(length(object.sizes()))) 
# draw dot chart 
dotchart(object.sizes(), main="Memory usage by object", xlab="Bytes") 
# draw pie chart 
pie(object.sizes(), main="Memory usage by object")
dev.off()  
}  



##############################################################################################
GAPIT.Timmer <- function(Timmer=NULL,Infor){
#Object: To report current time
#Output: Timmer
#Authors: Zhiwu Zhang
# Last update: may 8, 2011 
##############################################################################################

Time<- Sys.time()
if(is.null(Timmer)) {
Elapsed=0
Timmer=cbind(Infor,Time,Elapsed)
}else{
Elapsed=0
Timmer.current=cbind(Infor,Time,Elapsed)
Timmer=rbind(Timmer,Timmer.current)
Timmer[nrow(Timmer),3]=as.numeric(as.matrix(Timmer[nrow(Timmer),2]))-as.numeric(as.matrix(Timmer[nrow(Timmer)-1,2]))
}

#print(paste('Time used: ', Timmer[nrow(Timmer),3], ' seconds for ',Infor,sep="" )) 
return (Timmer)
}#end of GAPIT.EMMAxP3D function


##############################################################################################
GAPIT.Log <- function(Y=Y,KI=KI,Z=Z,CV=CV,emmaXp3d=emmaXp3d,
				groupFrom = groupFrom ,groupTo =groupTo ,groupBy = groupBy ,CA = CA, KT= KT,
                      	ngrid = ngrid , llin = llin , ulim = ulim , esp = esp ,name.of.trait = name.of.trait){
#Object: To report model factors
#Output: Text file (GAPIT.Log.txt)
#Authors: Zhiwu Zhang
# Last update: may 16, 2011 
##############################################################################################

#Creat storage
facto <- list(NULL)
value <- list(NULL)

#collecting model factors

facto[[1]]="Trait"
value[[1]]=paste(dim(Y))

facto[[2]]="groupBy "
value[[2]]=groupBy 

facto[[3]]="Trait name "
value[[3]]=name.of.trait

facto[[4]]="Kinship"
value[[4]]=dim(KI)

facto[[5]]="Z Matrix"
value[[5]]=dim(Z)

facto[[6]]="Covariate"
value[[6]]=dim(CV)

facto[[7]]="EMMAxP3D"
value[[7]]=emmaXp3d

facto[[8]]="Clustering algorithms"
value[[8]]=CA

facto[[9]]="Group kinship"
value[[9]]=KT

facto[[10]]="groupFrom "
value[[10]]=groupFrom 

facto[[11]]="groupTo "
value[[11]]=groupTo 



theLog=as.matrix(cbind(facto,value))
#theLog=as.character(as.matrix(cbind(facto,value)))
colnames(theLog)=c("Model", "Value")
file=paste("GAPIT.", name.of.trait,".Log.csv" ,sep = "")
write.table(theLog, file, quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)

return (theLog)
}


##############################################################################################
GAPIT.Perform.BH.FDR.Multiple.Correction.Procedure <- function(PWI = PWI, FDR.Rate = 0.05, FDR.Procedure = "BH"){
#Object: Conduct the Benjamini-Hochberg FDR-Controlling Procedure
#Output: PWIP, number.of.significant.SNPs
#Authors: Alex Lipka and Zhiwu Zhang 
# Last update: May 5, 2011 
##############################################################################################


    #Make sure that your compouter has the latest version of Bioconductor (the "Biobase" package) and multtest

if(is.null(PWI))
{
PWIP=NULL
number.of.significant.SNPs = 0
}

if(!is.null(PWI))
{  
 
    #library(multtest)
    
    if(dim(PWI)[1] == 1){
     PWIP <- cbind(PWI, PWI[4])
     colnames(PWIP)[5] <- "FDR_Adjusted_P-values"
    }
   
    if(dim(PWI)[1] > 1){ 
    #mt.rawp2adjp Performs the Simes procedure.  The output should be two columns, Left column: originial p-value
    #Right column: Simes corrected p-value
    res <- mt.rawp2adjp(PWI[,4], FDR.Procedure)

    #This command should order the p-values in the order of the SNPs in the data set
  adjp <- res$adjp[order(res$index), ]

  #round(adjp[1:7,],4)
    #Logical statment: 0, if Ho is not rejected; 1, if  Ho is rejected, by the Simes corrected p-value
  temp <- mt.reject(adjp[,2], FDR.Rate)

    #Lists all number of SNPs that were rejected by the BY procedure
  #temp$r

    #Attach the FDR adjusted p-values to AS_Results

  PWIP <- cbind(PWI, adjp[,2])

    #Sort these data by lowest to highest FDR adjusted p-value
  PWIP <- PWIP[order(PWIP[,4]),]
  
  colnames(PWIP)[7] <- "FDR_Adjusted_P-values"
  number.of.significant.SNPs = temp$r
  }
  #print("GAPIT.Perform.BH.FDR.Multiple.Correction.Procedure accomplished successfully!")
}  
  return(list(PWIP=PWIP, number.of.significant.SNPs = number.of.significant.SNPs))

}



##############################################################################################
GAPIT.Pruning <- function(values,DPP=5000){
#Object: To get index of subset that evenly distribute
#Output: Index
#Authors: Zhiwu Zhang
# Last update: May 28, 2011 
##############################################################################################

#No change if below the requirement
if(length(values)<=DPP)return(c(1:length(values)))

#values= log.P.values

values=sqrt(values)  #This shift the weight a little bit to the low building.
theMin=min(values)
theMax=max(values)
range=theMax-theMin
interval=range/DPP

ladder=round(values/interval)
ladder2=c(ladder[-1],0)
keep=ladder-ladder2
index=which(keep>0)

return(index)
}#end of GAPIT.Pruning 



##############################################################################################
GAPIT.QQ <- function(P.values, plot.type = "log_P_values", name.of.trait = "Trait",DPP=50000){
#Object: Make a QQ-Plot of the P-values
#Options for plot.type = "log_P_values" and "P_values" 
#Output: A pdf of the QQ-plot
#Authors: Alex Lipka and Zhiwu Zhang
# Last update: May 9, 2011 
##############################################################################################


# Sort the data by the raw P-values
P.values <- P.values[order(P.values)]
  
#Set up the p-value quantiles
p_value_quantiles <- (1:length(P.values))/(length(P.values)+1)


if(plot.type == "log_P_values")
{
    log.P.values <- -log10(P.values)
    log.Quantiles <- -log10(p_value_quantiles)
	
    index=GAPIT.Pruning(log.P.values,DPP=DPP)
    log.P.values=log.P.values[index ]
    log.Quantiles=log.Quantiles[index]

    pdf(paste("GAPIT.", name.of.trait,".QQ-Plot.pdf" ,sep = ""))
    par(mar = c(5,5,5,5))
    qqplot(log.Quantiles, log.P.values, xlim = c(0,max(log.Quantiles)), ylim = c(0,max(log.P.values)), 
           cex.axis=1.5, cex.lab=2, lty = 1, lwd = 1, col = "Blue" ,xlab =expression(Expected~~-log[10](italic(p))),
           ylab = expression(Observed~~-log[10](italic(p))), main = paste(name.of.trait,sep=" "))
    abline(a = 0, b = 1, col = "red")
    dev.off()   
}


if(plot.type == "P_values")
{
  pdf(paste("QQ-Plot_", name.of.trait,".pdf" ,sep = ""))
  par(mar = c(5,5,5,5))
  qqplot(p_value_quantiles, P.values, xlim = c(0,1), 
         ylim = c(0,1), type = "l" , xlab = "Uniform[0,1] Theoretical Quantiles", 
         lty = 1, lwd = 1, ylab = "Quantiles of P-values from GWAS", col = "Blue",
         main = paste(name.of.trait,sep=" "))
  abline(a = 0, b = 1, col = "red")
  dev.off()   
}


  #print("GAPIT.QQ  accomplished successfully!")
  

}


##############################################################################################
GAPIT.Manhattan <- function(GI.MP = NULL, name.of.trait = "Trait", 
                   plot.type = "Genomewise",DPP=50000){
#Object: Make a Manhattan Plot
#Options for plot.type = "Separate_Graph_for_Each_Chromosome" and "Same_Graph_for_Each_Chromosome" 
#Output: A pdf of the Manhattan Plot
#Authors: Alex Lipka, Zhiwu Zhang, and Meng Li 
# Last update: May 10, 2011 
##############################################################################################

print("Manhattan ploting...")

#do nothing if null input
if(is.null(GI.MP)) return

GI.MP=matrix(as.numeric(as.matrix(GI.MP) ) ,nrow(GI.MP),ncol(GI.MP))

#Remove all SNPs that do not have a choromosome and bp position
GI.MP <- GI.MP[!is.na(GI.MP[,1]),]
GI.MP <- GI.MP[!is.na(GI.MP[,2]),]

#Remove all SNPs that have P values above 0 (not na etc)
GI.MP <- GI.MP[GI.MP[,3]>0,]

#Replace P the -log10 of the P-values
GI.MP[,3] <-  -log10(GI.MP[,3])
y.lim <- ceiling(max(GI.MP[,3]))
chm.to.analyze <- unique(GI.MP[,1]) 
chm.to.analyze=chm.to.analyze[order(chm.to.analyze)]
numCHR= length(chm.to.analyze)

#Chromosomewise plot
if(plot.type == "Chromosomewise")
{
  pdf(paste("GAPIT.", name.of.trait,".Manhattan-Plot.Chromosomewise.pdf" ,sep = ""), width = 10)
  par(mar = c(5,5,4,3), lab = c(8,5,7))
  for(i in 1:numCHR)
  {
    #Extract SBP on this chromosome
    subset=GI.MP[GI.MP[,1]==chm.to.analyze[i],]
  	
    x <- as.numeric(subset[,2])/10^(6)
    y <- as.numeric(subset[,3])
 	  #print(paste("befor prune: chr: ",i, "length: ",length(x),"max p",max(y), "min p",min(y), "max x",max(x), "Min x",min(x)))


        
  	#Prune most non important SNPs off the plots
    order=order(y,decreasing = TRUE)
    y=y[order]
    x=x[order]
    
    index=GAPIT.Pruning(y,DPP=round(DPP/numCHR))
      	
  	x=x[index]
  	y=y[index]
  	
 	  #print(paste("after prune: chr: ",i, "length: ",length(x),"max p",max(y), "min p",min(y), "max x",max(x), "Min x",min(x)))
 	  
    #color.vector <- subset(temp.par.data[,7], temp.par.data[,4] == i)
    plot(y~x,type="p", ylim=c(0,y.lim), xlim = c(min(x), max(x)), col = "navy", xlab = expression(Base~Pairs~(x10^-6)), ylab = "-Log Base 10 p-value", main = paste("Chromosome",chm.to.analyze[i],sep=" "))
  	#print("manhattan plot (chr) finished")    
  }
  dev.off()
  print("manhattan plot on chromosome finished")
} #Chromosomewise plot


#Genomewise plot
if(plot.type == "Genomewise")
{
  
  GI.MP <- GI.MP[order(GI.MP[,2]),]
  GI.MP <- GI.MP[order(GI.MP[,1]),]
  color.vector <- rep(c("orangered","navyblue"),numCHR)
  ticks=NULL
  lastbase=0
  
  #change base position to accumulatives
  for (i in chm.to.analyze)
  {
    index=(GI.MP[,1]==i)
    ticks <- c(ticks, lastbase+mean(GI.MP[index,2])) 
    GI.MP[index,2]=GI.MP[index,2]+lastbase
    lastbase=max(GI.MP[index,2])
  }

    x0 <- as.numeric(GI.MP[,2])
    y0 <- as.numeric(GI.MP[,3])
    z0 <- as.numeric(GI.MP[,1])
	position=order(y0,decreasing = TRUE)
    index0=GAPIT.Pruning(y0[position],DPP=DPP)
	index=position[index0]
	x=x0[index]
	y=y0[index]
	z=z0[index]

  pdf(paste("GAPIT.", name.of.trait,".Manhattan-Plot.Genomewise.pdf" ,sep = ""), width = 10)
  par(mar = c(5,5,5,1))

 plot(y~x,xlab=expression(Chromosome),ylab=expression(-log[10](italic(p))) ,
       cex.lab=2,col=ifelse(z%%2==0,"orangered","navy"),axes=FALSE,type = "p",pch=20,main = paste(name.of.trait,sep=" "))

 axis(1, at=ticks,cex.axis=1.5,labels=chm.to.analyze,tick=F)
  axis(2, at=1:y.lim,cex.axis=1.5,labels=1:y.lim,tick=F)
  box()
  dev.off()
} #Genomewise plot

  #print("GAPIT.Manhattan accomplished successfully!")
} #end of GAPIT.Manhattan


##############################################################################################
GAPIT.Compression.Visualization <- function(Compression = Compression, name.of.trait = name.of.trait){
#Object: Conduct the Benjamini-Hochberg FDR-Controlling Procedure
#Output: Three pdfs: One of the log likelihood function, one of the genetic and error variance component,
#                    and one of the heritabilities
#Authors: Alex Lipka and Zhiwu Zhang 
# Last update: May 10, 2011 
##############################################################################################

#Graph the optimum compression 

if(length(Compression)<=6) Compression=t(as.matrix(Compression[which(Compression[,4]!="NULL"&Compression[,4]!="NaN"),]))
if(length(Compression)>6) Compression=Compression[which(Compression[,4]!="NULL"&Compression[,4]!="NaN"),]

LL=as.numeric(Compression[,4])
Compression.best=Compression[which(LL==min(LL)),] 

if(length(Compression.best)>6) Compression.best=Compression.best[1,] #Keep the first if multiple combinations
variance=as.numeric(Compression.best[5:6])
colors <- c("grey50","grey70")
labels0 <- round(variance/sum(variance) * 100, 1)
labels <- paste(labels0, "%", sep="")
LL.best0=as.numeric(Compression.best[4]  )
LL.best=floor(LL.best0*100)/100
theOptimum=paste(c(Compression.best[c(1:3)],LL.best) )

pdf(paste("GAPIT.", name.of.trait,".Optimum.pdf", sep = ""), width = 14)
par(mfrow = c(1,1), mar = c(1,1,5,5), lab = c(5,5,7))
pie(variance,  col=colors, labels=labels,angle=45)
legend(1.0, 0.5, c("Genetic varaince","Residual varaiance"), cex=1.5, 
   fill=colors)

#Display the optimum compression
text(1.5,.0, "The optimum compression", col= "red")
for(i in 1:4){
text(1.5,-.1*i, theOptimum[i], col= "red")
}
dev.off() 

#Graph compression with multiple groups
if(length(unique(Compression[,3]))>1)
{
#Create a vector of colors
color.vector.basic <- c("red","blue","black", "blueviolet","indianred","cadetblue","orange")
color.vector.addition <- setdiff(c(colors()[grep("red",colors())], colors()[grep("blue",colors())]),color.vector.basic )
color.vector.addition.mixed <- sample(color.vector.addition,max(0,((length(unique(Compression[,1])) * length(unique(Compression[,2])))-length(color.vector.basic))))  
color.vector <- c(color.vector.basic,color.vector.addition.mixed )


#Create a vector of numbers for the line dot types
line.vector <-  rep(1:(length(unique(Compression[,1])) * length(unique(Compression[,2]))))



#We want to have a total of three plots, one displaying the likelihood function, one displaying the variance components, and one displaying the
# heritability 

pdf(paste("GAPIT.", name.of.trait,".Compression.multiple.group.", ".pdf", sep = ""), width = 14)

par(mfrow = c(2,3), mar = c(5,5,1,1), lab = c(5,5,7))

# Make the likelihood function plot

k <- 1
for(i in 1:length(unique(Compression[,1]))){
  for(j in 1:length(unique(Compression[,2]))){

     if((i == 1)&(j == 1)) {
      Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,4])  
      plot(y~x,type="l", pch = 30, lty = line.vector[i], ylim=c(min(as.numeric(Compression[,4])),max(as.numeric(Compression[,4]))), xlim = c(min(as.numeric(Compression[,3])),max(as.numeric(Compression[,3]))),
      col = color.vector[j], xlab = "Number of Groups", ylab = "-2Log Likelihoood", )
      label = paste(c(as.character(unique(Compression[,1]))[k]," ",as.character(unique(Compression[,2]))[j]), collapse = "")
      }
  
    if((i != 1)|(j != 1)) {
      k <- k+1   
      Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,4])  
      lines(y~x,type="l", pch = 30, lty = line.vector[i], col = color.vector[j])
      label = c(label, paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = ""))
      }  
   }
 }
 #Make a legend
  #legend("topright",  label, fill = color.vector) 

 

# Make the genetic variance component plots
k <- 1
for(i in 1:length(unique(Compression[,1]))){
  for(j in 1:length(unique(Compression[,2]))){

     if((i == 1)&(j == 1)) {
      Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,5])  
      plot(y~x,type="l", pch = 17,  lty = line.vector[i], ylim=c(min(as.numeric(Compression[,5])),max(as.numeric(Compression[,5]))), xlim = c(min(as.numeric(Compression[,3])),max(as.numeric(Compression[,3]))),
      col = color.vector[j], xlab = "Number of Groups", ylab = "Genetic Variance", )
      #label = paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "")
      }
  
    if((i != 1)|(j != 1)) {
      k <- k+1   
      Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,5])  
      lines(y~x,type="l", pch = 17, lty = line.vector[i], col = color.vector[j])
      #label = c(label, paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = ""))
      }  
   }
 }
 #Make a legend
  #legend("topleft",  label, fill = color.vector) 


# Make the residual variance component plots
k <- 1
for(i in 1:length(unique(Compression[,1]))){
  for(j in 1:length(unique(Compression[,2]))){

     if((i == 1)&(j == 1)) {
      Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,6])  
      plot(y~x,type="l", pch = 17,  ylim=c(min(as.numeric(Compression[,6])),max(as.numeric(Compression[,6]))), xlim = c(min(as.numeric(Compression[,3])),max(as.numeric(Compression[,3]))),
      col = color.vector[j], xlab = "Number of Groups", ylab = "Residual Variance", )
      #label = paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "")
      }
  
    if((i != 1)|(j != 1)) {
      k <- k+1   
      Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,6])  
      lines(y~x,type="l", pch = 17, lty = line.vector[i], col = color.vector[j])
      #label = c(label, paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = ""))
      }  
   }
 }
 #Make a legend
  #legend("topright",  label, fill = color.vector) 


#calculate total variance and h2
heritablilty.vector <- as.numeric(Compression[,5])/(as.numeric(Compression[,5]) + as.numeric(Compression[,6]))
totalVariance.vector <- as.numeric(as.numeric(Compression[,5]) + as.numeric(Compression[,6]))
Compression.h2 <- cbind(Compression, heritablilty.vector,totalVariance.vector)

# Make the total variance component plots
k <- 1
for(i in 1:length(unique(Compression.h2[,1]))){
  for(j in 1:length(unique(Compression.h2[,2]))){

     if((i == 1)&(j == 1)) {
      Compression.subset <- Compression.h2[which( (Compression.h2[,1] == as.character(unique(Compression.h2[,1])[i])) & (Compression.h2[,2] == as.character(unique(Compression.h2[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,8])  
      plot(y~x,type="l", pch = 17,  lty = line.vector[k], ylim=c(min(as.numeric(Compression.h2[,8])),max(as.numeric(Compression.h2[,8]))), xlim = c(min(as.numeric(Compression.h2[,3])),max(as.numeric(Compression.h2[,3]))),
      col = color.vector[1], xlab = "Number of Groups", ylab = "Total Variance", )
      #label = paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "")
      }
  
    if((i != 1)|(j != 1)) {
      k <- k+1   
      Compression.subset <- Compression.h2[which( (Compression.h2[,1] == as.character(unique(Compression.h2[,1])[i])) & (Compression.h2[,2] == as.character(unique(Compression.h2[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,8]) 
      lines(y~x,type="l", pch = 17, lty = line.vector[i], col = color.vector[j])
      #label = c(label, paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = ""))
      }  
   }
 }
 #Make a legend
  #legend("topright",  label, fill = color.vector) 
  

# Make the heritability plots 
k <- 1
for(i in 1:length(unique(Compression[,1]))){
  for(j in 1:length(unique(Compression[,2]))){

     if((i == 1)&(j == 1)) {
      Compression.subset <- Compression.h2[which( (Compression.h2[,1] == as.character(unique(Compression.h2[,1])[i])) & (Compression.h2[,2] == as.character(unique(Compression.h2[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,7]) 

      plot(y~x,type="l", pch = 17,  lty = line.vector[k], ylim=c(min(as.numeric(Compression.h2[,7])),max(as.numeric(Compression.h2[,7]))), xlim = c(min(as.numeric(Compression.h2[,3])),max(as.numeric(Compression.h2[,3]))),
      col = color.vector[1], xlab = "Number of Groups", ylab = "Heritability", )
      #label = paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "")
      }
  
    if((i != 1)|(j != 1)) {
      k <- k+1   
      Compression.subset <- Compression.h2[which( (Compression.h2[,1] == as.character(unique(Compression.h2[,1])[i])) & (Compression.h2[,2] == as.character(unique(Compression.h2[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,7])  
      lines(y~x,type="l", lty = line.vector[i], pch = 17, col = color.vector[j])
      #label = c(label, paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = ""))
      }       
   }
 }
 
 #Make a legend
  #legend("topleft",  label, fill = color.vector) 
  
legend.col= 1+floor(length(unique(Compression[,1])) * length(unique(Compression[,2]))/20)
line.style=rep(1:length(unique(Compression[,1])), each = length(unique(Compression[,2])))      
line.color=rep(1:length(unique(Compression[,2])), length(unique(Compression[,1])))



# Make labels
      plot(0~0,axes=FALSE,type="l",ylab = "",xlab = "",frame.plot=FALSE)
      legend("topleft",  label, col = color.vector[line.color], lty = line.style, ncol=legend.col,horiz=FALSE) 
   
 
dev.off()
}#end of Graph compression with multiple groups

#Graph compression with single groups
if(length(unique(Compression[,3]))==1& length(unique(Compression[,1]))*length(unique(Compression[,2]))>1)
{

#Graph the compression with only one group
pdf(paste("GAPIT.Compression.single.group.", name.of.trait, ".pdf", sep = ""), width = 14)
par(mfrow = c(2,2), mar = c(5,5,1,1), lab = c(5,5,7))

nkt=length(unique(Compression[,1]))
nca=length(unique(Compression[,2]))
kvr=rep(c(1:nkt),nca)
kvc0=rep(c(1:nca),nkt)
kvc=as.numeric(t(matrix(kvc0,nca,nkt)))
kt.name=Compression[1:nkt,1]

ca.index=((1:nca)-1)*nkt+1
ca.name=Compression[ca.index,2]

KG<- t(tapply(as.numeric(Compression[,4]), list(kvr, kvc), mean))
colnames(KG)=kt.name
barplot(as.matrix(KG),  ylab= "-2 Log Likelihood",beside=TRUE, col=rainbow(length(unique(Compression[,2]))))


KG<- t(tapply(as.numeric(Compression[,5]), list(kvr, kvc), mean))
colnames(KG)=kt.name
barplot(as.matrix(KG),  ylab= "Genetic varaince", beside=TRUE, col=rainbow(length(unique(Compression[,2]))))

KG<- t(tapply(as.numeric(Compression[,6]), list(kvr, kvc), mean))
colnames(KG)=kt.name
barplot(as.matrix(KG),  ylab= "Residual varaince", beside=TRUE, col=rainbow(length(unique(Compression[,2]))))

KG<- t(tapply(as.numeric(Compression[,5])/(as.numeric(Compression[,5])+as.numeric(Compression[,6])), list(kvr, kvc), mean))
colnames(KG)=kt.name
barplot(as.matrix(KG),  ylab= "Heritability", beside=TRUE, col=rainbow(length(unique(Compression[,2]))),ylim=c(0,1))

legend("topleft", paste(t(ca.name)), cex=0.8,bty="n", fill=rainbow(length(unique(Compression[,2]))),horiz=TRUE)
dev.off() 
} #end of Graph compression with single groups

#print("GAPIT.Compression.Visualization accomplished successfully!")

}#GAPIT.Compression.Plots ends here





##############################################################################################
GAPIT.Table <- function(final.table = final.table, name.of.trait = name.of.trait,FDR.Filter.Rate=1){
#Object: Make and export a table of summary information from GWAS
#Output: A table summarizing GWAS results
#Authors: Alex Lipka and Zhiwu Zhang
# Last update: May 10, 2011 
##############################################################################################

#Filter SNPs by FDR
index=(final.table[,7]<=FDR.Filter.Rate)
final.table=final.table[index,]

#Export this summary table as an excel file
write.table(final.table, paste("GAPIT.", name.of.trait, ".GWAS.Results.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)


#print("GAPIT.Table accomplished successfully!")
  

}   #GAPIT.Table ends here



##############################################################################################
GAPIT.GS.Visualization <- function(gsBLUP = gsBLUP, BINS=BINS, name.of.trait = name.of.trait){
#Object: To build heat map to show distribution of BLUP and PEV
#Output: pdf
#Authors: Zhiwu Zhang 
# Last update: May 15, 2011 
##############################################################################################


nBin=BINS

BLUP= gsBLUP[,5]
PEV = gsBLUP[,6]

if(BLUP[1]=="NaN"){
  warning ("It was not converged. BLUP was not created!")
}
if(BLUP[1]!="NaN")
{
  range.BLUP=max(BLUP)-min(BLUP)
  range.PEV=max(PEV)-min(PEV)
  
  interval.BLUP=range.BLUP/nBin
  interval.PEV=range.PEV/nBin
  
  
  bin.BLUP=floor(BLUP/max(BLUP)*nBin)*max(BLUP)/nBin
  bin.PEV=floor(PEV/max(PEV)*nBin)*max(PEV)/nBin
  
  
  distinct.BLUP=unique(bin.BLUP)
  distinct.PEV=unique(bin.PEV)
  
  Position.BLUP=match(bin.BLUP,distinct.BLUP,nomatch = 0)
  Position.PEV=match(bin.PEV,distinct.PEV,nomatch = 0)
  
  value=matrix(1,length(Position.BLUP))
  KG<- (tapply(as.numeric(value), list(Position.BLUP, Position.PEV), sum))
  
  rownames(KG)=round(distinct.BLUP, digits = 4)
  colnames(KG)=round(distinct.PEV, digits = 4)
  
  #Sort the rows and columns in order from smallest to largest
  
  rownames(KG) <- rownames(KG)[order(as.numeric(rownames(KG)))]
  colnames(KG) <- colnames(KG)[order(as.numeric(colnames(KG)))]
  
  #write.table(KG, "Input_Matrix_for_GS_Heat_Map.txt", quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE)

  pdf(paste("GAPIT.", name.of.trait,".GPS.BLUPvsPEV", ".pdf", sep = ""),width = 9)
  #par(mfrow = c(1,1), mar = c(1,1,5,5), lab = c(5,5,7))
  par(mar = c(5,5,6,5))
  
  nba_heatmap <- heatmap(KG, Rowv=NA, Colv=NA,  col =  rev(heat.colors(256)),   scale="column", 
  xlab = "PEV", ylab = "BLUP", main = " ")

  #nba_heatmap <- heatmap.2(KG,  cexRow =.2, cexCol = 0.2, scale="none", symkey=FALSE, trace="none" )
 
  
  #cexRow =0.9, cexCol = 0.9)
  dev.off() 
}
#print("GAPIT.GS.Visualization accomplished successfully!")

}   #GAPIT.GS.Visualization ends here


      
##############################################################################################
GAPIT.kinship.loiselle <- function(snps, method="additive", use="all") {
# Object: To calculate the kinship matrix using the method of Loiselle et al. (1995)
# Authors: Alex Lipka and Hyun Min Kang
# Last update: May 31, 2011 
############################################################################################## 


  #Number of SNP types that are 0s
  n0 <- sum(snps==0,na.rm=TRUE)
  #Number of heterozygote SNP types
  nh <- sum(snps==1,na.rm=TRUE)
  #Number of SNP types that are 1s
  n1 <- sum(snps==2,na.rm=TRUE)
  #Number of SNP types that are missing
  nNA <- sum(is.na(snps))
  

 
  #Self explanatory
  dim(snps)[1]*dim(snps)[2]
  #stopifnot(n0+nh+n1+nNA == length(snps))

    
  #Note that the two lines in if(method == "dominant") and if(method == "recessive") are found in
  #if(method == "additive").  Worry about this only if you have heterozygotes, which you do not.
  if ( method == "dominant" ) {
    flags <- matrix(as.double(colMeans(snps,na.rm=TRUE) > 1),ncol(snps),nrow(snps))
    snps[!is.na(snps) && (snps == 1)] <- flags[!is.na(snps) && (snps == 1)]
  }
  else if ( method == "recessive" ) {
    flags <- matrix(as.double(colMeans(snps,na.rm=TRUE) < 1),ncol(snps),nrow(snps))
    snps[!is.na(snps) && (snps == 1)] <- flags[!is.na(snps) && (snps == 1)]
  }
  else if ( ( method == "additive" ) && ( nh > 0 ) ) {
    dsnps <- snps
    rsnps <- snps
    flags <- matrix(as.double(colMeans(snps,na.rm=TRUE) > 1),ncol(snps),nrow(snps))
    dsnps[!is.na(snps) && (snps==1)] <- flags[is.na(snps) && (snps==1)]
    flags <- matrix(as.double(colMeans(snps,na.rm=TRUE) < 1),ncol(snps),nrow(snps))
    rsnps[!is.na(snps) && (snps==1)] <- flags[is.na(snps) && (snps==1)]
    snps <- cbind(dsnps,rsnps)
  }

  #mafs is a (# lines)x(# SNPs)x matrix.  The rows mafs are identical, and the ij^th element is the average
  #allele frequency for the SNP in the j^th column.
  
  #if(use == "all") imputes missing SNP type values with the expected (average) allele frequency.
  if ( use == "all" ) {
    mafs <- matrix(colMeans(snps,na.rm=TRUE),ncol(snps),nrow(snps))
    snps[is.na(snps)] <- mafs[is.na(snps)]
  }
  else if ( use == "complete.obs" ) {
    mafs <- matrix(colMeans(snps,na.rm=TRUE),ncol(snps),nrow(snps))
    snps <- snps[colSums(is.na(snps))==0,]
  }
  mafs_comp <- 1-mafs
  snps_comp <- 1-snps
  

  n <- nrow(snps)
  K <- matrix(nrow=n,ncol=n)
  diag(K) <- 1
  #Create the k term on page 1422 of Loiselle et al. (1995)

  missing <- rep(NA, dim(snps)[2])  
  for(i in 1:dim(snps)[2]) {
    missing[i] <- sum(is.na(snps[,i]))
  }
  

  for(i in 1:(n-1)) {
    for(j in (i+1):n) {
      Num_First_Term_1 <- (snps[i,]-mafs[i,])*(snps[j,]-mafs[j,])
      Num_First_Term_2 <- (snps_comp[i,]-mafs_comp[i,])*(snps_comp[j,]-mafs_comp[j,])
      First_Term <- sum(Num_First_Term_1)+sum(Num_First_Term_2)

      Num_Second_Term_1 <- mafs[,i]*(1-mafs[,i])
      Num_Second_Term_2 <- mafs_comp[,i]*(1-mafs_comp[,i])
      Num_Second_Term_Bias_Correction <- 1/((2*n)-missing - 1)
      Num_Second_Term <-  Num_Second_Term_1 + Num_Second_Term_2
      Second_Term <- sum(Num_Second_Term*Num_Second_Term_Bias_Correction)

      Third_Term <- sum(Num_Second_Term) 
      
      f <- (First_Term + Second_Term)/Third_Term
      if(f <= 0){
            K[i,j] <- 0
      }
      else{
            K[i,j] <- f
      }
      K[j,i] <- K[i,j]
    }
  }
  return(K)
}


##############################################################################################

GAPIT.PCA <- function(X,taxa, PC.number = min(ncol(X),nrow(X))){
# Object: Conduct a principal component analysis, and output the prinicpal components into the workspace,
#         a text file of the principal components, and a pdf of the scree plot
# Authors: Alex Lipka and Hyun Min Kang
# Last update: May 31, 2011 
############################################################################################## 

#Conduct the PCA 
PCA.X <- prcomp(X)

#Create a Scree plot 
pdf("GAPIT.PCA.eigenValue.pdf", width = 12, height = 12)
par(mar = c(5,5,5,5))
screeplot(PCA.X, type="lines")
dev.off()

pdf("GAPIT.PCA_1vs2.pdf", width = 12, height = 12)
par(mar = c(5,5,5,5))
plot(PCA.X$x[,1],PCA.X$x[,2],xlab="PC1",ylab="PC2",pch=1,col="red",cex=1.0,cex.lab=1.5, cex.axis=1.2, lwd=2,las=1)
dev.off()

#Extract number of PCs needed
PCs <- cbind(taxa,as.data.frame(PCA.X$x[,1:PC.number]))

#Remove duplicate (This is taken care by QC)
#PCs.unique <- unique(PCs[,1])
#PCs <-PCs[match(PCs.unique, PCs[,1], nomatch = 0), ]

#Write the PCs into a text file
write.table(PCs, "GAPIT.PCA.csv", quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)

#Return the PCs
return(PCs)
}



      
##############################################################################################
GAPIT.kinship.loiselle <- function(snps, method="additive", use="all") {
# Object: To calculate the kinship matrix using the method of Loiselle et al. (1995)
# Authors: Alex Lipka and Hyun Min Kang
# Last update: May 31, 2011 
############################################################################################## 
 
  #Number of SNP types that are 0s
  n0 <- sum(snps==0,na.rm=TRUE)
  #Number of heterozygote SNP types
  nh <- sum(snps==0.5,na.rm=TRUE)
  #Number of SNP types that are 1s
  n1 <- sum(snps==1,na.rm=TRUE)
  #Number of SNP types that are missing
  nNA <- sum(is.na(snps))
  

 
  #Self explanatory
  dim(snps)[1]*dim(snps)[2]
  #stopifnot(n0+nh+n1+nNA == length(snps))

    
  #Note that the two lines in if(method == "dominant") and if(method == "recessive") are found in
  #if(method == "additive").  Worry about this only if you have heterozygotes, which you do not.
  if ( method == "dominant" ) {
    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps))
    snps[!is.na(snps) && (snps == 0.5)] <- flags[!is.na(snps) && (snps == 0.5)]
  }
  else if ( method == "recessive" ) {
    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps))
    snps[!is.na(snps) && (snps == 0.5)] <- flags[!is.na(snps) && (snps == 0.5)]
  }
  else if ( ( method == "additive" ) && ( nh > 0 ) ) {
    dsnps <- snps
    rsnps <- snps
    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps))
    dsnps[!is.na(snps) && (snps==0.5)] <- flags[is.na(snps) && (snps==0.5)]
    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps))
    rsnps[!is.na(snps) && (snps==0.5)] <- flags[is.na(snps) && (snps==0.5)]
    snps <- rbind(dsnps,rsnps)
  }

  #mafs is a (# SNPs)x(# lines) matrix.  The columns of mafs are identical, and the ij^th element is the average
  #allele frequency for the SNP in the i^th row.
  
  #if(use == "all") imputes missing SNP type values with the expected (average) allele frequency.
  if ( use == "all" ) {
    mafs <- matrix(rowMeans(snps,na.rm=TRUE),nrow(snps),ncol(snps))
    snps[is.na(snps)] <- mafs[is.na(snps)]
  }
  else if ( use == "complete.obs" ) {
    mafs <- matrix(rowMeans(snps,na.rm=TRUE),nrow(snps),ncol(snps))
    snps <- snps[rowSums(is.na(snps))==0,]
  }
  mafs_comp <- 1-mafs
  snps_comp <- 1-snps
  

  n <- ncol(snps)
  K <- matrix(nrow=n,ncol=n)
  diag(K) <- 1
  #Create the k term on page 1422 of Loiselle et al. (1995)

  missing <- rep(NA, dim(snps)[1])  
  for(i in 1:dim(snps)[1]) {
    missing[i] <- sum(is.na(snps[i,]))
  }
  

  for(i in 1:(n-1)) {
    for(j in (i+1):n) {
      Num_First_Term_1 <- (snps[,i]-mafs[,i])*(snps[,j]-mafs[,j])
      Num_First_Term_2 <- (snps_comp[,i]-mafs_comp[,i])*(snps_comp[,j]-mafs_comp[,j])
      First_Term <- sum(Num_First_Term_1)+sum(Num_First_Term_2)

      Num_Second_Term_1 <- mafs[,i]*(1-mafs[,i])
      Num_Second_Term_2 <- mafs_comp[,i]*(1-mafs_comp[,i])
      Num_Second_Term_Bias_Correction <- 1/((2*n)-missing - 1)
      Num_Second_Term <-  Num_Second_Term_1 + Num_Second_Term_2
      Second_Term <- sum(Num_Second_Term*Num_Second_Term_Bias_Correction)

      Third_Term <- sum(Num_Second_Term) 
      
      f <- (First_Term + Second_Term)/Third_Term

      K[i,j] <- f
      if(K[i,j]<0) K[i,j]=0
      
      K[j,i] <- K[i,j]
    }
  }
  return(K)
}

##############################################################################################
GAPIT.replaceNaN <- function(LL) {
#handler of grids with NaN log
#Authors: Zhiwu Zhang
# Last update: may 12, 2011 
##############################################################################################

#handler of grids with NaN log 
index=(LL=="NaN")
if(length(index)>0) theMin=min(LL[!index])
if(length(index)<1) theMin="NaN"
LL[index]=theMin
return(LL)    
}

################################################################################################################
emma.REMLE <- function(y, X, K, Z=NULL, ngrids=100, llim=-10, ulim=10,
              esp=1e-10, eig.L = NULL, eig.R = NULL) {
# Authors: Hyun Min Kang
# Modified (only one line) by Zhiwu Zhang to handle non-defined LL ("NaN") by replacing it with the worst LL.
# Last update: June 8, 2011 
################################################################################################################


  n <- length(y)
  t <- nrow(K)
  q <- ncol(X)

#  stopifnot(nrow(K) == t)
  stopifnot(ncol(K) == t)
  stopifnot(nrow(X) == n)

  if ( det(crossprod(X,X)) == 0 ) {
    warning("X is singular")
    return (list(REML=0,delta=0,ve=0,vg=0))
  }

  if ( is.null(Z) ) {
    if ( is.null(eig.R) ) {
      eig.R <- emma.eigen.R.wo.Z(K,X)
    }
    etas <- crossprod(eig.R$vectors,y)
  
    logdelta <- (0:ngrids)/ngrids*(ulim-llim)+llim
    m <- length(logdelta)
    delta <- exp(logdelta)
    Lambdas <- matrix(eig.R$values,n-q,m) + matrix(delta,n-q,m,byrow=TRUE)
    Etasq <- matrix(etas*etas,n-q,m)
    LL <- 0.5*((n-q)*(log((n-q)/(2*pi))-1-log(colSums(Etasq/Lambdas)))-colSums(log(Lambdas)))
    dLL <- 0.5*delta*((n-q)*colSums(Etasq/(Lambdas*Lambdas))/colSums(Etasq/Lambdas)-colSums(1/Lambdas))
    
    optlogdelta <- vector(length=0)
    optLL <- vector(length=0)
    if ( dLL[1] < esp ) {
      optlogdelta <- append(optlogdelta, llim)
      optLL <- append(optLL, emma.delta.REML.LL.wo.Z(llim,eig.R$values,etas))
    }
    if ( dLL[m-1] > 0-esp ) {
      optlogdelta <- append(optlogdelta, ulim)
      optLL <- append(optLL, emma.delta.REML.LL.wo.Z(ulim,eig.R$values,etas))
    }

    for( i in 1:(m-1) )
      {
        if ( ( dLL[i]*dLL[i+1] < 0 ) && ( dLL[i] > 0 ) && ( dLL[i+1] < 0 ) ) 
        {
          r <- uniroot(emma.delta.REML.dLL.wo.Z, lower=logdelta[i], upper=logdelta[i+1], lambda=eig.R$values, etas=etas)
          optlogdelta <- append(optlogdelta, r$root)
          optLL <- append(optLL, emma.delta.REML.LL.wo.Z(r$root,eig.R$values, etas))
        }
      }
#    optdelta <- exp(optlogdelta)
  }
  else {
    if ( is.null(eig.R) ) {
      eig.R <- emma.eigen.R.w.Z(Z,K,X)
    }
    etas <- crossprod(eig.R$vectors,y)
    etas.1 <- etas[1:(t-q)]
    etas.2 <- etas[(t-q+1):(n-q)]
    etas.2.sq <- sum(etas.2*etas.2)
  
    logdelta <- (0:ngrids)/ngrids*(ulim-llim)+llim
    m <- length(logdelta)
    delta <- exp(logdelta)
    Lambdas <- matrix(eig.R$values,t-q,m) + matrix(delta,t-q,m,byrow=TRUE)
    Etasq <- matrix(etas.1*etas.1,t-q,m)
    dLL <- 0.5*delta*((n-q)*(colSums(Etasq/(Lambdas*Lambdas))+etas.2.sq/(delta*delta))/(colSums(Etasq/Lambdas)+etas.2.sq/delta)-(colSums(1/Lambdas)+(n-t)/delta))
    
    optlogdelta <- vector(length=0)
    optLL <- vector(length=0)
    if ( dLL[1] < esp ) {
      optlogdelta <- append(optlogdelta, llim)
      optLL <- append(optLL, emma.delta.REML.LL.w.Z(llim,eig.R$values,etas.1,n,t,etas.2.sq))
    }
    if ( dLL[m-1] > 0-esp ) {
      optlogdelta <- append(optlogdelta, ulim)
      optLL <- append(optLL, emma.delta.REML.LL.w.Z(ulim,eig.R$values,etas.1,n,t,etas.2.sq))
    }

    for( i in 1:(m-1) )
      {
        if ( ( dLL[i]*dLL[i+1] < 0 ) && ( dLL[i] > 0 ) && ( dLL[i+1] < 0 ) ) 
        {
          r <- uniroot(emma.delta.REML.dLL.w.Z, lower=logdelta[i], upper=logdelta[i+1], lambda=eig.R$values, etas.1=etas.1, n=n, t1=t, etas.2.sq = etas.2.sq )
          optlogdelta <- append(optlogdelta, r$root)
          optLL <- append(optLL, emma.delta.REML.LL.w.Z(r$root,eig.R$values, etas.1, n, t, etas.2.sq ))
        }
      }
#    optdelta <- exp(optlogdelta)
  }
  
  maxdelta <- exp(optlogdelta[which.max(optLL)])
  
  #handler of grids with NaN log
  optLL=GAPIT.replaceNaN(optLL)   
  
  maxLL <- max(optLL)
  if ( is.null(Z) ) {
    maxva <- sum(etas*etas/(eig.R$values+maxdelta))/(n-q)    
  }
  else {
    maxva <- (sum(etas.1*etas.1/(eig.R$values+maxdelta))+etas.2.sq/maxdelta)/(n-q)
  }
  maxve <- maxva*maxdelta

  return (list(REML=maxLL,delta=maxdelta,ve=maxve,vg=maxva))
}


##############################################################################################
GAPIT.Genotype <- function(G=NULL,GD=NULL,GM=NULL,KI=NULL,
                kinMethod=NULL,model="Add",numPCs=0,seed=123, Ratio =1,
                dataPath=NULL,numFiles=1, num.read = 1000,GPSonly=FALSE,
                GFile =NULL,GFileExt =NULL,
                GDFile=NULL,GDFileExt=NULL,
                GMFile=NULL,GMFileExt=NULL,
                mafRate=0.05,FDR.Rate = 0.05,FDR.Filter.Rate=1,
                Timmer=NULL,Memory=NULL){
#Object: To unify genotype and calculate kinship and PC if required:
#       1.For G data, convert it to GD
#       2.For GD data return it back simply if no KI and PC required
#       3.Samling GD and create KI and PC
#       4.Go through multiple files
#       5.In any case, GD must be returned (for QC)
#Output: GD, GI, GT, KI and PC
#Authors: Zhiwu Zhang
#Last update: August 11, 2011
##############################################################################################

print("Genotyping: numericalization, sampling kinship, PCs and much more...")
#Create logical variables
byData=!is.null(G) | !is.null(GD)
byFile=!is.null(GFile) | !is.null(GDFile)
hasGenotype=(byData | byFile  )
needKinPC=(is.null(KI) | numPCs>0)

if(!is.null(KI) & !byData & !byFile & GPSonly) {
return (list(GD=NULL,GI=NULL,GT=NULL,hasGenotype=FALSE, genoFormat=NULL, KI=KI,PC=NULL,byFile=FALSE,fullGD=TRUE,Timmer=Timmer,Memory=Memory))
}

#Set indicator for full GD
fullGD=FALSE
if(byData) fullGD=TRUE
if(byFile & Ratio==1 & needKinPC) fullGD=TRUE

#SET GT to NULL in case of no genotype
if(!byData & !byFile) {
return (list(GD=NULL,GI=NULL,GT=NULL,hasGenotype=FALSE, genoFormat=NULL, KI=KI,PC=NULL,byFile=FALSE,fullGD=TRUE,Timmer=Timmer,Memory=Memory))
}

genoFormat="hapmap"
if(is.null(G)&is.null(GFile)) genoFormat="EMMA"

#Diagnose user setteing
if(!is.null(KI) &!is.null(kinMethod))  stop("GAPIT says: You can not specify kinMethod and provide kinship at same time!!!")

if(!needKinPC &Ratio<1)  stop("GAPIT says: You did not require calculate kinship or PCs. Ratio should not be specified!!!")


if(GPSonly & is.null(KI) & !byData & !byFile)  stop("GAPIT says: For GPSonly optioin, please input either use KI or use genotype")
 
#if(is.null(dataPath) & !byData & byFile) stop("GAPIT Ssays: A path for genotype data should be provided!")
if( (numFiles<1) & !byData & byFile) stop("APIT Ssays: Number of file should be provided: >=1")
if(!is.null(G) & !is.null(GD)) stop("APIT Ssays: Both hapmap and EMMA format exist, choose one only.")

if(!is.null(GDFile) & is.null(GMFile)) stop("APIT Ssays: Genotype data and map files should be in pair")
if(is.null(GDFile) & !is.null(GMFile)) stop("APIT Ssays: Genotype data and map files should be in pair")

if(!is.null(GD) & is.null(GM)) stop("APIT Ssays: Genotype data and map files should be in pair")
if(is.null(GD) & !is.null(GM)) stop("APIT Ssays: Genotype data and map files should be in pair")

#if(!byData & !byFile) stop("APIT Ssays: Either genotype data or files should be given!")

#if(byData&(!is.null(dataPath))) stop ("APIT Ssays: You have provided geotype data. Datapath should not be provided!")
if(byData&numFiles>1) stop ("APIT Ssays: You have profided geotype data. Number of files should not be specified!")

#Multiple genotype files
if(is.null(numFiles))numFiles=1
if(numFiles<1)numFiles=1

#Set defaul method for kinMethod
if(is.null(kinMethod)) kinMethod="VanRaden"

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Genotype loaded")
Memory=GAPIT.Memory(Memory=Memory,Infor="Genotype loaded")


#Handler of read data in nuerical format (EMMA)
#Rename GM as GI
if(!is.null(GM))GI=GM
rm(GM)
gc()
#Extract GD and GT from read data GD
if(!is.null(GD) )
{
GT=as.matrix(GD[,1])  #get taxa
GD=as.matrix(GD[,-1]) #remove taxa column
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GT created from GD)")
Memory=GAPIT.Memory(Memory=Memory,Infor="GT created from GD")
}


#Hapmap format
if(!is.null(G))
{
#Convert HapMap to numerical
print(paste("Converting genotype...",sep=""))
hm=GAPIT.HapMap(G,model=model)
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="HapMap")
Memory=GAPIT.Memory(Memory=Memory,Infor="HapMap")
print(paste("Converting genotype done.",sep=""))
rm(G)
gc()
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="G removed")
Memory=GAPIT.Memory(Memory=Memory,Infor="G removed")

GT=hm$GT
GD=hm$GD
GI=hm$GI
rm(hm)
gc()
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="hm removed")
Memory=GAPIT.Memory(Memory=Memory,Infor="hm removed")
}




#From files
if(!byData & byFile){

  numFilesUsed=numFiles
  if(!needKinPC)numFilesUsed=1
  
  #Initial GI as storage
  GT=NULL
  GI=NULL

  #multiple fragments or files
  for (file in 1:numFilesUsed){
    frag=1
    numSNP=num.read
    myFRG=NULL
    while(numSNP==num.read) {     #this is problematic if the read end at the last line
    print(paste("Rading file: ",file,"Fragment: ",frag))
      myFRG=GAPIT.Fragment( dataPath=dataPath,  numFiles=numFiles,GFile=GFile,GFileExt=GFileExt,
                            seed=seed,Ratio=Ratio,model=model,genoFormat=genoFormat,
                            GDFile=GDFile,GDFileExt=GDFileExt,GMFile=GMFile,GMFileExt=GMFileExt,num.read=num.read,file=file,frag=frag)


      print("Fragment called succesfully")
      if(is.null(GT) & !is.null(myFRG$GT))GT= as.matrix(myFRG$GT)
            
      if(is.null(GD)){
        GD= myFRG$GD
      }else{
        if(!is.null(myFRG$GD))    { 
        print("what is myFRG$GD?") 
        #print(is(myFRG$GD))    
        #print(dim(myFRG$GD))
          GD=cbind(GD,myFRG$GD)
        }
      }
          
      if(is.null(GI)){
        GI= myFRG$GI
      }else{
        if(!is.null(myFRG$GI))    {
          colnames(myFRG$GI)=c("SNP","Chromosome","Position")        
          GI=as.data.frame(rbind(as.matrix(GI),as.matrix(myFRG$GI)))
        }
      }
      
      print("This fragment is joined")

      if(file==1 & frag==1)GT=as.matrix(myFRG$GT)

      frag=frag+1
      if(!is.null(myFRG$GI))    {
        numSNP=ncol(myFRG$GD)
      }  else{
       numSNP=0
      }
      
      if(!needKinPC)numSNP=0  #force to end the while loop
      if(is.null(myFRG))numSNP=0  #force to end the while loop

    } #end of repeat on fragment
    print("This file is OK")
  } #end of file loop
  print("All files loaded")
} #end of if(!byData&byFile)


#print("file loaded")


Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Sampling genotype")
Memory=GAPIT.Memory(Memory=Memory,Infor="Sampling genotype")

#Plot thirt part kinship
print("Plotting Kinship")
if(!is.null(KI))
{
theKin=as.matrix(KI[,-1])
colnames(theKin)=KI[,1]
rownames(theKin)=KI[,1]

pdf(paste("GAPIT.Kin.thirdPart.pdf",sep=""), width = 12, height = 12)
par(mar = c(25,25,25,25))
heatmap.2(theKin,  cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none")
dev.off()
}

#Create kinship from genotype if not provide
if(is.null(KI)&!is.null(GD))
{
#print(dim(GD))


if(kinMethod=="EMMA")theKin= emma.kinship(snps=t(as.matrix(.5*GD)), method="additive", use="all")
if(kinMethod=="Loiselle")theKin= GAPIT.kinship.loiselle(snps=t(as.matrix(.5*GD)), method="additive", use="all")
if(kinMethod=="VanRaden")theKin= GAPIT.kinship.VanRaden(snps=as.matrix(GD))

colnames(theKin)=GT
rownames(theKin)=GT
#print("Kin created")


#Create heat map for kinship
pdf(paste("GAPIT.Kin.",kinMethod,".pdf",sep=""), width = 12, height = 12)
par(mar = c(25,25,25,25))
heatmap.2(theKin,  cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none")
dev.off()


#Write the kinship into a text file
KI=cbind(GT,as.data.frame(theKin)) #This require big memory. Need a way to solve it.
write.table(KI, paste("GAPIT.Kin.",kinMethod,".csv",sep=""), quote = FALSE, sep = ",", row.names = FALSE,col.names = FALSE)

rm(theKin)
gc()

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Estimating kinship")
Memory=GAPIT.Memory(Memory=Memory,Infor="Estimating kinship")
print("Kinship created!")
}


#Create PC
PC=NULL
if(numPCs>0){
PC=GAPIT.PCA(X = GD, taxa = GT, PC.number = numPCs)
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PCA")
Memory=GAPIT.Memory(Memory=Memory,Infor="PCA")
print("PC created")
}
print("Genotype successfully acomplished")
return (list(GD=GD,GI=GI,GT=GT,hasGenotype=hasGenotype, genoFormat=genoFormat, KI=KI,PC=PC,byFile=byFile,fullGD=fullGD,Timmer=Timmer,Memory=Memory))
}


##############################################################################################
GAPIT.kinship.VanRaden<- function(snps,hasInbred=TRUE) {
# Object: To calculate the kinship matrix using the method of VanRaden (2009, J. Dairy Sci. 91:4414?��C4423)
# Authors: Zhwiu Zhang
# Last update: August 15, 2011 
############################################################################################## 

#snps=hm$GD 
nSNP=ncol(snps)
nInd=nrow(snps)
n=nInd
snpMean= apply(snps,2,mean)
snps=snps-snpMean
K=tcrossprod((snps), (snps))

#Extract diagonals
i=1:n
j=(i-1)*n
index=i+j
d=K[index]
DL=min(d)
DU=max(d)
floor=min(K)

K=(K-floor)/(DL-floor)

MD=(DU-floor)/(DL-floor)

#Handler of diagonals over 2
if(MD>2)K[index]=K[index]/(MD-1)+1

#Handler of inbred
if(MD<2 & hasInbred) K=2*K/((DU-floor)/(DL-floor))

return(K)
}

##############################################################################################
GAPIT.Fragment <- function(dataPath=NULL,numFiles=1,GFile=NULL,GFileExt=NULL,seed=123,Ratio=1,model="Add",
                          genoFormat=NULL, GDFile=NULL, GDFileExt=NULL, GMFile=NULL, GMFileExt=NULL, num.read=NULL,
                          file=1,frag=1){
#Object: To load SNPs on a (frag)ment in file (this is to replace sampler)
#Output: genotype data sampled
#Authors: Alex Lipka and Zhiwu Zhang
# Last update: August 18, 2011
##############################################################################################

#print("Fragmental reading...")
genoFormat="hapmap"
if(!is.null(GDFile)&is.null(GFile)) genoFormat="EMMA"
  
if(genoFormat=="hapmap"){
        #Initical G
        G=NULL
        if(frag==1){
          skip.1=0
          G <- try(read.table(paste(dataPath,GFile,file, ".",GFileExt,sep=""),
                          head = FALSE,skip = skip.1, nrows = num.read+1),silent=TRUE)
        }else{
          skip.1 <- (frag-1)*num.read +1
          G <- try(read.table(paste(dataPath,GFile,file, ".",GFileExt,sep=""),
                          head = FALSE,skip = skip.1, nrows = num.read),silent=TRUE )
        }
        
        if(inherits(G, "try-error"))  {
          G=NULL
          print("File end reached for G!!!")
        }

        if(is.null(G)){
        print("The above error indicating reading after end of file (It is OK).")
        return(list(GD=NULL,GI=NULL,GT=NULL) )
        }
        
        hm=GAPIT.HapMap(G,model=model,heading=(frag==1))
        rm(G)
        gc()
        print("hapmap called sucesfuly from fragment")
        if(Ratio==1) return(list(GD=hm$GD,GI=hm$GI,GT=hm$GT))

        if(Ratio<1){
        print("problem should be this section!")
          n= ncol(hm$GD)
          print(dim(hm$GD))
          print(dim(hm$GI))
          print(dim(hm$GT))
          
          set.seed(seed+(file*1000)+frag)
          sample=sample(1:n,floor(n*Ratio))
          return(list(GD=hm$GD[,sample],GI=hm$GI[sample,],GT=hm$GT))
        }
          print("ERROR: It should no get here!!!")        
} #end of "hapmap"



if(genoFormat=="EMMA"){
#print("The file is a numerical format!")
        #Initial GD
        GD=NULL
        skip.1 <- (frag-1)*num.read
        #Skip the remaining columns
        GD.temp <- try(read.table(paste(dataPath,GDFile, file, ".", GDFileExt,sep=""), head = TRUE, nrows = 1),silent=TRUE)
        num.SNP <- ncol(GD.temp)-1
        rm(GD.temp)
        read.in <- min(num.read,(num.SNP-skip.1))
        skip.2 <- max((num.SNP - (skip.1 + read.in)),0)


        GD <- try(read.table(paste(dataPath,GDFile,file, ".",GDFileExt,sep=""), head = TRUE,
                  colClasses = c("factor", rep("NULL", skip.1), rep("numeric", read.in),
                  rep("NULL", skip.2))) ,silent=TRUE)
        GI <- try(read.table(paste(dataPath,GMFile,file, ".",GMFileExt,sep=""), head = TRUE,
                  skip=skip.1, nrows=num.read) ,silent=TRUE)
                  
        if(inherits(GD, "try-error"))  {
          GD=NULL
          print("File end reached for GD!!!")
        }
        if(inherits(GI, "try-error"))  {
          GI=NULL
          print("File end reached for GI!!!")
        }                          
                  
        if(is.null(GD)) return(list(GD=NULL, GI=NULL,GT=NULL))
        
        GT=GD[,1]  #Extract infividual names

        GD=GD[,-1] #Remove individual names
#print("Numerical file read sucesfuly from fragment")        
        if(Ratio==1) return(list(GD=GD, GI=GI,GT=GT))
        
        if(Ratio<1){
          n= ncol(GD)
          set.seed(seed+file)
          sample=sample(1:n,floor(n*Ratio))
          return(list(GD=GD[,sample], GI=GI[sample,],GT=GT))
        }
    } # end of the "EMMA"
#print("fragment ended succesfully!")
}#End of fragment