##############################################################################################
GAPIT <- function(Y=NULL,G=NULL,GD=NULL,GM=NULL,KI=NULL,Z=NULL,CV=NULL,GP=NULL,
                group.from=30 ,group.to=1000000,group.by=10,DPP=100000, 
                kinship.cluster="average", kinship.group='Mean',kinship.algorithm=NULL,kinship.complement=FALSE,
                bin.from=10000,bin.to=5000000,inclosure.from=10,inclosure.to=1000,inclosure.by=10,
                SNP.P3D=TRUE,SNP.effect="Add",PCA.total=0, SNP.fraction = 1, seed = 123, BINS = 20,SNP.test=TRUE, 
                SNP.MAF=0,FDR.Rate = 1, SNP.FDR=1,SNP.permutation=FALSE,SNP.CV=NULL,                             
                file.from=1, file.to=1, file.total=NULL, file.fragment = 99999,file.path=NULL, 
                file.G=NULL, file.Ext.G=NULL,file.GD=NULL, file.GM=NULL, file.Ext.GD=NULL,file.Ext.GM=NULL, 
                ngrid = 100, llim = -10, ulim = 10, esp = 1e-10,
                LD.chromosome=NULL,LD.location=NULL,LD.range=NULL){
GAPIT.Version="1.38 (Complementary kinship)"
#Object: To perform GWAS and GPS (Genomic Prediction/Selection)
#Designed by Zhiwu Zhang
#Writen by Alex Lipka, Feng Tian and Zhiwu Zhang
#Last update: September 23, 2011 
##############################################################################################

if(!is.null(Y)){
if (ncol(Y)<2)  stop ("Phenotype should have taxa name and one trait at least. Please correct phenotype file!")
for (trait in 2: ncol(Y))  {

gapitMain <- GAPIT.Main(Y=Y[,c(1,trait)],G=G,GD=GD,GM=GM,KI=KI,Z=Z,CV=CV,GP=GP,SNP.P3D=SNP.P3D,kinship.algorithm=kinship.algorithm,
                      kinship.complement=kinship.complement,bin.from=bin.from,bin.to=bin.to,inclosure.from=inclosure.from,inclosure.to=inclosure.to,inclosure.by=inclosure.by,
				              group.from=group.from,group.to=group.to,group.by=group.by,kinship.cluster=kinship.cluster,kinship.group=kinship.group,name.of.trait = colnames(Y)[trait],
                        file.path=file.path,file.from=file.from, file.to=file.to, file.total=file.total, file.fragment = file.fragment, file.G=file.G,file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM, 
                        SNP.MAF= SNP.MAF,FDR.Rate = FDR.Rate,SNP.FDR=SNP.FDR,SNP.effect=SNP.effect,PCA.total=PCA.total,GAPIT.Version=GAPIT.Version,
                        GT=NULL, SNP.fraction = SNP.fraction, seed = seed, BINS = BINS,SNP.test=SNP.test,DPP=DPP, SNP.permutation=SNP.permutation,
                        LD.chromosome=LD.chromosome,LD.location=LD.location,LD.range=LD.range,SNP.CV=SNP.CV)  
                 
#return (list(KI=gapitMain$KI,PC=gapitMain$PC))

}# end of loop on trait
if(ncol(Y)==2) {
h2= as.matrix(as.numeric(as.vector(gapitMain$Compression[,5]))/(as.numeric(as.vector(gapitMain$Compression[,5]))+as.numeric(as.vector(gapitMain$Compression[,6]))),length(gapitMain$Compression[,6]),1)
colnames(h2)=c("Heritability")
  return (list(GWAS=gapitMain$GWAS,GPS=gapitMain$GPS,compression=as.data.frame(cbind(gapitMain$Compression,h2)), kinship.optimum=gapitMain$kinship.optimum,kinship=gapitMain$kinship,PCA=gapitMain$PC))
}else{
  return (list(GWAS=NULL,GPS=NULL,compression=NULL,kinship.optimum=NULL,kinship=gapitMain$KI,PCA=gapitMain$PC))
}

}# end ofdetecting null Y

#Calculating kinship or PC in case that phenotype is not provided
if(is.null(Y)){
Timmer=GAPIT.Timmer(Infor="Kinsip and PC only")
Memory=GAPIT.Memory(Infor="Kinsip and PC only")
print("Phenotype was not provided. Genotype only...")

if(bin.from!=bin.to &!is.null(GP))stop("GAPIT says: Please make bin.from and bin.to the same!")
if(inclosure.from!=inclosure.to &!is.null(GP))stop("GAPIT says: Please make inclosure.from and inclosure.to the same!")
inclosure.size=inclosure.from

myGenotype<-GAPIT.Genotype(G=G,GD=GD,GM=GM,KI=KI,GP=GP,kinship.algorithm=kinship.algorithm,PCA.total=PCA.total,
                kinship.complement=kinship.complement,bin.size=bin.size,inclosure.size=inclosure.size,
                file.path=file.path,file.from=file.from, file.to=file.to, file.total=file.total, file.fragment = file.fragment, file.G=file.G, file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM,
                SNP.MAF=SNP.MAF,FDR.Rate = FDR.Rate,SNP.FDR=SNP.FDR,SNP.effect=SNP.effect, seed = seed, SNP.fraction = SNP.fraction,
                LD.chromosome=LD.chromosome,LD.location=LD.location,LD.range=LD.range)

Timmer=myGenotype$Timmer
Memory=myGenotype$Memory
          
return (list(GWAS=NULL,GPS=NULL,compression=NULL,kinship.optimum=NULL,kinship=myGenotype$KI,PCA=myGenotype$PC))
}
print("GAPIT accomplished successfully!")
return(gapitMain)
}  #end of GAPIT function


##############################################################################################
GAPIT.Main <- function(Y,G=NULL,GD=NULL,GM=NULL,KI=NULL,Z=NULL,CV=NULL,SNP.P3D=TRUE,GP=NULL,
                group.from=1000000 ,group.to=1,group.by=10,kinship.cluster="average", kinship.group='Mean',kinship.algorithm=NULL,DPP=50000,
               	ngrid = 100, llin = -10, ulim = 10, esp = 1e-10,
                file.path=NULL,file.from=NULL, file.to=NULL, file.total=NULL, file.fragment = 512, file.G=NULL, file.Ext.G=NULL,file.GD=NULL, file.GM=NULL, file.Ext.GD=NULL,file.Ext.GM=NULL,
                SNP.MAF=0,FDR.Rate=1,SNP.FDR=1,SNP.effect="Add",PCA.total=0,  GAPIT.Version=GAPIT.Version,
                name.of.trait, GT = NULL, SNP.fraction = 1, seed = 123, BINS = 20,SNP.test=TRUE,
                LD.chromosome=NULL,LD.location=NULL,LD.range=NULL,
                kinship.complement=FALSE, bin.from=10000,bin.to=5000000,inclosure.from=10,inclosure.to=1000,inclosure.by=10,
                SNP.permutation=FALSE,SNP.CV=NULL){
#Object: To perform GWAS and GPS (Genomic Prediction/Selection)
#Output: GWAS table (text file), QQ plot (PDF), Manhattan plot (PDF), genomic prediction (text file), and
#        genetic and residual variance components
#Authors: Zhiwu Zhang
# Last update: may 12, 2011
##############################################################################################

Timmer=GAPIT.Timmer(Infor="GAPIT (GWAS and GS in R)")
Memory=GAPIT.Memory(Infor="GAPIT (GWAS and GS in R)")

#Genotype processing and calculation Kin and PC
print("Procesing genotype...")
bin.size=bin.from
inclosure.size=inclosure.from

#First call to genotype to setup genotype data
myGenotype<-GAPIT.Genotype(G=G,GD=GD,GM=GM,KI=KI,kinship.algorithm=kinship.algorithm,PCA.total=PCA.total,SNP.fraction=SNP.fraction,SNP.test=SNP.test,
                file.path=file.path,file.from=file.from, file.to=file.to, file.total=file.total, file.fragment = file.fragment, file.G=file.G, file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM,
                SNP.MAF=SNP.MAF,FDR.Rate = FDR.Rate,SNP.FDR=SNP.FDR,SNP.effect=SNP.effect,
                LD.chromosome=LD.chromosome,LD.location=LD.location,LD.range=LD.range,
                kinship.complement=kinship.complement,GP=NULL,bin.size=bin.size,inclosure.size=inclosure.size)

print("Genotype called from main function")                
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Kin and PCA")
Memory=GAPIT.Memory(Memory=Memory,Infor="Kin and PCA")

genoFormat=myGenotype$genoFormat
KI=myGenotype$KI
PC=myGenotype$PC
hasGenotype=myGenotype$hasGenotype
print("genotype called from main.")

#Second call to genotype to SUPER
if(!is.null(GP)){
myGenotype2<-GAPIT.Genotype(G=G,GD=GD,GM=GM,KI=NULL,kinship.algorithm=kinship.algorithm,PCA.total=PCA.total,SNP.fraction=SNP.fraction,SNP.test=SNP.test,
                file.path=file.path,file.from=file.from, file.to=file.to, file.total=file.total, file.fragment = file.fragment, file.G=file.G, file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM,
                SNP.MAF=SNP.MAF,FDR.Rate = FDR.Rate,SNP.FDR=SNP.FDR,SNP.effect=SNP.effect,
                LD.chromosome=LD.chromosome,LD.location=LD.location,LD.range=LD.range,
                kinship.complement=kinship.complement,GP=GP,bin.size=bin.size,inclosure.size=inclosure.size,SNP.CV=SNP.CV)
KI=myGenotype2$KI
SNP.QTN=myGenotype2$SNP.QTN

print("Output SNP.QTN")
print(length(myGenotype2$SNP.QTN))
print(head(myGenotype2$SNP.QTN))

print(dim(myGenotype2$GD))
print(dim(myGenotype$GD))

print(length(myGenotype2$GD))
print(length(myGenotype$GD))

print(is(myGenotype2$SNP.QTN))
QTN=myGenotype$GD[,myGenotype2$SNP.QTN] 
print(dim(QTN))
print(dim(myGenotype$GT))
print("Setting col name")
colnames(QTN)=myGenotype$GI[myGenotype2$SNP.QTN,1]

QTN=cbind(myGenotype$GT,QTN)   #add taxa
colnames(QTN)[1]="taxa"

print("final QTN")
print(dim(QTN))
write.table(QTN,paste("GAPIT.",name.of.trait,".SNP.QTN.csv",sep=""),sep=",", row.names = FALSE,col.names = TRUE)
} #end of if(!is.null(GP))


byFile=myGenotype$byFile
fullGD=myGenotype$fullGD
GD=myGenotype$GD
GI=myGenotype$GI
GT=myGenotype$GT


                

#Examine status of datasets
print("Examing data...")
if(is.null(Y)) stop ("GAPIT says: Phenotype must exist.")
if(is.null(KI)&missing(GD)) stop ("GAPIT says: Kinship is required. As genotype is not provided, kinship can not be created.")

#When GT and GD are missing, force to have fake ones (creating them from Y),GI is not required in this case
if(is.null(GD) & is.null(GT)) {
	GT=as.matrix(Y[,1])
	GD=matrix(1,nrow(Y),1)	
  GI=as.data.frame(matrix(0,1,3) )
  colnames(GI)=c("SNP","Chromosome","Position")
}

#merge CV with PC
if(PCA.total>0&!is.null(CV))CV=GAPIT.CVMergePC(CV,PC)
if(PCA.total>0&is.null(CV))CV=PC

#Create Z as identity matrix from Y if it is not provided
if(is.null(Z)){
taxa=as.character(Y[,1])
Z=as.data.frame(diag(1,nrow(Y)))
Z=rbind(taxa,Z)
taxa=c('Taxa',as.character(taxa))
Z=cbind(taxa,Z)
}

#Add the part of non proportion in Z matrix
if(!is.null(Z)&nrow(Z)-1<nrow(Y)){
Z=GAPIT.ZmatrixFormation(Z=Z,Y=Y)
}

#Create CV with all 1's if it is not provided
if(is.null(CV)){
CV=Y[,1:2]
CV[,2]=1
colnames(CV)=c("taxa","overall")
}

#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="ZMatrix")
#Memory=GAPIT.Memory(Memory=Memory,Infor="ZMatrix")

#Data quality control
print("Quality control...")

qc <- GAPIT.QC(Y=Y,KI=KI, GT=GT,CV=CV,Z=Z)

print("Preparing optimization...")

GTindex=qc$GTindex
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="QC")
Memory=GAPIT.Memory(Memory=Memory,Infor="QC")

#Hanler of group boundry
if(group.from>group.to) stop("GAPIT says: group.to should  be larger than group.from. Please correct them!")

if(is.null(CV) | (!is.null(CV)& group.to<ncol(CV))) {
#The minimum of group is number of columns in CV
  group.from=1
  group.to=1
  warning("The upper bound of groups (group.to) is not sufficient. both boundries were set to a and GLM is performed!")
}

if(!is.null(CV)& group.from<1) {
  group.from=1 #minimum of group is number of columns in CV
  warning("The lower bound of groups should be 1 at least. It was set to 1!")
}

if(group.to>nrow(qc$KI)) {
  group.to=nrow(qc$KI) #maximum of group is number of rows in KI
	if(group.from>nrow(qc$KI)) group.from=nrow(qc$KI)
  warning("The upper bound of groups is too high. It was set to the size of kinship!")
}

#set bin loop levels
bin.from=10000
bin.to=10000
bin.fold=ceiling(log2(bin.to/bin.from))
bin.seq=0:bin.fold
bin.level=bin.from*2^bin.seq

#set inclosure loop levels
inclosure.from=10
inclosure.to=10
inclosure.by=10
inclosure=seq(inclosure.from,inclosure.to,by=inclosure.by)


#Optimization for group number, cluster algorithm and kinship type
GROUP=seq(group.to,group.from,by=-group.by)#The reverse order is to make sure to include full model
if(missing("kinship.cluster")) kinship.cluster=c("ward", "single", "complete", "average", "mcquitty", "median", "centroid")
if(missing("kinship.group")) kinship.group=c("Mean", "Max", "Min", "Median")
numSetting=length(GROUP)*length(kinship.cluster)*length(kinship.group)

#Reform Y, GD and CV into EMMA format
ys=as.matrix(qc$Y[2])
X0=as.matrix(qc$CV[,-1])

#Initial
count=0
Compression=matrix(,numSetting,6)
colnames(Compression)=c("Type","Cluster","Group","REML","VA","VE")

#add indicator of overall mean
if(min(X0[,1])!=max(X0[,1])) X0 <- cbind(1, X0) #do not add overall mean if X0 has it already at first column

#--------------------------------------------------------------------------------------------------------------------#
print("Compressing..." )
print(paste("The total combinations: ", numSetting,sep=""))
#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="DataProcessing")
#Memory=GAPIT.Memory(Memory=Memory,Infor="DataProcessing")

#Loop to optimize cluster algorithm, group number and kinship type
for (bin in bin.level){
for (inc in inclosure){
for (ca in kinship.cluster){
for (group in GROUP){
for (kt in kinship.group){

#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 1")
#Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 1")
count=count+1

if(group<ncol(X0)+1) group=1 # the emma function (emma.delta.REML.dLL.w.Z) does not allow K has dim less then CV. turn to GLM (group=1)

cp <- GAPIT.Compress(KI=qc$KI,kinship.cluster=ca,kinship.group=kt,GN=group,Timmer=Timmer,Memory=Memory)
Timmer=cp$Timmer
Memory=cp$Memory

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_cp")
Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2_cp")

bk <- GAPIT.Block(Z=qc$Z,GA=cp$GA,KG=cp$KG)
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_bk")
Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 bk")

zc <- GAPIT.ZmatrixCompress(Z=qc$Z,GAU =bk$GA)

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_zc")
Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 zc")

#Reform KW and Z into EMMA format

z0=as.matrix(zc$Z[,-1])
Z=matrix(as.numeric(z0),nrow=nrow(z0),ncol=ncol(z0))

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Prio PreP3D")
Memory=GAPIT.Memory(Memory=Memory,Infor="Prio PreP3D")

#Do not screen SNP unless existing genotype and one combination
if(numSetting==1 & hasGenotype){
 optOnly=FALSE
}else{
optOnly=TRUE
}
if(!SNP.test) optOnly=TRUE

if(optOnly){
 colInclude=1
}else{
 colInclude=c(1:ncol(GD))
}

if(!optOnly) print("Compressing and Genome screening..." )

#Evaluating maximum likelohood
#print("Calling EMMAxP3D...")
p3d <- GAPIT.EMMAxP3D(ys=ys,xs=as.matrix(as.data.frame(GD[GTindex,colInclude])),K = as.matrix(bk$KW) ,Z=Z,X0=X0,GI=GI,SNP.P3D=SNP.P3D,Timmer=Timmer,Memory=Memory,fullGD=fullGD,
        SNP.permutation=SNP.permutation, GP=GP,kinship.complement=kinship.complement,
			 file.path=file.path,file.from=file.from,file.to=file.to,file.total=file.total, file.fragment = file.fragment, byFile=byFile, file.G=file.G,file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM,
       GTindex=GTindex,genoFormat=genoFormat,optOnly=optOnly,SNP.effect=SNP.effect)

Timmer=p3d$Timmer
Memory=p3d$Memory

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Post PreP3D")
Memory=GAPIT.Memory(Memory=Memory,Infor="Post PreP3D")

#print("Cluster algorithm, kinship type, groups, VG, Ve and REML:")
print(paste(count, "of",numSetting,"--","Vg=",round(p3d$vgs,4), "VE=",round(p3d$ves,4),"-2LL=",round(p3d$REMLs,2), "  Clustering=",ca,"  Group number=", group ,"  Group kinship=",kt,sep = " "))

#print(paste("CA is ",ca))
#print(paste("group is ",group))
#print(paste("kt is ",kt))

#recording Compression profile on array
Compression[count,1]=kt
Compression[count,2]=ca
Compression[count,3]=group
Compression[count,4]=p3d$REMLs
Compression[count,5]=p3d$vgs
Compression[count,6]=p3d$ves

}#end of for (ca in kinship.cluster)

#Skip the rest group in case group 1 is finished
if(group==1) break #To skip the rest group interations

}#end of for (group in GROUP)
}#end of for (kt in kinship.group)
}#end of for (inc in inclosure)
}#end of for (bin in bin.level)
#--------------------------------------------------------------------------------------------------------------------#
#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Compression")
#Memory=GAPIT.Memory(Memory=Memory,Infor="Copmression")

if(numSetting==1)
{
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GWAS")
  Memory=GAPIT.Memory(Memory=Memory,Infor="GWAS")
}


#Perform GWAS with the optimum setting
#This section is omited if there is only one setting
if (numSetting>1) {
  print("Genomic screening..." )
  
optOnly=FALSE  #set default to false and change it to TRUE in these situations:
if(!hasGenotype) optOnly=TRUE
if(!SNP.test) optOnly=TRUE

if(optOnly){
 colInclude=1
}else{
 colInclude=c(1:ncol(GD))
}

  #Find the best ca,kt and group
  Compression=Compression[order(as.numeric(Compression[,4]),decreasing = FALSE),]  #sort on REML
  kt=Compression[1,1]
  ca=Compression[1,2]
  group=Compression[1,3]
print(paste("Optimum: ",Compression[1,2],Compression[1,1],Compression[1,3],Compression[1,5], Compression[1,6],Compression[1,4] ,sep = " "))
  
  cp <- GAPIT.Compress(KI=qc$KI,kinship.cluster=ca,kinship.group=kt,GN=group,Timmer=Timmer,Memory=Memory)
  Timmer=cp$Timmer
  Memory=cp$Memory
  
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_cp")
  Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2_cp")
  
  bk <- GAPIT.Block(Z=qc$Z,GA=cp$GA,KG=cp$KG)
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_bk")
  Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 bk")
  
  zc <- GAPIT.ZmatrixCompress(Z=qc$Z,GAU =bk$GA)
  
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_zc")
  Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 zc")
  
  #Reform KW and Z into EMMA format
  
  z0=as.matrix(zc$Z[,-1])
  Z=matrix(as.numeric(z0),nrow=nrow(z0),ncol=ncol(z0))

 print("Screening wiht the optimum setting") 
  p3d <- GAPIT.EMMAxP3D(ys=ys,xs=as.matrix(as.data.frame(GD[GTindex,colInclude]))   ,K = as.matrix(bk$KW) ,Z=Z,X0=X0,GI=GI,SNP.P3D=SNP.P3D,Timmer=Timmer,Memory=Memory,fullGD=fullGD,
          SNP.permutation=SNP.permutation, GP=GP,kinship.complement=kinship.complement,
    			 file.path=file.path,file.from=file.from,file.to=file.to,file.total=file.total, file.fragment = file.fragment, byFile=byFile, file.G=file.G,file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM,
           GTindex=GTindex,genoFormat=genoFormat,optOnly=optOnly,SNP.effect=SNP.effect)  
    
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GWAS")
  Memory=GAPIT.Memory(Memory=Memory,Infor="GWAS")
 #print("Screening wiht the optimum setting done") 

}#end of if (numSetting>1 & hasGenotype & !SNP.test)

#Plotting optimum group kinship

if(length(bk$KW)>1 &length(bk$KW)<length(KI) & length(bk$KW)<1000){
pdf(paste("GAPIT.",name.of.trait,".Kin.Optimum.pdf",sep=""), width = 12, height = 12)
par(mar = c(25,25,25,25))
heatmap.2(as.matrix(bk$KW),  cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none")
dev.off()
}



#Merge GWAS resultss from files to update ps,maf and nobs in p3d
if(byFile&!fullGD)
{
print("Loading GWAS results from file...")
for (file in file.from:file.to)
{

#Initicalization
frag=1
numSNP=file.fragment

while(numSNP==file.fragment) {     #this is problematic if the read end at the last line  

#Initicalization GI to detect reading empty line
#theGI=NULL
#theP=NULL
#theMAF=NULL
#thenobs=NULL

#reload results from files
print(paste("Current file ",file,"Fragment: ",frag))

theGI <- try(read.table(paste("GAPIT.TMP.GI.",file,".",frag,".txt",sep=""), head = TRUE)   ,silent=TRUE)
theP <- try(read.table(paste("GAPIT.TMP.ps.",file,".",frag,".txt",sep=""), head = FALSE)   ,silent=TRUE)
theMAF <- try(read.table(paste("GAPIT.TMP.maf.",file,".",frag,".txt",sep=""), head = FALSE),silent=TRUE)
thenobs <- try(read.table(paste("GAPIT.TMP.nobs.",file,".",frag,".txt",sep=""),head= FALSE),silent=TRUE)


if(inherits(theGI, "try-error"))  {
#if(nrow(theGI)<1){
  numSNP=0
  #print("This fragment is empty.")
}else{

#print("Records loaded for this fragment.")
  numSNP=nrow(theGI)  
  colnames(theP)="P"
  colnames(theMAF )="MAF"
  colnames(thenobs )="nobs"
  colnames(theGI) = colnames(GI)
 
#Merge results  
  if(file==file.from & frag==1){
    GI=theGI  
    allP=theP
    allMAF=theMAF
    allnobs=thenobs
  }else{
    allP=as.data.frame(rbind(as.matrix(allP),as.matrix(theP))  )
    allMAF=as.data.frame(rbind(as.matrix(allMAF),as.matrix(theMAF)) )
    allnobs=as.data.frame(rbind(as.matrix(allnobs),as.matrix(thenobs)))
    GI=as.data.frame(rbind(as.matrix(GI),as.matrix(theGI)))
  }

}#end of  if(inherits(theGI, "try-error")) (else section)

#setup for next fragment
frag=frag+1   #Progress to next fragment 

}#end of loop on fragment: while(numSNP==file.fragment)
}#end of loop on file

#update p3d with components from files
  p3d$ps=allP
  p3d$maf=allMAF
  p3d$nobs=allnobs
  
#Delete all the GAPIT.TMP files
  system('cmd /c del "GAPIT.TMP*.*"') 
  print("GWAS results loaded from all files succesfully!")
} #end of if(byFile)

#--------------------------------------------------------------------------------------------------------------------#
#Final report
print("Generating summary" )
GWAS=NULL
GPS=NULL
#genomic prediction
print("Genomic Breeding Values (GBV) ..." )

if(length(bk$KW)>ncol(X0)) {
  gs <- GAPIT.GS(KW=bk$KW,KO=bk$KO,KWO=bk$KWO,GAU=bk$GAU,UW=cbind(p3d$BLUP,p3d$PEV))
}

print("Writing GBV and Acc..." )

if(length(bk$KW)>ncol(X0)) {
  GPS=gs$BLUP
  write.table(gs$BLUP, paste("GAPIT.", name.of.trait,".BLUP.csv" ,sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)
}
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GPS")
Memory=GAPIT.Memory(Memory=Memory,Infor="GPS")

#Make heatmap for distribution of BLUP and PEV
print("GBV and accuracy distribution..." )
if(length(bk$KW)>ncol(X0)) {
  GAPIT.GS.Visualization(gsBLUP = gs$BLUP, BINS=BINS,name.of.trait = name.of.trait)
}

#Make a plot Summarzing the Compression Results, if more than one "compression level" has been assessed
print("Compression portfolios..." )
#print(Compression)
GAPIT.Compression.Visualization(Compression = Compression, name.of.trait = name.of.trait)
print("Compression Visualization done")

#Export GWAS results
if(hasGenotype &SNP.test &!is.na(Compression[1,4]))     #require not NA REML 
{

  print("Filtering SNPs with MAF..." )
	index=p3d$maf>=SNP.MAF	     
	PWI.Filtered=cbind(GI,p3d$ps,p3d$maf,p3d$nobs)[index,]
	colnames(PWI.Filtered)=c("SNP","Chromosome","Position ","P.value", "maf", "nobs")
  	     
		     
  print("SNPs filtered with MAF")
  
  if(!is.null(PWI.Filtered))
  {

  #Run the BH multiple correction procedure of the results
  #Create PWIP, which is a table of SNP Names, Chromosome, bp Position, Raw P-values, FDR Adjusted P-values
  print("Calculating FDR..." )
  PWIP <- GAPIT.Perform.BH.FDR.Multiple.Correction.Procedure(PWI = PWI.Filtered, FDR.Rate = FDR.Rate, FDR.Procedure = "BH")

  #QQ plots
  print("QQ plot..." )
  GAPIT.QQ(P.values = PWIP$PWIP[,4], name.of.trait = name.of.trait,DPP=DPP)

  #Manhattan Plots
   print("Manhattan plot (Genomewise)..." )
  GAPIT.Manhattan(GI.MP = PWIP$PWIP[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Genomewise")
 print("Manhattan plot (Chromosomewise)..." )
  GAPIT.Manhattan(GI.MP = PWIP$PWIP[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Chromosomewise")

  #Association Table
  print("Association table..." )
  #GAPIT.Table(final.table = PWIP$PWIP, name.of.trait = name.of.trait,SNP.FDR=SNP.FDR)
  GWAS=PWIP$PWIP[PWIP$PWIP[,7]<=SNP.FDR,]
  write.table(GWAS, paste("GAPIT.", name.of.trait, ".GWAS.Results.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)



  
  } #end of if(!is.null(PWI.Filtered))
} #end of if(hasGenotype )

#Log
log=GAPIT.Log(Y=Y,KI=KI,Z=Z,CV=CV,SNP.P3D=SNP.P3D,
				group.from = group.from ,group.to =group.to ,group.by = group.by ,kinship.cluster = kinship.cluster, kinship.group= kinship.group,
                      	ngrid = ngrid , llin = llin , ulim = ulim , esp = esp ,name.of.trait = name.of.trait)
#Memory usage
#GAPIT.Memory.Object(name.of.trait=name.of.trait)

#Timming
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Report")
Memory=GAPIT.Memory(Memory=Memory,Infor="Report")

file=paste("GAPIT.", name.of.trait,".Timming.csv" ,sep = "")
write.table(Timmer, file, quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)

file=paste("GAPIT.", name.of.trait,".Memory.Stage.csv" ,sep = "")
write.table(Memory, file, quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)

print(paste(name.of.trait, "has been analyzed successfully!") )
print(paste("The results are saved in the directory of ", getwd()) )
print("---------------------------------------------------------------------------------------")

return (list(Timmer=Timmer,Compression=Compression,kinship.optimum=cp$KG, kinship=myGenotype$KI,PC=PC,GWAS=GWAS, GPS=GPS))

}#The function GAPIT.Main ends here

##############################################################################################
GAPIT.EMMAxP3D <- function(ys,xs,K=NULL,Z=NULL,X0=NULL,GI=NULL,GP=NULL,kinship.complement=FALSE,
		file.path=NULL,file.from=NULL,file.to=NULL,file.total=1, genoFormat="Hapmap", file.fragment=NULL,byFile=FALSE,fullGD=TRUE,SNP.fraction=1,
    file.G=NULL,file.Ext.G=NULL,GTindex=NULL,file.GD=NULL, file.GM=NULL, file.Ext.GD=NULL,file.Ext.GM=NULL,
    SNP.P3D=TRUE,Timmer,Memory,optOnly=TRUE,SNP.effect="Add", SNP.permutation=FALSE,
    ngrids=100,llim=-10,ulim=10,esp=1e-10   ){
#Object: To esimate variance component by using EMMA algorithm and perform GWAS with P3D/EMMAx
#Output: ps, REMLs, stats, dfs, vgs, ves, BLUP,  BLUP_Plus_Mean, PEV
#Authors: Feng Tian, Alex Lipka and Zhiwu Zhang
# Last update: April 26, 2011
# Library used: EMMA (Kang et al, Genetics, Vol. 178, 1709-1723, March 2008)
# Note: This function was modified from the function of emma.REML.t from the library
##############################################################################################

#print("EMMAxP3D started...")
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="P3D Start")
Memory=GAPIT.Memory(Memory=Memory,Infor="P3D Start")

#--------------------------------------------------------------------------------------------------------------------<
#Change data to matrix format if they are not
if (is.null(dim(ys)) || ncol(ys) == 1)  ys <- matrix(ys, 1, length(ys))
if (is.null(X0)) X0 <- matrix(1, ncol(ys), 1)

#handler of special Z and K
if(!is.null(Z)){ if(ncol(Z) == nrow(Z)) Z = NULL }
#if(!is.null(K)) {if(length(K)<2) K = NULL}
if(!is.null(K)) {if(length(K)<1) K = NULL}

#Extract dimension information
g <- nrow(ys) #number of traits
n <- ncol(ys) #number of observation

q0 <- ncol(X0)#number of fixed effects
q1 <- q0 + 1  #Nuber of fixed effect including SNP

nr=n
if(!is.null(K)) tv=ncol(K)

#decomposation without fixed effect
#print("Caling emma.eigen.L...")
if(!is.null(K)) eig.L <- emma.eigen.L(Z, K) #this function handle both NULL Z and non-NULL Z matrix
#if(!is.null(K)) eig.L$values[which(eig.L$values<0)]=0  #Negative eigen values
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="eig.L")
Memory=GAPIT.Memory(Memory=Memory,Infor="eig.L")



#decomposation with fixed effect (SNP not included)
#print("Calling emma.eigen.R.w.Z...")
X <-  X0 #covariate variables such as population structure

if (!is.null(Z) & !is.null(K)) eig.R <- try(emma.eigen.R.w.Z(Z, K, X)) #This will be used to get REstricted ML (REML)
if (is.null(Z)  & !is.null(K)) eig.R <- try(emma.eigen.R.wo.Z(   K, X)) #This will be used to get REstricted ML (REML)
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="eig.R")
Memory=GAPIT.Memory(Memory=Memory,Infor="eig.R")

print("emma.eigen.R.w.Z called!!!")   
#Handler of error in emma
if(inherits(eig.R, "try-error"))  return(list(ps = NULL, REMLs = NA, stats = NULL, dfs = NULL,maf=NULL,nobs = NULL,Timmer=Timmer,Memory=Memory,
        vgs = NA, ves = NA, BLUP = NULL, BLUP_Plus_Mean = NULL,
        PEV = NULL, BLUE=NULL))


#-------------------------------------------------------------------------------------------------------------------->
#print("Looping through traits...")
#Loop on Traits
for (j in 1:g)
{

#--------------------------------------------------------------------------------------------------------------------<
if(!is.null(K)) REMLE <- emma.REMLE(ys[j,], X, K, Z, ngrids, llim, ulim, esp, eig.R)
if (!is.null(Z) & !is.null(K))  U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values + REMLE$delta)),rep(sqrt(1/REMLE$delta),nr - tv)),nr,((nr-tv)+length(eig.L$values)),byrow=TRUE)
if ( is.null(Z) & !is.null(K))  U <- eig.L$vectors * matrix(  sqrt(1/(eig.L$values + REMLE$delta)),nr,length(eig.L$values),byrow=TRUE)

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="REML")
Memory=GAPIT.Memory(Memory=Memory,Infor="REML")
#-------------------------------------------------------------------------------------------------------------------->

#The cases that go though multiple file once
file.stop=file.to
if(optOnly) file.stop=file.from 
if(fullGD)  file.stop=file.from 
if(!fullGD & !optOnly) print("Screening SNPs from file...")

#Add loop for genotype data files
for (file in file.from:file.stop)
{
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="New Genotype file")
Memory=GAPIT.Memory(Memory=Memory,Infor="New Genotype file")


  
  frag=1
  numSNP=file.fragment
  myFRG=NULL
while(numSNP==file.fragment) {     #this is problematic if the read end at the last line  

  #initial previous SNP storage
  x.prev <- vector(length = 0)  

  #force to skip the while loop if optOnly
  if(optOnly) numSNP=0
  
  #Determine the case of first file and first fragment: skip read file
  if(file==file.from & frag==1& SNP.fraction<1){
    firstFileFirstFrag=TRUE
  }else{
    firstFileFirstFrag=FALSE  
  }
  
#In case of xs is not full GD, replace xs from file
  if(!fullGD & !optOnly & !firstFileFirstFrag )
  { 
#update xs for each file
    rm(xs)
    gc()
    print(paste("Current file: ",file," , Fragment: ",frag,sep=""))
    myFRG=GAPIT.Fragment( file.path=file.path,  file.total=file.total,file.G=file.G,file.Ext.G=file.Ext.G,
                          seed=seed,SNP.fraction=SNP.fraction,SNP.effect=SNP.effect,genoFormat=genoFormat, 
                          file.GD=file.GD,file.Ext.GD=file.Ext.GD,file.GM=file.GM,file.Ext.GM=file.Ext.GM,file.fragment=file.fragment,file=file,frag=frag) 

      if(is.null(myFRG$GD)){
        xs=NULL
      }else{
        xs=myFRG$GD[GTindex,] 
      }


        if(!is.null(myFRG$GI))    {
          colnames(myFRG$GI)=c("SNP","Chromosome","Position")
          GI=as.matrix(myFRG$GI)
        }


      if(!is.null(myFRG$GI))    {
        numSNP=ncol(myFRG$GD)
      }  else{
       numSNP=0
      }
      if(is.null(myFRG))numSNP=0  #force to end the while loop
  } # end of if(!fullGD)   

  if(fullGD)numSNP=0  #force to end the while loop  

#Skip REML if xs is from a empty fragment file    
if(!is.null(xs))  {
  
  if (is.null(dim(xs)) || nrow(xs) == 1)  xs <- matrix(xs, length(xs),1)
  m <- ncol(xs) #number of SNPs
  t <- nrow(xs) #number of individuals
  
  #allocate spaces for SNPs
  dfs <- matrix(nrow = m, ncol = g)
  stats <- matrix(nrow = m, ncol = g)
  ps <- matrix(nrow = m, ncol = g)
  nobs <- matrix(nrow = m, ncol = g)
  maf <- matrix(nrow = m, ncol = g)
  #print(paste("Memory allocated.",sep=""))
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Memory allocation")
  Memory=GAPIT.Memory(Memory=Memory,Infor="Memory allocation")
  
  if(optOnly)mloop=0
  if(!optOnly)mloop=m
   
  #Loop on SNPs
  #print(paste("Number of SNPs is ",mloop," in genotype file ",file, sep=""))
for (i in 0:mloop){
#print(i)
#--------------------------------------------------------------------------------------------------------------------<
    normalCase=TRUE
    if ((i>0)&(floor(i/1000)==i/1000))  print(paste("Genotype file: ", file,", SNP: ",i," ",sep=""))

    # To extract current snp. It save computation for next one in case they are identical
    if(i==0&file==file.from){
      #For the model without fitting SNP
      vids <- !is.na(ys[j,]) #### Feng changed
      xv <- ys[j, vids]*0+1      #### Feng changed
    }

    if(i>0){
      vids <- !is.na(xs[,i]) #### Feng changed
      xv <- xs[vids,i]      #### Feng changed
      vids.TRUE=which(vids==TRUE)
      vids.FALSE=which(vids==FALSE)
      ns=length(xv)
      ss=sum(xv)
      maf[i]=min(.5*ss/ns,1-.5*ss/ns)
	nobs[i]=ns
    }

    #Situation of no variation for SNP except the fisrt one(synthetic for EMMAx/P3D)
    if ((min(xv) ==max(xv) )&i>0)
    {
      dfs[i, ] <- rep(NA, g)
      stats[i, ] <- rep(NA, g)
      ps[i, ] = rep(1, g)
      normalCase=FALSE
    }else if(identical(x.prev, xv))     #Situation of the SNP is identical to previous
    {
      if(i>1){
        dfs[i, ] <- dfs[i - 1, ]
        stats[i, ] <- stats[i - 1, ]
        ps[i, ] <- ps[i - 1, ]
        normalCase=FALSE
      }
    }
#-------------------------------------------------------------------------------------------------------------------->


    #Normal case
    if(normalCase)
    {

#--------------------------------------------------------------------------------------------------------------------<
      #nv <- sum(vids)
      yv <- ys[j, vids] #### Feng changed
      nr <- sum(vids) #### Feng changed
      if (!is.null(Z) & !is.null(K))
      {
        r<- ncol(Z) ####Feng, add a variable to indicate the number of random effect
        vran <- vids[1:r] ###Feng, add a variable to indicate random effects with nonmissing genotype
        tv <- sum(vran)  #### Feng changed
      }






#-------------------------------------------------------------------------------------------------------------------->

#--------------------------------------------------------------------------------------------------------------------<



      if(i>0) dfs[i, j] <- nr - q1
    	if(i>0) X <- cbind(X0[vids, , drop = FALSE], xs[vids,i])

       #Recalculate eig and REML if not using P3D  NOTE THIS USED TO BE BEFORE the two solid lines
      if(SNP.P3D==FALSE & !is.null(K))
      {
        if (!is.null(Z)) eig.R <- emma.eigen.R.w.Z(Z, K, X) #This will be used to get REstricted ML (REML)
        if (is.null(Z)) eig.R <- emma.eigen.R.wo.Z(   K, X) #This will be used to get REstricted ML (REML)
        if (!is.null(Z)) REMLE <- emma.REMLE(ys[j,], X, K, Z, ngrids, llim, ulim, esp, eig.R)
        if ( is.null(Z)) REMLE <- emma.REMLE(ys[j,], X, K, Z = NULL, ngrids, llim, ulim, esp, eig.R)
        if (!is.null(Z) & !is.null(K))  U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values + REMLE$delta)),rep(sqrt(1/REMLE$delta),nr - tv)),nr,((nr-tv)+length(eig.L$values)),byrow=TRUE)
        if ( is.null(Z) & !is.null(K))  U <- eig.L$vectors * matrix(  sqrt(1/(eig.L$values + REMLE$delta)),nr,length(eig.L$values),byrow=TRUE)
      }

      if(n==nr)
      {
        if(!is.null(K))
        {
            yt <- crossprod(U, yv)
            Xt <- crossprod(U, X)
        }else{
        yt=yv
        Xt=X
        }
        XX=crossprod(Xt, Xt)



        if(XX[1,1] == "NaN")
        {
          Xt[which(Xt=="NaN")]=0
          yt[which(yt=="NaN")]=0
          XX=crossprod(Xt, Xt)
        }
        XY=crossprod(Xt, yt)
      }

      #Missing SNP
      if(n>nr)
      {
       UU=crossprod(U,U)
       A11=UU[vids.TRUE,vids.TRUE]
       A12=UU[vids.TRUE,vids.FALSE]
       A21=UU[vids.FALSE,vids.TRUE]
       A22=UU[vids.FALSE,vids.FALSE]
       A22i=try(solve(A22) )
       if(inherits(A22i, "try-error")) A22i <- ginv(A22)

       F11=A11-A12%*%A22i%*%A21
       XX=crossprod(X,F11)%*%X
       XY=crossprod(X,F11)%*%yv
      }
      iXX <- try(solve(XX) )
      if(inherits(iXX, "try-error")) iXX <- ginv(crossprod(Xt, Xt))
      beta <- iXX %*% XY

#-------------------------------------------------------------------------------------------------------------------->

#--------------------------------------------------------------------------------------------------------------------<
      if(i==0 &file==file.from  & !is.null(K))
      {
        Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="ReducedModel")
	  Memory=GAPIT.Memory(Memory=Memory,Infor="ReducdModel")

        #Calculate the blup gammahat = vgKZprimeVinv*(Y-Xbetahat)
        vgs <- REMLE$vg
        ves <- REMLE$ve
        REMLs <- REMLE$REML

        XtimesBetaHat <- X %*% beta
        YminusXtimesBetaHat <- ys[j,]- XtimesBetaHat
        vgK <- REMLE$vg*K
        Dt <- crossprod(U, YminusXtimesBetaHat)
        if (!is.null(Z))  Zt <- crossprod(U, Z)
        if (is.null(Z)) Zt <- t(U)

        if(XX[1,1] == "NaN")
        {
        Dt[which(Dt=="NaN")]=0
        Zt[which(Zt=="NaN")]=0
        }

        BLUP <- K %*% crossprod(Zt, Dt) #Using K instead of vgK because using H=V/Vg

        grand.mean.vector <- rep(beta[1], length(BLUP))
        BLUP_Plus_Mean <- grand.mean.vector + BLUP
    	  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="BLUP")
        Memory=GAPIT.Memory(Memory=Memory,Infor="BLUP")

        #PEV
        C11=try(vgs*solve(crossprod(Xt,Xt)))
        if(inherits(C11, "try-error")) C11=vgs*ginv(crossprod(Xt,Xt))

        C21=-K%*%crossprod(Zt,Xt)%*%C11
        Kinv=try(solve(K)    )
        if(inherits(Kinv, "try-error")) Kinv=ginv(K)

        if(!is.null(Z)) term.0=crossprod(Z,Z)/ves
        if(is.null(Z)) term.0=diag(1/ves,nrow(K))

        term.1=try(solve(term.0+Kinv/vgs )  )
        if(inherits(term.1, "try-error")) term.1=ginv(term.0+Kinv/vgs )

        term.2=C21%*%crossprod(Xt,Zt)%*%K
        C22=(term.1-term.2 )
        PEV=as.matrix(diag(C22))
        BLUE=X%*%beta

    	  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PEV")
        Memory=GAPIT.Memory(Memory=Memory,Infor="PEV")

      }#end of if(i==0&file==file.from   & !is.null(K))
#-------------------------------------------------------------------------------------------------------------------->

#--------------------------------------------------------------------------------------------------------------------<
      if(i==0 &file==file.from  & is.null(K))
      {
        YY=crossprod(yt, yt)
        ves=(YY-crossprod(beta,XY))/(n-q0)
        r=yt-X%*%iXX%*%XY
        REMLs=-.5*(n-q0)*log(det(ves))                  -.5*n                   -.5*(n-q0)*log(2*pi)
#       REMLs=-.5*n*log(det(ves)) -.5*log(det(iXX)/ves) -.5*crossprod(r,r)/ves  -.5*(n-q0)*log(2*pi)
        vgs = 0
        BLUP = 0
        BLUP_Plus_Mean = NaN
        PEV = ves
       	BLUE=X%*%beta
      }

      #calculate t statistics and probabilty
      if(i > 0)
      {
        if(!is.null(K)) stats[i, j] <- beta[q1]/sqrt(iXX[q1, q1] *REMLE$vg)
        if(is.null(K)) stats[i, j] <- beta[q1]/sqrt(iXX[q1, q1] *ves)
        ps[i, ] <- 2 * pt(abs(stats[i, ]), dfs[i, ],lower.tail = FALSE)
      }
#-------------------------------------------------------------------------------------------------------------------->

    } # End of if(normalCase)
    x.prev=xv #update SNP

} # End of loop on SNPs

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Screening SNPs")
Memory=GAPIT.Memory(Memory=Memory,Infor="Screening SNPs")

#output p value for the genotype file
if(!fullGD)
{
  write.table(GI, paste("GAPIT.TMP.GI.",file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = TRUE)
  write.table(ps, paste("GAPIT.TMP.ps.",file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE)
  write.table(maf, paste("GAPIT.TMP.maf.",file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE)
  write.table(nobs, paste("GAPIT.TMP.nobs.",file,".",frag,".txt",sep=""), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE)
  #rm(dfs,stats,ps,nobs,maf,GI)   #This cause problem on return
  #gc()
}


    frag=frag+1   #Progress to next fragment
    
} #end of if(!is.null(X))
    
} #end of repeat on fragment 



} # Ebd of loop on file
} # End of loop on traits

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GWAS done for this Trait")
Memory=GAPIT.Memory(Memory=Memory,Infor="GWAS done for this Trait")


return(list(ps = ps, REMLs = -2*REMLs, stats = stats, dfs = dfs,maf=maf,nobs = nobs,Timmer=Timmer,Memory=Memory,
        vgs = vgs, ves = ves, BLUP = BLUP, BLUP_Plus_Mean = BLUP_Plus_Mean,
        PEV = PEV, BLUE=BLUE))

#print("GAPIT.EMMAxP3D accomplished successfully!")
}#end of GAPIT.EMMAxP3D function

##############################################################################################
GAPIT.ZmatrixFormation <- function(Z,Y){
#Object: To expande the proportion Z to final Z
#Output: Z
#Authors: Zhiwu Zhang 
# Last update: April 22, 2011 
##############################################################################################

#split individuals in Y to the ones that are given Z and the one not
taxa.Z=as.matrix(Z[-1,1])
taxa.Y=as.matrix(Y[,1])
taxa.diff=setdiff(taxa.Y,taxa.Z)
taxa.I=as.matrix(taxa.Y[match(taxa.diff,taxa.Y,nomatch = 0)])
taxa.Z.col=as.matrix(Z[1,-1])

#Create final Z with zero block and identity block
Z0=matrix(data=0,nrow=nrow(taxa.Z),ncol=nrow(taxa.I))
Z1=diag(1,nrow(taxa.I))
ZC=as.matrix(rbind(Z0,Z1))

#To label rows and columns
label.row=rbind(as.matrix(Z[,1]),taxa.I)
label.col=t(taxa.I)

#update the zero block by the given Z matrix
position=t(as.matrix(match(taxa.Z.col,taxa.I,nomatch = 0)))
ZC[1:nrow(taxa.Z),position]=as.matrix(Z[-1,-1])

#habdler of parents do not have phenotype (colums of Z are not in taxa.I)
# To do list

#To form final Z matrix
dataPart=rbind(label.col,ZC)
Z=data.frame(cbind(label.row,dataPart))

#print("GAPIT.ZmatrixFormation accomplished successfully!")
return(Z)
}#The function GAPIT.ZmatrixFormation ends here

##############################################################################################
GAPIT.RemoveDuplicate <- function(Y){
#Object: NA
#Output: NA
#Authors: Zhiwu Zhang 
# Last update: Augus 30, 2011 
##############################################################################################
return (Y[match(unique(Y[,1]), Y[,1], nomatch = 0), ] )
}

##############################################################################################
GAPIT.QC <- function(Y,KI,GT,CV,Z){
#Object: to do data quality control
#Output: Y, KI, GD, CV, Z, flag
#Authors: Zhiwu Zhang and Alex Lipka 
# Last update: April 14, 2011 
##############################################################################################

#Remove duplicates 
print("Removing duplicates...")
Y=GAPIT.RemoveDuplicate(Y)
CV=GAPIT.RemoveDuplicate(CV)
Z=GAPIT.RemoveDuplicate(Z)


#Remove missing phenotype
print("Removing NaN...")
Y=Y[which(Y[,2]!="NaN"),]

# Remove duplicates: 
# GT row wise, Z column wise, and KI both direction.
if(exists("GT"))
{ 
taxa.kept=unique(GT[,1])
}

taxa.all=KI[,1]
taxa.uniqe=unique(taxa.all)
position=match(taxa.uniqe, taxa.all,nomatch = 0)
position.addition=cbind(1,t(1+position))
KI=KI[position,position.addition]

print("Maching Z with Kinship rowwise...")
if(exists("Z"))
{
taxa.all=as.matrix(Z[1,])
taxa.uniqe=intersect(taxa.all,taxa.all)
position=match(taxa.uniqe, taxa.all,nomatch = 0)
Z=Z[,position]
}


#Remove the columns of Z if they are not in KI/GT. KI/GT are allowed to have individuals not in Z
print("Maching Z with Kinship colwise...")
taxa.all=KI[,1]
taxa.kinship=unique(taxa.all)
taxa.Z=as.matrix(Z[1,])
#taxa.Z=colnames(Z) #This does not work for names starting with numerical or "-"
taxa.Z_K_common=intersect(taxa.kinship,taxa.Z)
Z <-cbind(Z[,1], Z[,match(taxa.Z_K_common, taxa.Z, nomatch = 0)])

#Remove the rows of Z if all the ellements sum to 0
print("Maching Z without origin...")
Z1=Z[-1,-1]
Z2=data.frame(Z1)
Z3=as.matrix(Z2)
Z4=as.numeric(Z3) #one dimemtion
Z5=matrix(data = Z4, nrow = nrow(Z1), ncol = ncol(Z1))
RS=rowSums(Z5)>0
#The above process could be simplified!
Z <- Z[c(TRUE,RS),]

#make individuals the same in Z, Y, GT and CV
print("Maching GT and CV...")
if(length(Z)<=1)stop("GAPIT says: there is no place to match IDs!")


# get intersect of all the data
taxa=intersect(Z[-1,1],Y[,1])
if(exists("GT"))taxa=intersect(taxa,taxa.kept)
if(exists("CV"))taxa=intersect(taxa,CV[,1])
if(length(taxa)<=1)stop("GAPIT says: There is no individual ID matched to covariate. Please check!")

#keep the common ones
t=c(TRUE, Z[-1,1]%in%taxa)
Z <- Z[t,]
if(length(t)<=2)stop("GAPIT says: There is no individual ID matched among data. Please check!")

#Remove the columns of Z if all the ellements sum to 0
print("QC final process...")
Z1=Z[-1,-1]
Z2=data.frame(Z1)
Z3=as.matrix(Z2)
Z4=as.numeric(Z3) #one dimemtion
Z5=matrix(data = Z4, nrow = nrow(Z1), ncol = ncol(Z1))
CS=colSums(Z5)>0
#The above process could be simplified!
Z <- Z[,c(TRUE,CS)]


Y <- Y[Y[,1]%in%taxa,]
if(exists("GT")) taxa.kept=data.frame(taxa.kept[taxa.kept%in%taxa])
if(exists("CV")) CV=CV[CV[,1]%in%taxa,]

#get position of taxa.kept in GT
position=match(taxa.kept[,1], GT[,1],nomatch = 0)

#To sort Y, GT, CV and Z
Y=Y[order(Y[,1]),]
CV=CV[order(CV[,1]),]
order.taxa.kept=order(taxa.kept[,1])
Z=Z[c(1,1+order(Z[-1,1])),]
GTindex=position[order.taxa.kept]
flag=nrow(Y)==nrow(Z)-1&nrow(Y)==nrow(GT)&nrow(Y)==nrow(CV)

print("GAPIT.QC accomplished successfully!")
return(list(Y = Y, KI = KI, GT = GT, CV = CV, Z = Z, GTindex=GTindex, flag=flag))
}#The function GAPIT.QC ends here

##############################################################################################
GAPIT.Compress <- function(KI,kinship.cluster = "average",kinship.group = "Mean",GN=nrow(KI),Timmer,Memory){
#Object: To cluster individuals into groups based on kinship
#Output: GA, KG
#Authors: Alex Lipka and Zhiwu Zhang 
# Last update: April 14, 2011 
##############################################################################################

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP start") 
Memory=GAPIT.Memory(Memory=Memory,Infor="cp start")

# Extract the line names
line.names <- KI[,1]

# Remove the first column of the kinship matrix, which is the line names
KI <- KI[ ,-1]

# Convert kinship to distance
distance.matrix <- 2 - KI 
distance.matrix.as.dist <- as.dist(distance.matrix)

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP distance") 
Memory=GAPIT.Memory(Memory=Memory,Infor="cp distance")

#print(paste("The value of kinship.cluster is ", kinship.cluster, sep = ""))



# hclust() will perform the hiearchical cluster analysis
cluster.distance.matrix <- hclust(distance.matrix.as.dist, method = kinship.cluster)


Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP cluster") 
Memory=GAPIT.Memory(Memory=Memory,Infor="cp cluster")

# Cutree will assign lines into k clusters
group.membership <- cutree(cluster.distance.matrix, k = GN) 

#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP cutree") 
#Memory=GAPIT.Memory(Memory=Memory,Infor="cp cutree")

#calculate group kinship
if(kinship.group == "Mean"){
#This matrix ooperation is much faster than tapply function for  "Mean"
x=as.factor(group.membership)
#b = model.matrix(~x-1) 
n=max(as.numeric(as.vector(x)))
b=diag(n)[x,]

KG=t(b)%*%as.matrix(KI)%*%b
CT=t(b)%*%(0*as.matrix(KI)+1)%*%b
KG=as.matrix(KG/CT)
rownames(KG)=c(1:nrow(KG))
colnames(KG)=c(1:ncol(KG))

#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP calculation original")
#Memory=GAPIT.Memory(Memory=Memory,Infor="cp calculation original")
}

gm=as.factor(group.membership)
kv=as.numeric(as.matrix(KI))
kvr=rep(gm,ncol(KI))
kvc=as.numeric(t(matrix(kvr,nrow(KI),ncol(KI))))
kInCol=t(rbind(kv,kvr,kvc))




if(kinship.group != "Mean"){
#if (kinship.group == "Mean")
#    KG<- tapply(kInCol[,1], list(kInCol[,2], kInCol[,3]), mean)

if (kinship.group == "Max")    
    KG <- tapply(kInCol[,1], list(kInCol[,2], kInCol[,3]), max)
if (kinship.group == "Min")   
    KG <- tapply(kInCol[,1], list(kInCol[,2], kInCol[,3]), min)    
if (kinship.group == "Median")  
    KG <- tapply(kInCol[,1], list(kInCol[,2], kInCol[,3]), median)  
} #this is end of brancing "Mean" and the rest
    
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP calculation") 
Memory=GAPIT.Memory(Memory=Memory,Infor="cp calculation")

# add line names 
GA <- data.frame(group.membership)
GA <- data.frame(cbind(as.character(line.names),as.numeric(group.membership) ))

#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="CP Final") 
#Memory=GAPIT.Memory(Memory=Memory,Infor="CP Final")

#write.table(KG, paste("KG_from_", kinship.group, "_Method.txt"), quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE)

#print("GAPIT.Compress accomplished successfully!")
return(list(GA=GA, KG=KG,Timmer=Timmer,Memory=Memory))
}#The function GAPIT.Compress ends here

##############################################################################################
GAPIT.Block <- function(Z,GA,KG){
#Object: To split a group kinship into two blocks containing individuals with and without phenotype
#Output: GAU,KW,KO,KWO
#Authors: Zhiwu Zhang and Alex Lipka 
# Last update: April 14, 2011 
##############################################################################################

# To separate group kiship into two blocks: with and without phenotype.
# A group goes to with phenotype as loog as it has one phenotyped individual.

#find position in group assignment (GA) for the individual associate with phenotype (specified by Z)
#taxa=unique(intersect(as.matrix(Z[1,-1]),GA[,1]))

taxa.Z=as.matrix(Z[1,-1])
taxa.GA=as.matrix(GA[,1])
position=taxa.GA%in%taxa.Z

#Initial block as 2
GAU=cbind(GA,2)

#Assign block as 1 if the individual has phenotype
GAU[position,3]=1

#Modify the non-phenotyped individuals if they in a group with phenotyped individuals
#To find the groups with phenotyped individuals
#update block assignment for all these groups
#get list of group that should be block 1

#grp.12=as.matrix(unique(GAU[,2]))
#grp.1=as.matrix(unique(GAU[which(GAU[,3]==1),2]))
#grp.2= as.matrix(setdiff(grp.12,grp.1))

grp.12=as.matrix(as.vector(unique(GAU[,2])) )
grp.1=as.matrix(as.vector(unique(GAU[which(GAU[,3]==1),2])) )
grp.2= as.matrix(as.vector(setdiff(grp.12,grp.1)))

numWithout=length(grp.2)

order.1=1:length(grp.1)
order.2=1:length(grp.2)
if(numWithout >0) grpblock=as.matrix(rbind(cbind(grp.1,1,order.1), cbind(grp.2,2,order.2)))
if(numWithout==0) grpblock=as.matrix(      cbind(grp.1,1,order.1),                       )

order.block=order(as.matrix(GAU[,3]))
colnames(grpblock)=c("grp","block","ID")

GAU0 <- merge(GAU[order.block,-3], grpblock, by.x = "X2", by.y = "grp")
GAU=GAU0[,c(2,1,3,4)]


KW=KG[grp.1,grp.1]
KO=KG[grp.2,grp.2]
KWO=KG[grp.1,grp.2]

#write.table(GAU, "GAU.txt", quote = FALSE, sep = "\t", row.names = TRUE,col.names = TRUE)

#print("GAPIT.Block accomplished successfully!")
return(list(GAU=GAU,KW=KW,KO=KO,KWO=KWO))
}#The function GAPIT.Block ends here


##############################################################################################
GAPIT.ZmatrixCompress <- function(Z,GAU){
#Object: To assign the fraction of a individual belonging to a group
#Output: Z
#Authors: Zhiwu Zhang
# Last update: April 14, 2011 
##############################################################################################

#sort Z column wise
order.Z=order(as.matrix(Z[1,-1]))
Z1=Z[-1,-1]
Z1 <- Z1[,order.Z]

#Extraction of GAU coresponding to Z, sort GAU rowwise to mach columns of Z, and make design matrix
effect.Z=as.matrix(Z[1,-1])
effect.GAU=as.matrix(GAU[,1])
GAU0=GAU[effect.GAU%in%effect.Z,]
order.GAU=order(GAU0[,1])
GAU1 <- GAU0[order.GAU,]
id.1=GAU1[which(GAU1[,3]==1),4]
n=max(as.numeric(as.vector(id.1)))
x=as.numeric(as.matrix(GAU1[,4]))
DS=diag(n)[x,]

#Z matrix from individual to group
Z1.numeric <- as.numeric(as.matrix(Z1))
Z1.matrix <- matrix(Z1.numeric, nrow = nrow(Z1), ncol = ncol(Z1)) 
Z2=Z1.matrix%*%DS

Z3=data.frame(cbind(as.character(Z[-1,1]),Z2))
Z=Z3[order(Z3[,1]),]



#print("GAPIT.ZmatrixCompress accomplished successfully!")
return(list(Z=Z))
}#The function GAPIT.ZmatrixCompress ends here

##############################################################################################
GAPIT.GS <- function(KW,KO,KWO,GAU,UW){
#Object: to derive BLUP for the individuals without phenotype
#Output: BLUP
#Authors: Zhiwu Zhang 
# Last update: April 17, 2011 
##############################################################################################

UO=try(t(KWO)%*%solve(KW)%*%UW)
if(inherits(UO, "try-error")) UO=t(KWO)%*%ginv(KW)%*%UW

n=ncol(UW) #get number of columns, add additional for individual name

#Assign BLUP of group to its individuals
BLUP=data.frame(as.matrix(GAU[,1:4]))

BLUP.W=BLUP[which(GAU[,3]==1),]
order.W=order(as.numeric(as.matrix(BLUP.W[,4])))
ID.W=as.numeric(as.matrix(BLUP.W[order.W,4]))
n.W=max(ID.W)
DS.W=diag(n.W)[ID.W,]
ind.W=DS.W%*%UW
all.W=cbind(BLUP.W[order.W,],ind.W)
all=all.W

BLUP.O=BLUP[which(GAU[,3]==2),]
if(nrow(BLUP.O)>0){
order.O=order(as.numeric(as.matrix(BLUP.O[,4])))
ID.O=as.numeric(as.matrix(BLUP.O[order.O,4]))
n.O=max(ID.O)
DS.O=diag(n.O)[ID.O,]
ind.O=DS.O%*%UO
all.O=cbind(BLUP.O[order.O,],ind.O)
all=rbind(all.W,all.O)
}

colnames(all)=c("Taxa", "Group", "RefInf","ID","BLUP","PEV")

#print("GAPIT.GS accomplished successfully!")
return(list(BLUP=all))
}#The function GAPIT.GS ends here


##############################################################################################
GAPIT.Numericalization1.Left <- function(x){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################

x[x=="X"]="N"
x[x=="-"]="N"
x[x=="+"]="N"
x[x=="/"]="N"
x[x=="K"]="Z" #K (for GT genotype)is is replaced by Z to ensure heterozygose has the largest value

n=length(x)
lev=levels(as.factor(x))
lev=setdiff(lev,"N")
len=length(lev)
#print(lev)

#1 or less status
if(len<=1)x=0

#2 status
if(len==2)x=ifelse(x=="N",1,ifelse(x==lev[1],0,2))

#3 status
if(len==3)x=ifelse(x=="N",1,ifelse(x==lev[1],0,2))

# more than 3 status
if(len> 3){
x=0
warning("Existing none biallelic SNP!")
}

return(matrix(x,n,1))
}#end of GAPIT.Numericalization1.Left function

##############################################################################################
GAPIT.Numericalization1.Right <- function(x){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################

x[x=="X"]="N"
x[x=="-"]="N"
x[x=="+"]="N"
x[x=="/"]="N"
x[x=="K"]="Z" #K (for GT genotype)is is replaced by Z to ensure heterozygose has the largest value

n=length(x)
lev=levels(as.factor(x))
lev=setdiff(lev,"N")
len=length(lev)
#print(lev)

#1 or less status
if(len<=1)x=0

#2 status
if(len==2)x=ifelse(x=="N",1,ifelse(x==lev[1],0,2))

#3 status
if(len==3)x=ifelse(x=="N",1,ifelse(x==lev[3],0,2))

# more than 3 status
if(len> 3){
x=0
warning("Existing none biallelic SNP!")
}

return(matrix(x,n,1))
}#end of GAPIT.Numericalization1.Right function



##############################################################################################
GAPIT.Numericalization1.Miss <- function(x){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################

x[x=="X"]="N"
x[x=="-"]="N"
x[x=="+"]="N"
x[x=="/"]="N"
x[x=="K"]="Z" #K (for GT genotype)is is replaced by Z to ensure heterozygose has the largest value

n=length(x)
lev=levels(as.factor(x))
lev=setdiff(lev,"N")
len=length(lev)
#print(lev)

#1 or less status
if(len<=1)x=0

#2 status
if(len==2)x=ifelse(x=="N",1,0)

#3 status
if(len==3)x=ifelse(x=="N",1,0)

# more than 3 status
if(len> 3){
x=0
warning("Existing none biallelic SNP!")
}

return(matrix(x,n,1))
}#end of GAPIT.Numericalization1.Miss function



##############################################################################################
GAPIT.Numericalization1.Add <- function(x){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################

x[x=="X"]="N"
x[x=="-"]="N"
x[x=="+"]="N"
x[x=="/"]="N"
x[x=="K"]="Z" #K (for GT genotype)is is replaced by Z to ensure heterozygose has the largest value

n=length(x)
lev=levels(as.factor(x))
lev=setdiff(lev,"N")
len=length(lev)
#print(lev)

#1 or less status
if(len<=1)x=0

#2 status
if(len==2)x=ifelse(x=="N",1,ifelse(x==lev[1],0,2))

#3 status
if(len==3)x=ifelse(x=="N",1,ifelse(x==lev[1],0,ifelse(x==lev[3],1,2)))

# more than 3 status
if(len> 3){
x=0
warning("Existing none biallelic SNP!")
}

return(matrix(x,n,1))
}#end of GAPIT.Numericalization1.Add function

##############################################################################################
GAPIT.Numericalization1.Dom <- function(x){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################

x[x=="X"]="N"
x[x=="-"]="N"
x[x=="+"]="N"
x[x=="/"]="N"
x[x=="K"]="Z"



n=length(x)
lev=levels(as.factor(x))
lev=setdiff(lev,"N")
len=length(lev)
#print(lev)


# 2 or 3 status
x=ifelse((len<2|len>3),0,ifelse(x=="N",.5,ifelse((x=="R"|x=="Y"|x=="S"|x=="W"|x=="K"|x=="M"),0,1)))

return(matrix(x,length(x),1))
}#end of GAPIT.Numericalization1.Dom function

##############################################################################################
GAPIT.Numericalization2.Right <- function(x){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################
x[x=="N"]="NN"
x[x=="X"]="NN"
x[x=="XX"]="NN"
x[x=="--"]="NN"
x[x=="++"]="NN"
x[x=="//"]="NN"


n=length(x)
lev=levels(as.factor(x))
lev=setdiff(lev,"NN")
len=length(lev)
#print(lev)


#x1=ifelse(x=="NN",NA,ifelse(x==lev[1],0,ifelse(x==lev[2],1,2)))

#1 or less status
if(len<=1)x=0

#2 status
if(len==2)x=ifelse(x=="NN",1,ifelse(x==lev[1],0,2))

#3 status
if(len==3)x=ifelse(x=="NN",1,ifelse(x==lev[3],0,2))

# more than 3 status
if(len> 3){
x=0
warning("Existing none biallelic SNP!")
}

return(matrix(x,n,1))
}#end of GAPIT.Numericalization2.Right function


##############################################################################################
GAPIT.Numericalization2.Left <- function(x){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################
x[x=="N"]="NN"
x[x=="X"]="NN"
x[x=="XX"]="NN"
x[x=="--"]="NN"
x[x=="++"]="NN"
x[x=="//"]="NN"
n=length(x)
lev=levels(as.factor(x))
lev=setdiff(lev,"NN")
len=length(lev)
#print(lev)


#x1=ifelse(x=="NN",NA,ifelse(x==lev[1],0,ifelse(x==lev[2],1,2)))

#1 or less status
if(len<=1)x=0

#2 status
if(len==2)x=ifelse(x=="NN",1,ifelse(x==lev[1],0,2))

#3 status
if(len==3)x=ifelse(x=="NN",1,ifelse(x==lev[1],0,2))

# more than 3 status
if(len> 3){
x=0
warning("Existing none biallelic SNP!")
}

return(matrix(x,n,1))
}#end of GAPIT.Numericalization2.Left function


##############################################################################################
GAPIT.Numericalization2.Miss <- function(x){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################
x[x=="N"]="NN"
x[x=="X"]="NN"
x[x=="XX"]="NN"
x[x=="--"]="NN"
x[x=="++"]="NN"
x[x=="//"]="NN"
n=length(x)
lev=levels(as.factor(x))
lev=setdiff(lev,"NN")
len=length(lev)
#print(lev)


#x1=ifelse(x=="NN",NA,ifelse(x==lev[1],0,ifelse(x==lev[2],1,2)))

#1 or less status
if(len<=1)x=0

#2 status
if(len==2)x=ifelse(x=="NN",1,0)

#3 status
if(len==3)x=ifelse(x=="NN",1,0)

# more than 3 status
if(len> 3){
x=0
warning("Existing none biallelic SNP!")
}

return(matrix(x,n,1))
}#end of GAPIT.Numericalization2.Miss function

##############################################################################################
GAPIT.Numericalization2.Add <- function(x){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################
x[x=="N"]="NN"
x[x=="X"]="NN"
x[x=="XX"]="NN"
x[x=="--"]="NN"
x[x=="++"]="NN"
x[x=="//"]="NN"
n=length(x)
lev=levels(as.factor(x))
lev=setdiff(lev,"NN")
len=length(lev)
#print(lev)


#x1=ifelse(x=="NN",NA,ifelse(x==lev[1],0,ifelse(x==lev[2],1,2)))

#1 or less status
if(len<=1)x=0

#2 status
if(len==2)x=ifelse(x=="NN",1,ifelse(x==lev[1],0,2))

#3 status
if(len==3)x=ifelse(x=="NN",1,ifelse(x==lev[1],0,ifelse(x==lev[2],1,2)))

# more than 3 status
if(len> 3){
x=0
warning("Existing none biallelic SNP!")
}

return(matrix(x,n,1))
}#end of GAPIT.Numericalization2.Add function

##############################################################################################
GAPIT.Numericalization2.Dom <- function(x){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################
x[x=="N"]="NN"
x[x=="X"]="NN"
x[x=="XX"]="NN"
x[x=="--"]="NN"
x[x=="++"]="NN"
x[x=="//"]="NN"
lev=levels(as.factor(x))
lev=setdiff(lev,"NN")
len=length(lev)
#print(lev)

#x1=ifelse(x=="NN",NA,ifelse(x==lev[1],0,ifelse(x==lev[2],1,2)))

#1 or less status
if(len<=1)x1=0

# more than 3 status
if(len> 3){
x1=0
warning("Existing none biallelic SNP!")
}

#2 status
if(len==2|len==3)x1=ifelse(x=="NN",1,ifelse(substr(x,1,1)==substr(x,2,2),0,2))

return(matrix(x1,length(x),1))
}#end of GAPIT.Numericalization2.Dom function

##############################################################################################
GAPIT.HapMap <- function(G,SNP.effect="Add",heading=TRUE){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################

#print(paste("Converting hampmap format to numerical under model of ", model,sep=""))
#gc()
#GAPIT.Memory.Object(name.of.trait="HapMap.Start")

#GT=data.frame(G[1,-(1:11)])
if(heading){
GT= t(G[1,-(1:11)])
GI= G[-1,c(1,3,4)]
}else{
GT=NULL
GI= G[,c(1,3,4)]
}

#Initial GD
GD=NULL
bit=nchar(as.character(G[2,12])) #to determine number of bits of genotype
#print(paste("Number of bits for genotype: ", bit))

if(bit==2){
if(heading){
if(SNP.effect=="Add")GD= apply(G[-1,-(1:11)],1,GAPIT.Numericalization2.Add)
if(SNP.effect=="Dom")GD= apply(G[-1,-(1:11)],1,GAPIT.Numericalization2.Dom)
if(SNP.effect=="Miss")GD= apply(G[-1,-(1:11)],1,GAPIT.Numericalization2.Miss)
if(SNP.effect=="Left")GD= apply(G[-1,-(1:11)],1,GAPIT.Numericalization2.Left)
if(SNP.effect=="Right")GD= apply(G[-1,-(1:11)],1,GAPIT.Numericalization2.Right)
}else{
if(SNP.effect=="Add")GD= apply(G[,-(1:11)],1,GAPIT.Numericalization2.Add)
if(SNP.effect=="Dom")GD= apply(G[,-(1:11)],1,GAPIT.Numericalization2.Dom)
if(SNP.effect=="Miss")GD= apply(G[,-(1:11)],1,GAPIT.Numericalization2.Miss)
if(SNP.effect=="Left")GD= apply(G[,-(1:11)],1,GAPIT.Numericalization2.Left)
if(SNP.effect=="Right")GD= apply(G[,-(1:11)],1,GAPIT.Numericalization2.Right)
}
}

if(bit==1){
  if(heading){
    if(SNP.effect=="Add")GD= apply(G[-1,-(1:11)],1,GAPIT.Numericalization1.Add)
    if(SNP.effect=="Dom")GD= apply(G[-1,-(1:11)],1,GAPIT.Numericalization1.Dom)
    if(SNP.effect=="Miss")GD= apply(G[-1,-(1:11)],1,GAPIT.Numericalization1.Miss)
    if(SNP.effect=="Left")GD= apply(G[-1,-(1:11)],1,GAPIT.Numericalization1.Left)
    if(SNP.effect=="Right")GD= apply(G[-1,-(1:11)],1,GAPIT.Numericalization1.Right)
    }else{
    if(SNP.effect=="Add")GD= apply(G[,-(1:11)],1,GAPIT.Numericalization1.Add)
    if(SNP.effect=="Dom")GD= apply(G[,-(1:11)],1,GAPIT.Numericalization1.Dom)
    if(SNP.effect=="Miss")GD= apply(G[,-(1:11)],1,GAPIT.Numericalization1.Miss)
    if(SNP.effect=="Left")GD= apply(G[,-(1:11)],1,GAPIT.Numericalization1.Left)
    if(SNP.effect=="Right")GD= apply(G[,-(1:11)],1,GAPIT.Numericalization1.Right)
  }
}

if(heading)colnames(GT)="taxa"
colnames(GI)=c("SNP","Chromosome","Position")

#GAPIT.Memory.Object(name.of.trait="HapMap.Finished")
if(is.null(GD)){
  GT=NULL
  GI=NULL
}

#print(paste("Succesfuly finished converting hampmap which has bits of ", bit,sep=""))
return(list(GT=GT,GD=GD,GI=GI))
}#end of GAPIT.HapMap function


##############################################################################################
GAPIT.CVMergePC<- function(X,Y){
#Object: To convert character SNP genotpe to numerical
#Output: Coresponding numerical value
#Authors: Feng Tian and Zhiwu Zhang
# Last update: May 30, 2011 
##############################################################################################

#Z=X+Y

Z <- merge(X, Y, by.x = colnames(X)[1], by.y = colnames(Y)[1])

return(Z)
}#end of GAPIT.CVMergePCfunction



##############################################################################################
GAPIT.Memory <- function(Memory =NULL,Infor){
#Object: To report memory usage
#Output: Memory 
#Authors: Zhiwu Zhang
# Last update: June 6, 2011 
##############################################################################################
gc()
size <- memory.size()
#print(paste("Memory usage: ",size," for", Infor))
if(is.null(Memory)) {
Increased=0
Memory =cbind(Infor,size ,Increased)
}else{
Increased=0
Memory.current=cbind(Infor,size ,Increased)
Memory=rbind(Memory,Memory.current)
Memory[nrow(Memory),3]=as.numeric(as.matrix(Memory[nrow(Memory),2]))-as.numeric(as.matrix(Memory[nrow(Memory)-1,2]))
}

return (Memory)
}#end of GAPIT.Memory function

##############################################################################################
GAPIT.Memory.Object <- function(name.of.trait="Trait"){
# Object: To report memoery usage
# Authors: Heuristic Andrew
# http://heuristically.wordpress.com/2010/01/04/r-memory-usage-statistics-variable/
# Modified by Zhiwu Zhang
# Last update: may 29, 2011 
##############################################################################################
  
# print aggregate memory usage statistics 
print(paste('R is using', memory.size(), 'MB out of limit', memory.limit(), 'MB')) 
  
# create function to return matrix of memory consumption 
object.sizes <- function() 
{ 
    return(rev(sort(sapply(ls(envir=.GlobalEnv), function (object.name) 
        object.size(get(object.name)))))) 
} 

# export file in table format 
memory=object.sizes() 
file=paste("GAPIT.", name.of.trait,".Memory.Object.csv" ,sep = "")
write.table(memory, file, quote = FALSE, sep = ",", row.names = TRUE,col.names = TRUE)


# export file in PDF format 
pdf(paste("GAPIT.", name.of.trait,".Memory.Object.pdf" ,sep = ""))
# draw bar plot 
barplot(object.sizes(), 
    main="Memory usage by object", ylab="Bytes", xlab="Variable name", 
    col=heat.colors(length(object.sizes()))) 
# draw dot chart 
dotchart(object.sizes(), main="Memory usage by object", xlab="Bytes") 
# draw pie chart 
pie(object.sizes(), main="Memory usage by object")
dev.off()  
}  



##############################################################################################
GAPIT.Timmer <- function(Timmer=NULL,Infor){
#Object: To report current time
#Output: Timmer
#Authors: Zhiwu Zhang
# Last update: may 8, 2011 
##############################################################################################

Time<- Sys.time()
if(is.null(Timmer)) {
Elapsed=0
Timmer=cbind(Infor,Time,Elapsed)
}else{
Elapsed=0
Timmer.current=cbind(Infor,Time,Elapsed)
Timmer=rbind(Timmer,Timmer.current)
Timmer[nrow(Timmer),3]=as.numeric(as.matrix(Timmer[nrow(Timmer),2]))-as.numeric(as.matrix(Timmer[nrow(Timmer)-1,2]))
}

#print(paste('Time used: ', Timmer[nrow(Timmer),3], ' seconds for ',Infor,sep="" )) 
return (Timmer)
}#end of GAPIT.EMMAxP3D function


##############################################################################################
GAPIT.Log <- function(Y=Y,KI=KI,Z=Z,CV=CV,SNP.P3D=SNP.P3D,
				group.from = group.from ,group.to =group.to ,group.by = group.by ,kinship.cluster = kinship.cluster, kinship.group= kinship.group,
                      	ngrid = ngrid , llin = llin , ulim = ulim , esp = esp ,name.of.trait = name.of.trait){
#Object: To report model factors
#Output: Text file (GAPIT.Log.txt)
#Authors: Zhiwu Zhang
# Last update: may 16, 2011 
##############################################################################################

#Creat storage
facto <- list(NULL)
value <- list(NULL)

#collecting model factors

facto[[1]]="Trait"
value[[1]]=paste(dim(Y))

facto[[2]]="group.by "
value[[2]]=group.by 

facto[[3]]="Trait name "
value[[3]]=name.of.trait

facto[[4]]="Kinship"
value[[4]]=dim(KI)

facto[[5]]="Z Matrix"
value[[5]]=dim(Z)

facto[[6]]="Covariate"
value[[6]]=dim(CV)

facto[[7]]="SNP.P3D"
value[[7]]=SNP.P3D

facto[[8]]="Clustering algorithms"
value[[8]]=kinship.cluster

facto[[9]]="Group kinship"
value[[9]]=kinship.group

facto[[10]]="group.from "
value[[10]]=group.from 

facto[[11]]="group.to "
value[[11]]=group.to 



theLog=as.matrix(cbind(facto,value))
#theLog=as.character(as.matrix(cbind(facto,value)))
colnames(theLog)=c("Model", "Value")
file=paste("GAPIT.", name.of.trait,".Log.csv" ,sep = "")
write.table(theLog, file, quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)

return (theLog)
}


##############################################################################################
GAPIT.Perform.BH.FDR.Multiple.Correction.Procedure <- function(PWI = PWI, FDR.Rate = 0.05, FDR.Procedure = "BH"){
#Object: Conduct the Benjamini-Hochberg FDR-Controlling Procedure
#Output: PWIP, number.of.significant.SNPs
#Authors: Alex Lipka and Zhiwu Zhang 
# Last update: May 5, 2011 
##############################################################################################


    #Make sure that your compouter has the latest version of Bioconductor (the "Biobase" package) and multtest

if(is.null(PWI))
{
PWIP=NULL
number.of.significant.SNPs = 0
}

if(!is.null(PWI))
{  
 
    #library(multtest)
    
    if(dim(PWI)[1] == 1){
     PWIP <- cbind(PWI, PWI[4])
     colnames(PWIP)[5] <- "FDR_Adjusted_P-values"
    }
   
    if(dim(PWI)[1] > 1){ 
    #mt.rawp2adjp Performs the Simes procedure.  The output should be two columns, Left column: originial p-value
    #Right column: Simes corrected p-value
    res <- mt.rawp2adjp(PWI[,4], FDR.Procedure)

    #This command should order the p-values in the order of the SNPs in the data set
  adjp <- res$adjp[order(res$index), ]

  #round(adjp[1:7,],4)
    #Logical statment: 0, if Ho is not rejected; 1, if  Ho is rejected, by the Simes corrected p-value
#  temp <- mt.reject(adjp[,2], FDR.Rate)

    #Lists all number of SNPs that were rejected by the BY procedure
  #temp$r

    #Attach the FDR adjusted p-values to AS_Results

  PWIP <- cbind(PWI, adjp[,2])

    #Sort these data by lowest to highest FDR adjusted p-value
  PWIP <- PWIP[order(PWIP[,4]),]
  
  colnames(PWIP)[7] <- "FDR_Adjusted_P-values"
#  number.of.significant.SNPs = temp$r
  }
  #print("GAPIT.Perform.BH.FDR.Multiple.Correction.Procedure accomplished successfully!")
}  
  #return(list(PWIP=PWIP, number.of.significant.SNPs = number.of.significant.SNPs))
  return(list(PWIP=PWIP))

}



##############################################################################################
GAPIT.Pruning <- function(values,DPP=5000){
#Object: To get index of subset that evenly distribute
#Output: Index
#Authors: Zhiwu Zhang
# Last update: May 28, 2011 
##############################################################################################

#No change if below the requirement
if(length(values)<=DPP)return(c(1:length(values)))

#values= log.P.values

values=sqrt(values)  #This shift the weight a little bit to the low building.
theMin=min(values)
theMax=max(values)
range=theMax-theMin
interval=range/DPP

ladder=round(values/interval)
ladder2=c(ladder[-1],0)
keep=ladder-ladder2
index=which(keep>0)

return(index)
}#end of GAPIT.Pruning 



##############################################################################################
GAPIT.QQ <- function(P.values, plot.type = "log_P_values", name.of.trait = "Trait",DPP=50000){
#Object: Make a QQ-Plot of the P-values
#Options for plot.type = "log_P_values" and "P_values" 
#Output: A pdf of the QQ-plot
#Authors: Alex Lipka and Zhiwu Zhang
# Last update: May 9, 2011 
##############################################################################################


# Sort the data by the raw P-values
print("Sorting p values")
print(paste("Number of P values: ",length(P.values)))
if(length(P.values[P.values>0])<1) return(NULL)

DPP=round(DPP/4) #Reduce to 1/4 for QQ plot

P.values <- P.values[order(P.values)]
  
#Set up the p-value quantiles
print("Setting p_value_quantiles...")
p_value_quantiles <- (1:length(P.values))/(length(P.values)+1)


if(plot.type == "log_P_values")
{
    log.P.values <- -log10(P.values)
    log.Quantiles <- -log10(p_value_quantiles)
	
    index=GAPIT.Pruning(log.P.values,DPP=DPP)
    log.P.values=log.P.values[index ]
    log.Quantiles=log.Quantiles[index]

    pdf(paste("GAPIT.", name.of.trait,".QQ-Plot.pdf" ,sep = ""))
    par(mar = c(5,5,5,5))
    qqplot(log.Quantiles, log.P.values, xlim = c(0,max(log.Quantiles)), ylim = c(0,max(log.P.values)), 
           cex.axis=1.5, cex.lab=2, lty = 1, lwd = 1, col = "Blue" ,xlab =expression(Expected~~-log[10](italic(p))),
           ylab = expression(Observed~~-log[10](italic(p))), main = paste(name.of.trait,sep=" "))
    abline(a = 0, b = 1, col = "red")
    dev.off()   
}


if(plot.type == "P_values")
{
  pdf(paste("QQ-Plot_", name.of.trait,".pdf" ,sep = ""))
  par(mar = c(5,5,5,5))
  qqplot(p_value_quantiles, P.values, xlim = c(0,1), 
         ylim = c(0,1), type = "l" , xlab = "Uniform[0,1] Theoretical Quantiles", 
         lty = 1, lwd = 1, ylab = "Quantiles of P-values from GWAS", col = "Blue",
         main = paste(name.of.trait,sep=" "))
  abline(a = 0, b = 1, col = "red")
  dev.off()   
}


  #print("GAPIT.QQ  accomplished successfully!")
  

}


##############################################################################################
GAPIT.Manhattan <- function(GI.MP = NULL, name.of.trait = "Trait", 
                   plot.type = "Genomewise",DPP=50000){
#Object: Make a Manhattan Plot
#Options for plot.type = "Separate_Graph_for_Each_Chromosome" and "Same_Graph_for_Each_Chromosome" 
#Output: A pdf of the Manhattan Plot
#Authors: Alex Lipka, Zhiwu Zhang, and Meng Li 
# Last update: May 10, 2011 
##############################################################################################

print("Manhattan ploting...")

#do nothing if null input
if(is.null(GI.MP)) return

GI.MP=matrix(as.numeric(as.matrix(GI.MP) ) ,nrow(GI.MP),ncol(GI.MP))

#Remove all SNPs that do not have a choromosome and bp position
GI.MP <- GI.MP[!is.na(GI.MP[,1]),]
GI.MP <- GI.MP[!is.na(GI.MP[,2]),]

#Remove all SNPs that have P values above 0 (not na etc)
GI.MP <- GI.MP[GI.MP[,3]>0,]

#Replace P the -log10 of the P-values
GI.MP[,3] <-  -log10(GI.MP[,3])
y.lim <- ceiling(max(GI.MP[,3]))
chm.to.analyze <- unique(GI.MP[,1]) 
chm.to.analyze=chm.to.analyze[order(chm.to.analyze)]
numCHR= length(chm.to.analyze)

#Chromosomewise plot
if(plot.type == "Chromosomewise")
{
  pdf(paste("GAPIT.", name.of.trait,".Manhattan-Plot.Chromosomewise.pdf" ,sep = ""), width = 10)
  par(mar = c(5,5,4,3), lab = c(8,5,7))
  for(i in 1:numCHR)
  {
    #Extract SBP on this chromosome
    subset=GI.MP[GI.MP[,1]==chm.to.analyze[i],]
  	#print(paste("CHR: ",i, " #SNPs: ",length(subset),sep=""))
  	#print(dim(subset))
  	#print((subset))
  	
  	if(length(subset)>3){
      x <- as.numeric(subset[,2])/10^(6)
      y <- as.numeric(subset[,3])
    }else{
      x <- as.numeric(subset[2])/10^(6)
      y <- as.numeric(subset[3])
    }
 	  #print(paste("befor prune: chr: ",i, "length: ",length(x),"max p",max(y), "min p",min(y), "max x",max(x), "Min x",min(x)))


        
  	#Prune most non important SNPs off the plots
    order=order(y,decreasing = TRUE)
    y=y[order]
    x=x[order]
    
    index=GAPIT.Pruning(y,DPP=round(DPP/numCHR))
      	
  	x=x[index]
  	y=y[index]
  	
 	  #print(paste("after prune: chr: ",i, "length: ",length(x),"max p",max(y), "min p",min(y), "max x",max(x), "Min x",min(x)))
 	  
    #color.vector <- subset(temp.par.data[,7], temp.par.data[,4] == i)
    plot(y~x,type="p", ylim=c(0,y.lim), xlim = c(min(x), max(x)), col = "navy", xlab = expression(Base~Pairs~(x10^-6)), ylab = "-Log Base 10 p-value", main = paste("Chromosome",chm.to.analyze[i],sep=" "))
  	#print("manhattan plot (chr) finished")    
  }
  dev.off()
  print("manhattan plot on chromosome finished")
} #Chromosomewise plot


#Genomewise plot
if(plot.type == "Genomewise")
{
  
  GI.MP <- GI.MP[order(GI.MP[,2]),]
  GI.MP <- GI.MP[order(GI.MP[,1]),]
  color.vector <- rep(c("orangered","navyblue"),numCHR)
  ticks=NULL
  lastbase=0
  
  #change base position to accumulatives
  for (i in chm.to.analyze)
  {
    index=(GI.MP[,1]==i)
    ticks <- c(ticks, lastbase+mean(GI.MP[index,2])) 
    GI.MP[index,2]=GI.MP[index,2]+lastbase
    lastbase=max(GI.MP[index,2])
  }

    x0 <- as.numeric(GI.MP[,2])
    y0 <- as.numeric(GI.MP[,3])
    z0 <- as.numeric(GI.MP[,1])
	position=order(y0,decreasing = TRUE)
    index0=GAPIT.Pruning(y0[position],DPP=DPP)
	index=position[index0]
	x=x0[index]
	y=y0[index]
	z=z0[index]

  pdf(paste("GAPIT.", name.of.trait,".Manhattan-Plot.Genomewise.pdf" ,sep = ""), width = 10)
  par(mar = c(5,5,5,1))

 plot(y~x,xlab=expression(Chromosome),ylab=expression(-log[10](italic(p))) ,
       cex.lab=2,col=ifelse(z%%2==0,"orangered","navy"),axes=FALSE,type = "p",pch=20,main = paste(name.of.trait,sep=" "))

 axis(1, at=ticks,cex.axis=1.5,labels=chm.to.analyze,tick=F)
  axis(2, at=1:y.lim,cex.axis=1.5,labels=1:y.lim,tick=F)
  box()
  dev.off()
} #Genomewise plot

  #print("GAPIT.Manhattan accomplished successfully!")
} #end of GAPIT.Manhattan


##############################################################################################
GAPIT.Compression.Visualization <- function(Compression = Compression, name.of.trait = name.of.trait){
#Object: Conduct the Benjamini-Hochberg FDR-Controlling Procedure
#Output: Three pdfs: One of the log likelihood function, one of the genetic and error variance component,
#                    and one of the heritabilities
#Authors: Alex Lipka and Zhiwu Zhang 
# Last update: May 10, 2011 
##############################################################################################

#Graph the optimum compression 

#print("Compression")
#print(Compression)

if(length(Compression)<=6) Compression=t(as.matrix(Compression[which(Compression[,4]!="NULL" | Compression[,4]!="NaN"),]))
if(length(Compression)==6) Compression=matrix(Compression,1,6) 
#print("Compression matrix")
#print(Compression)
#print(length(Compression) )

if(length(Compression)>6) Compression=Compression[which(Compression[,4]!="NULL" | Compression[,4]!="NaN"),]

#print("Checking")
if(length(Compression)<1) return() #no result

#Pie chart for the optimum setting
#-------------------------------------------------------------------------------
print("Pie chart")
LL=as.numeric(Compression[,4])
Compression.best=Compression[1,] 
variance=as.numeric(Compression.best[5:6])
colors <- c("grey50","grey70")
labels0 <- round(variance/sum(variance) * 100, 1)
labels <- paste(labels0, "%", sep="")
LL.best0=as.numeric(Compression.best[4]  )
LL.best=floor(LL.best0*100)/100
theOptimum=paste(c(Compression.best[c(1:3)],LL.best) )

pdf(paste("GAPIT.", name.of.trait,".Optimum.pdf", sep = ""), width = 14)
par(mfrow = c(1,1), mar = c(1,1,5,5), lab = c(5,5,7))
pie(variance,  col=colors, labels=labels,angle=45)
legend(1.0, 0.5, c("Genetic variance","Residual variance"), cex=1.5, 
   fill=colors)

#Display the optimum compression
text(1.5,.0, "The optimum compression", col= "red")
for(i in 1:4){
text(1.5,-.1*i, theOptimum[i], col= "red")
}
dev.off() 

#sort Compression by group number for plot order
Compression=Compression[order(as.numeric(Compression[,3])),]

#Graph compression with multiple groups
#print("Graph compression with multiple groups")


if(length(Compression)==6) return() #For to exit if only one row


#print("It should not go here")

if(length(unique(Compression[,3]))>1)
{
#Create a vector of colors
#print("Setting colors")
color.vector.basic <- c("red","blue","black", "blueviolet","indianred","cadetblue","orange")
color.vector.addition <- setdiff(c(colors()[grep("red",colors())], colors()[grep("blue",colors())]),color.vector.basic )
color.vector.addition.mixed <- sample(color.vector.addition,max(0,((length(unique(Compression[,1])) * length(unique(Compression[,2])))-length(color.vector.basic))))  
color.vector <- c(color.vector.basic,color.vector.addition.mixed )


#Create a vector of numbers for the line dot types
line.vector <-  rep(1:(length(unique(Compression[,1])) * length(unique(Compression[,2]))))

#We want to have a total of three plots, one displaying the likelihood function, one displaying the variance components, and one displaying the
# heritability 

pdf(paste("GAPIT.", name.of.trait,".Compression.multiple.group.", ".pdf", sep = ""), width = 14)
par(mfrow = c(2,3), mar = c(5,5,1,1), lab = c(5,5,7))

# Make the likelihood function plot
#print("Likelihood")
k <- 1
for(i in 1:length(unique(Compression[,1]))){
  for(j in 1:length(unique(Compression[,2]))){

     if((i == 1)&(j == 1)) {
      Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,4])  
      plot(y~x,type="l", pch = 30, lty = line.vector[i], ylim=c(min(as.numeric(Compression[,4])),max(as.numeric(Compression[,4]))), xlim = c(min(as.numeric(Compression[,3])),max(as.numeric(Compression[,3]))),
      col = color.vector[j], xlab = "Number of Groups", ylab = "-2Log Likelihoood", )
      label = paste(c(as.character(unique(Compression[,1]))[k]," ",as.character(unique(Compression[,2]))[j]), collapse = "")
      }
  
    if((i != 1)|(j != 1)) {
      k <- k+1   
      Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,4])  
      lines(y~x,type="l", pch = 30, lty = line.vector[i], col = color.vector[j])
      label = c(label, paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = ""))
      }  
   }
 }
 #Make a legend
  #legend("topright",  label, fill = color.vector) 

 

# Make the genetic variance component plots
#print("genetic variance")
k <- 1
for(i in 1:length(unique(Compression[,1]))){
  for(j in 1:length(unique(Compression[,2]))){

     if((i == 1)&(j == 1)) {
      Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,5])  
      plot(y~x,type="l", pch = 17,  lty = line.vector[i], ylim=c(min(as.numeric(Compression[,5])),max(as.numeric(Compression[,5]))), xlim = c(min(as.numeric(Compression[,3])),max(as.numeric(Compression[,3]))),
      col = color.vector[j], xlab = "Number of Groups", ylab = "Genetic Variance", )
      #label = paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "")
      }
  
    if((i != 1)|(j != 1)) {
      k <- k+1   
      Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,5])  
      lines(y~x,type="l", pch = 17, lty = line.vector[i], col = color.vector[j])
      #label = c(label, paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = ""))
      }  
   }
 }
 #Make a legend
  #legend("topleft",  label, fill = color.vector) 


# Make the residual variance component plots
k <- 1
for(i in 1:length(unique(Compression[,1]))){
  for(j in 1:length(unique(Compression[,2]))){

     if((i == 1)&(j == 1)) {
      Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,6])  
      plot(y~x,type="l", pch = 17,  ylim=c(min(as.numeric(Compression[,6])),max(as.numeric(Compression[,6]))), xlim = c(min(as.numeric(Compression[,3])),max(as.numeric(Compression[,3]))),
      col = color.vector[j], xlab = "Number of Groups", ylab = "Residual Variance", )
      #label = paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "")
      }
  
    if((i != 1)|(j != 1)) {
      k <- k+1   
      Compression.subset <- Compression[which( (Compression[,1] == as.character(unique(Compression[,1])[i])) & (Compression[,2] == as.character(unique(Compression[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,6])  
      lines(y~x,type="l", pch = 17, lty = line.vector[i], col = color.vector[j])
      #label = c(label, paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = ""))
      }  
   }
 }
 #Make a legend
  #legend("topright",  label, fill = color.vector) 


#calculate total variance and h2
#print("h2")
heritablilty.vector <- as.numeric(Compression[,5])/(as.numeric(Compression[,5]) + as.numeric(Compression[,6]))
totalVariance.vector <- as.numeric(as.numeric(Compression[,5]) + as.numeric(Compression[,6]))
Compression.h2 <- cbind(Compression, heritablilty.vector,totalVariance.vector)

# Make the total variance component plots
#print("Total variance")
k <- 1
for(i in 1:length(unique(Compression.h2[,1]))){
  for(j in 1:length(unique(Compression.h2[,2]))){

     if((i == 1)&(j == 1)) {
      Compression.subset <- Compression.h2[which( (Compression.h2[,1] == as.character(unique(Compression.h2[,1])[i])) & (Compression.h2[,2] == as.character(unique(Compression.h2[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,8])  
      plot(y~x,type="l", pch = 17,  lty = line.vector[k], ylim=c(min(as.numeric(Compression.h2[,8])),max(as.numeric(Compression.h2[,8]))), xlim = c(min(as.numeric(Compression.h2[,3])),max(as.numeric(Compression.h2[,3]))),
      col = color.vector[1], xlab = "Number of Groups", ylab = "Total Variance", )
      #label = paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "")
      }
  
    if((i != 1)|(j != 1)) {
      k <- k+1   
      Compression.subset <- Compression.h2[which( (Compression.h2[,1] == as.character(unique(Compression.h2[,1])[i])) & (Compression.h2[,2] == as.character(unique(Compression.h2[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,8]) 
      lines(y~x,type="l", pch = 17, lty = line.vector[i], col = color.vector[j])
      #label = c(label, paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = ""))
      }  
   }
 }
 #Make a legend
  #legend("topright",  label, fill = color.vector) 
  

# Make the heritability plots 
#print("h2 plot")
k <- 1
for(i in 1:length(unique(Compression[,1]))){
  for(j in 1:length(unique(Compression[,2]))){

     if((i == 1)&(j == 1)) {
      Compression.subset <- Compression.h2[which( (Compression.h2[,1] == as.character(unique(Compression.h2[,1])[i])) & (Compression.h2[,2] == as.character(unique(Compression.h2[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,7]) 

      plot(y~x,type="l", pch = 17,  lty = line.vector[k], ylim=c(min(as.numeric(Compression.h2[,7])),max(as.numeric(Compression.h2[,7]))), xlim = c(min(as.numeric(Compression.h2[,3])),max(as.numeric(Compression.h2[,3]))),
      col = color.vector[1], xlab = "Number of Groups", ylab = "Heritability", )
      #label = paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = "")
      }
  
    if((i != 1)|(j != 1)) {
      k <- k+1   
      Compression.subset <- Compression.h2[which( (Compression.h2[,1] == as.character(unique(Compression.h2[,1])[i])) & (Compression.h2[,2] == as.character(unique(Compression.h2[,2])[j]))  ),              ]
      x <- as.numeric(Compression.subset[,3])
      y <- as.numeric(Compression.subset[,7])  
      lines(y~x,type="l", lty = line.vector[i], pch = 17, col = color.vector[j])
      #label = c(label, paste(c(as.character(unique(Compression[,1]))[i]," ",as.character(unique(Compression[,2]))[j]), collapse = ""))
      }       
   }
 }
 
 #Make a legend
  #legend("topleft",  label, fill = color.vector) 
  
legend.col= 1+floor(length(unique(Compression[,1])) * length(unique(Compression[,2]))/20)
line.style=rep(1:length(unique(Compression[,1])), each = length(unique(Compression[,2])))      
line.color=rep(1:length(unique(Compression[,2])), length(unique(Compression[,1])))



# Make labels
      plot(0~0,axes=FALSE,type="l",ylab = "",xlab = "",frame.plot=FALSE)
      legend("topleft",  label, col = color.vector[line.color], lty = line.style, ncol=legend.col,horiz=FALSE) 
   
 
dev.off()
}#end of Graph compression with multiple groups

#Graph compression with single groups
#print("Graph compression with single groups")
if(length(unique(Compression[,3]))==1& length(unique(Compression[,1]))*length(unique(Compression[,2]))>1)
{

#Graph the compression with only one group
pdf(paste("GAPIT.Compression.single.group.", name.of.trait, ".pdf", sep = ""), width = 14)
par(mfrow = c(2,2), mar = c(5,5,1,1), lab = c(5,5,7))

nkt=length(unique(Compression[,1]))
nca=length(unique(Compression[,2]))
kvr=rep(c(1:nkt),nca)
kvc0=rep(c(1:nca),nkt)
kvc=as.numeric(t(matrix(kvc0,nca,nkt)))
kt.name=Compression[1:nkt,1]

ca.index=((1:nca)-1)*nkt+1
ca.name=Compression[ca.index,2]

KG<- t(tapply(as.numeric(Compression[,4]), list(kvr, kvc), mean))
colnames(KG)=kt.name
barplot(as.matrix(KG),  ylab= "-2 Log Likelihood",beside=TRUE, col=rainbow(length(unique(Compression[,2]))))


KG<- t(tapply(as.numeric(Compression[,5]), list(kvr, kvc), mean))
colnames(KG)=kt.name
barplot(as.matrix(KG),  ylab= "Genetic varaince", beside=TRUE, col=rainbow(length(unique(Compression[,2]))))

KG<- t(tapply(as.numeric(Compression[,6]), list(kvr, kvc), mean))
colnames(KG)=kt.name
barplot(as.matrix(KG),  ylab= "Residual varaince", beside=TRUE, col=rainbow(length(unique(Compression[,2]))))

KG<- t(tapply(as.numeric(Compression[,5])/(as.numeric(Compression[,5])+as.numeric(Compression[,6])), list(kvr, kvc), mean))
colnames(KG)=kt.name
barplot(as.matrix(KG),  ylab= "Heritability", beside=TRUE, col=rainbow(length(unique(Compression[,2]))),ylim=c(0,1))

legend("topleft", paste(t(ca.name)), cex=0.8,bty="n", fill=rainbow(length(unique(Compression[,2]))),horiz=TRUE)
dev.off() 
} #end of Graph compression with single groups

print("GAPIT.Compression.Visualization accomplished successfully!")

}#GAPIT.Compression.Plots ends here





##############################################################################################
GAPIT.Table <- function(final.table = final.table, name.of.trait = name.of.trait,SNP.FDR=1){
#Object: Make and export a table of summary information from GWAS
#Output: A table summarizing GWAS results
#Authors: Alex Lipka and Zhiwu Zhang
# Last update: May 10, 2011 
##############################################################################################

#Filter SNPs by FDR
index=(final.table[,7]<=SNP.FDR)
final.table=final.table[index,]

#Export this summary table as an excel file
write.table(final.table, paste("GAPIT.", name.of.trait, ".GWAS.Results.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)


#print("GAPIT.Table accomplished successfully!")
  

}   #GAPIT.Table ends here



##############################################################################################
GAPIT.GS.Visualization <- function(gsBLUP = gsBLUP, BINS=BINS, name.of.trait = name.of.trait){
#Object: To build heat map to show distribution of BLUP and PEV
#Output: pdf
#Authors: Zhiwu Zhang 
# Last update: May 15, 2011 
##############################################################################################


nBin=BINS

BLUP= gsBLUP[,5]
PEV = gsBLUP[,6]

if(BLUP[1]=="NaN"){
  warning ("It was not converged. BLUP was not created!")
}
if(BLUP[1]!="NaN")
{


BLUP.max=try(max(BLUP))
BLUP.min=try(min(BLUP))
if(inherits(BLUP.max, "try-error"))  return()

  range.BLUP=BLUP.max-BLUP.min
  range.PEV=max(PEV)-min(PEV)
  
  interval.BLUP=range.BLUP/nBin
  interval.PEV=range.PEV/nBin
  
  
  bin.BLUP=floor(BLUP/max(BLUP)*nBin)*max(BLUP)/nBin
  bin.PEV=floor(PEV/max(PEV)*nBin)*max(PEV)/nBin
  
  
  distinct.BLUP=unique(bin.BLUP)
  distinct.PEV=unique(bin.PEV)
  
  if((length(distinct.BLUP)<2)  | (length(distinct.PEV)<2) ) return() #nothing to plot
  
  Position.BLUP=match(bin.BLUP,distinct.BLUP,nomatch = 0)
  Position.PEV=match(bin.PEV,distinct.PEV,nomatch = 0)
  
  value=matrix(1,length(Position.BLUP))
  KG<- (tapply(as.numeric(value), list(Position.BLUP, Position.PEV), sum))
  
  rownames(KG)=round(distinct.BLUP, digits = 4)
  colnames(KG)=round(distinct.PEV, digits = 4)
  
  #Sort the rows and columns in order from smallest to largest
  
  rownames(KG) <- rownames(KG)[order(as.numeric(rownames(KG)))]
  colnames(KG) <- colnames(KG)[order(as.numeric(colnames(KG)))]
  
  #write.table(KG, "Input_Matrix_for_GS_Heat_Map.txt", quote = FALSE, sep = "\t", row.names = FALSE,col.names = FALSE)

  pdf(paste("GAPIT.", name.of.trait,".GPS.BLUPvsPEV", ".pdf", sep = ""),width = 9)
  #par(mfrow = c(1,1), mar = c(1,1,5,5), lab = c(5,5,7))
  par(mar = c(5,5,6,5))
  
  nba_heatmap <- heatmap(KG, Rowv=NA, Colv=NA,  col =  rev(heat.colors(256)),   scale="column", 
  xlab = "PEV", ylab = "BLUP", main = " ")

  #nba_heatmap <- heatmap.2(KG,  cexRow =.2, cexCol = 0.2, scale="none", symkey=FALSE, trace="none" )
 
  
  #cexRow =0.9, cexCol = 0.9)
  dev.off() 
}
#print("GAPIT.GS.Visualization accomplished successfully!")

}   #GAPIT.GS.Visualization ends here


      
##############################################################################################
GAPIT.kinship.loiselle <- function(snps, method="additive", use="all") {
# Object: To calculate the kinship matrix using the method of Loiselle et al. (1995)
# Authors: Alex Lipka and Hyun Min Kang
# Last update: May 31, 2011 
############################################################################################## 


  #Number of SNP types that are 0s
  n0 <- sum(snps==0,na.rm=TRUE)
  #Number of heterozygote SNP types
  nh <- sum(snps==1,na.rm=TRUE)
  #Number of SNP types that are 1s
  n1 <- sum(snps==2,na.rm=TRUE)
  #Number of SNP types that are missing
  nNA <- sum(is.na(snps))
  

 
  #Self explanatory
  dim(snps)[1]*dim(snps)[2]
  #stopifnot(n0+nh+n1+nNA == length(snps))

    
  #Note that the two lines in if(method == "dominant") and if(method == "recessive") are found in
  #if(method == "additive").  Worry about this only if you have heterozygotes, which you do not.
  if ( method == "dominant" ) {
    flags <- matrix(as.double(colMeans(snps,na.rm=TRUE) > 1),ncol(snps),nrow(snps))
    snps[!is.na(snps) && (snps == 1)] <- flags[!is.na(snps) && (snps == 1)]
  }
  else if ( method == "recessive" ) {
    flags <- matrix(as.double(colMeans(snps,na.rm=TRUE) < 1),ncol(snps),nrow(snps))
    snps[!is.na(snps) && (snps == 1)] <- flags[!is.na(snps) && (snps == 1)]
  }
  else if ( ( method == "additive" ) && ( nh > 0 ) ) {
    dsnps <- snps
    rsnps <- snps
    flags <- matrix(as.double(colMeans(snps,na.rm=TRUE) > 1),ncol(snps),nrow(snps))
    dsnps[!is.na(snps) && (snps==1)] <- flags[is.na(snps) && (snps==1)]
    flags <- matrix(as.double(colMeans(snps,na.rm=TRUE) < 1),ncol(snps),nrow(snps))
    rsnps[!is.na(snps) && (snps==1)] <- flags[is.na(snps) && (snps==1)]
    snps <- cbind(dsnps,rsnps)
  }

  #mafs is a (# lines)x(# SNPs)x matrix.  The rows mafs are identical, and the ij^th element is the average
  #allele frequency for the SNP in the j^th column.
  
  #if(use == "all") imputes missing SNP type values with the expected (average) allele frequency.
  if ( use == "all" ) {
    mafs <- matrix(colMeans(snps,na.rm=TRUE),ncol(snps),nrow(snps))
    snps[is.na(snps)] <- mafs[is.na(snps)]
  }
  else if ( use == "complete.obs" ) {
    mafs <- matrix(colMeans(snps,na.rm=TRUE),ncol(snps),nrow(snps))
    snps <- snps[colSums(is.na(snps))==0,]
  }
  mafs_comp <- 1-mafs
  snps_comp <- 1-snps
  

  n <- nrow(snps)
  K <- matrix(nrow=n,ncol=n)
  diag(K) <- 1
  #Create the k term on page 1422 of Loiselle et al. (1995)

  missing <- rep(NA, dim(snps)[2])  
  for(i in 1:dim(snps)[2]) {
    missing[i] <- sum(is.na(snps[,i]))
  }
  

  for(i in 1:(n-1)) {
    for(j in (i+1):n) {
      Num_First_Term_1 <- (snps[i,]-mafs[i,])*(snps[j,]-mafs[j,])
      Num_First_Term_2 <- (snps_comp[i,]-mafs_comp[i,])*(snps_comp[j,]-mafs_comp[j,])
      First_Term <- sum(Num_First_Term_1)+sum(Num_First_Term_2)

      Num_Second_Term_1 <- mafs[,i]*(1-mafs[,i])
      Num_Second_Term_2 <- mafs_comp[,i]*(1-mafs_comp[,i])
      Num_Second_Term_Bias_Correction <- 1/((2*n)-missing - 1)
      Num_Second_Term <-  Num_Second_Term_1 + Num_Second_Term_2
      Second_Term <- sum(Num_Second_Term*Num_Second_Term_Bias_Correction)

      Third_Term <- sum(Num_Second_Term) 
      
      f <- (First_Term + Second_Term)/Third_Term
      if(f <= 0){
            K[i,j] <- 0
      }
      else{
            K[i,j] <- f
      }
      K[j,i] <- K[i,j]
    }
  }
  return(K)
}


##############################################################################################

GAPIT.PCA <- function(X,taxa, PC.number = min(ncol(X),nrow(X))){
# Object: Conduct a principal component analysis, and output the prinicpal components into the workspace,
#         a text file of the principal components, and a pdf of the scree plot
# Authors: Alex Lipka and Hyun Min Kang
# Last update: May 31, 2011 
############################################################################################## 

#Conduct the PCA 
PCA.X <- prcomp(X)

#Create a Scree plot 
pdf("GAPIT.PCA.eigenValue.pdf", width = 12, height = 12)
par(mar = c(5,5,5,5))
screeplot(PCA.X, type="lines")
dev.off()

pdf("GAPIT.PCA.pdf", width = 12, height = 12)
par(mar = c(5,5,5,5))
for(i in 1:(as.numeric(PC.number[1])-1)){
for(j in (i+1):(as.numeric(PC.number[1]))){
plot(PCA.X$x[,i],PCA.X$x[,j],xlab=paste("PC",i,sep=""),ylab=paste("PC",j,sep=""),pch=1,col="red",cex=1.0,cex.lab=1.5, cex.axis=1.2, lwd=2,las=1)
}
}
dev.off()

#Extract number of PCs needed
PCs <- cbind(taxa,as.data.frame(PCA.X$x[,1:PC.number]))

#Remove duplicate (This is taken care by QC)
#PCs.unique <- unique(PCs[,1])
#PCs <-PCs[match(PCs.unique, PCs[,1], nomatch = 0), ]

#Write the PCs into a text file
write.table(PCs, "GAPIT.PCA.csv", quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)

#Return the PCs
return(PCs)
}



      
##############################################################################################
GAPIT.kinship.loiselle <- function(snps, method="additive", use="all") {
# Object: To calculate the kinship matrix using the method of Loiselle et al. (1995)
# Authors: Alex Lipka and Hyun Min Kang
# Last update: May 31, 2011 
############################################################################################## 
 
  #Number of SNP types that are 0s
  n0 <- sum(snps==0,na.rm=TRUE)
  #Number of heterozygote SNP types
  nh <- sum(snps==0.5,na.rm=TRUE)
  #Number of SNP types that are 1s
  n1 <- sum(snps==1,na.rm=TRUE)
  #Number of SNP types that are missing
  nNA <- sum(is.na(snps))
  

 
  #Self explanatory
  dim(snps)[1]*dim(snps)[2]
  #stopifnot(n0+nh+n1+nNA == length(snps))

    
  #Note that the two lines in if(method == "dominant") and if(method == "recessive") are found in
  #if(method == "additive").  Worry about this only if you have heterozygotes, which you do not.
  if ( method == "dominant" ) {
    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps))
    snps[!is.na(snps) && (snps == 0.5)] <- flags[!is.na(snps) && (snps == 0.5)]
  }
  else if ( method == "recessive" ) {
    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps))
    snps[!is.na(snps) && (snps == 0.5)] <- flags[!is.na(snps) && (snps == 0.5)]
  }
  else if ( ( method == "additive" ) && ( nh > 0 ) ) {
    dsnps <- snps
    rsnps <- snps
    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps))
    dsnps[!is.na(snps) && (snps==0.5)] <- flags[is.na(snps) && (snps==0.5)]
    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps))
    rsnps[!is.na(snps) && (snps==0.5)] <- flags[is.na(snps) && (snps==0.5)]
    snps <- rbind(dsnps,rsnps)
  }

  #mafs is a (# SNPs)x(# lines) matrix.  The columns of mafs are identical, and the ij^th element is the average
  #allele frequency for the SNP in the i^th row.
  
  #if(use == "all") imputes missing SNP type values with the expected (average) allele frequency.
  if ( use == "all" ) {
    mafs <- matrix(rowMeans(snps,na.rm=TRUE),nrow(snps),ncol(snps))
    snps[is.na(snps)] <- mafs[is.na(snps)]
  }
  else if ( use == "complete.obs" ) {
    mafs <- matrix(rowMeans(snps,na.rm=TRUE),nrow(snps),ncol(snps))
    snps <- snps[rowSums(is.na(snps))==0,]
  }
  mafs_comp <- 1-mafs
  snps_comp <- 1-snps
  

  n <- ncol(snps)
  K <- matrix(nrow=n,ncol=n)
  diag(K) <- 1
  #Create the k term on page 1422 of Loiselle et al. (1995)

  missing <- rep(NA, dim(snps)[1])  
  for(i in 1:dim(snps)[1]) {
    missing[i] <- sum(is.na(snps[i,]))
  }
  

  for(i in 1:(n-1)) {
    for(j in (i+1):n) {
      Num_First_Term_1 <- (snps[,i]-mafs[,i])*(snps[,j]-mafs[,j])
      Num_First_Term_2 <- (snps_comp[,i]-mafs_comp[,i])*(snps_comp[,j]-mafs_comp[,j])
      First_Term <- sum(Num_First_Term_1)+sum(Num_First_Term_2)

      Num_Second_Term_1 <- mafs[,i]*(1-mafs[,i])
      Num_Second_Term_2 <- mafs_comp[,i]*(1-mafs_comp[,i])
      Num_Second_Term_Bias_Correction <- 1/((2*n)-missing - 1)
      Num_Second_Term <-  Num_Second_Term_1 + Num_Second_Term_2
      Second_Term <- sum(Num_Second_Term*Num_Second_Term_Bias_Correction)

      Third_Term <- sum(Num_Second_Term) 
      
      f <- (First_Term + Second_Term)/Third_Term

      K[i,j] <- f
      if(K[i,j]<0) K[i,j]=0
      
      K[j,i] <- K[i,j]
    }
  }
  return(K)
}

##############################################################################################
GAPIT.replaceNaN <- function(LL) {
#handler of grids with NaN log
#Authors: Zhiwu Zhang
# Last update: may 12, 2011 
##############################################################################################

#handler of grids with NaN log 
index=(LL=="NaN")
if(length(index)>0) theMin=min(LL[!index])
if(length(index)<1) theMin="NaN"
LL[index]=theMin
return(LL)    
}

################################################################################################################
emma.REMLE <- function(y, X, K, Z=NULL, ngrids=100, llim=-10, ulim=10,
              esp=1e-10, eig.L = NULL, eig.R = NULL) {
# Authors: Hyun Min Kang
# Modified (only one line) by Zhiwu Zhang to handle non-defined LL ("NaN") by replacing it with the worst LL.
# Last update: June 8, 2011 
################################################################################################################


  n <- length(y)
  t <- nrow(K)
  q <- ncol(X)

#  stopifnot(nrow(K) == t)
  stopifnot(ncol(K) == t)
  stopifnot(nrow(X) == n)

  if ( det(crossprod(X,X)) == 0 ) {
    warning("X is singular")
    return (list(REML=0,delta=0,ve=0,vg=0))
  }

  if ( is.null(Z) ) {
    if ( is.null(eig.R) ) {
      eig.R <- emma.eigen.R.wo.Z(K,X)
    }
    etas <- crossprod(eig.R$vectors,y)
  
    logdelta <- (0:ngrids)/ngrids*(ulim-llim)+llim
    m <- length(logdelta)
    delta <- exp(logdelta)
    Lambdas <- matrix(eig.R$values,n-q,m) + matrix(delta,n-q,m,byrow=TRUE)
    Etasq <- matrix(etas*etas,n-q,m)
    LL <- 0.5*((n-q)*(log((n-q)/(2*pi))-1-log(colSums(Etasq/Lambdas)))-colSums(log(Lambdas)))
    dLL <- 0.5*delta*((n-q)*colSums(Etasq/(Lambdas*Lambdas))/colSums(Etasq/Lambdas)-colSums(1/Lambdas))
    
    optlogdelta <- vector(length=0)
    optLL <- vector(length=0)
    if ( dLL[1] < esp ) {
      optlogdelta <- append(optlogdelta, llim)
      optLL <- append(optLL, emma.delta.REML.LL.wo.Z(llim,eig.R$values,etas))
    }
    if ( dLL[m-1] > 0-esp ) {
      optlogdelta <- append(optlogdelta, ulim)
      optLL <- append(optLL, emma.delta.REML.LL.wo.Z(ulim,eig.R$values,etas))
    }

    for( i in 1:(m-1) )
      {
        if ( ( dLL[i]*dLL[i+1] < 0 ) && ( dLL[i] > 0 ) && ( dLL[i+1] < 0 ) ) 
        {
          r <- uniroot(emma.delta.REML.dLL.wo.Z, lower=logdelta[i], upper=logdelta[i+1], lambda=eig.R$values, etas=etas)
          optlogdelta <- append(optlogdelta, r$root)
          optLL <- append(optLL, emma.delta.REML.LL.wo.Z(r$root,eig.R$values, etas))
        }
      }
#    optdelta <- exp(optlogdelta)
  }
  else {
    if ( is.null(eig.R) ) {
      eig.R <- emma.eigen.R.w.Z(Z,K,X)
    }
    etas <- crossprod(eig.R$vectors,y)
    etas.1 <- etas[1:(t-q)]
    etas.2 <- etas[(t-q+1):(n-q)]
    etas.2.sq <- sum(etas.2*etas.2)
  
    logdelta <- (0:ngrids)/ngrids*(ulim-llim)+llim
    m <- length(logdelta)
    delta <- exp(logdelta)
    Lambdas <- matrix(eig.R$values,t-q,m) + matrix(delta,t-q,m,byrow=TRUE)
    Etasq <- matrix(etas.1*etas.1,t-q,m)
    dLL <- 0.5*delta*((n-q)*(colSums(Etasq/(Lambdas*Lambdas))+etas.2.sq/(delta*delta))/(colSums(Etasq/Lambdas)+etas.2.sq/delta)-(colSums(1/Lambdas)+(n-t)/delta))
    
    optlogdelta <- vector(length=0)
    optLL <- vector(length=0)
    if ( dLL[1] < esp ) {
      optlogdelta <- append(optlogdelta, llim)
      optLL <- append(optLL, emma.delta.REML.LL.w.Z(llim,eig.R$values,etas.1,n,t,etas.2.sq))
    }
    if ( dLL[m-1] > 0-esp ) {
      optlogdelta <- append(optlogdelta, ulim)
      optLL <- append(optLL, emma.delta.REML.LL.w.Z(ulim,eig.R$values,etas.1,n,t,etas.2.sq))
    }

    for( i in 1:(m-1) )
      {
        if ( ( dLL[i]*dLL[i+1] < 0 ) && ( dLL[i] > 0 ) && ( dLL[i+1] < 0 ) ) 
        {
          r <- uniroot(emma.delta.REML.dLL.w.Z, lower=logdelta[i], upper=logdelta[i+1], lambda=eig.R$values, etas.1=etas.1, n=n, t1=t, etas.2.sq = etas.2.sq )
          optlogdelta <- append(optlogdelta, r$root)
          optLL <- append(optLL, emma.delta.REML.LL.w.Z(r$root,eig.R$values, etas.1, n, t, etas.2.sq ))
        }
      }
#    optdelta <- exp(optlogdelta)
  }
  
  maxdelta <- exp(optlogdelta[which.max(optLL)])
  
  #handler of grids with NaN log
  optLL=GAPIT.replaceNaN(optLL)   
  
  maxLL <- max(optLL)
  if ( is.null(Z) ) {
    maxva <- sum(etas*etas/(eig.R$values+maxdelta))/(n-q)    
  }
  else {
    maxva <- (sum(etas.1*etas.1/(eig.R$values+maxdelta))+etas.2.sq/maxdelta)/(n-q)
  }
  maxve <- maxva*maxdelta

  return (list(REML=maxLL,delta=maxdelta,ve=maxve,vg=maxva))
}


##############################################################################################
GAPIT.Genotype <- function(G=NULL,GD=NULL,GM=NULL,KI=NULL,
                kinship.algorithm=NULL,SNP.effect="Add",PCA.total=0,seed=123, SNP.fraction =1,
                file.path=NULL,file.from=NULL, file.to=NULL, file.total=NULL, file.fragment = 1000,SNP.test=TRUE,
                file.G =NULL,file.Ext.G =NULL,
                file.GD=NULL,file.Ext.GD=NULL,
                file.GM=NULL,file.Ext.GM=NULL,
                SNP.MAF=0.05,FDR.Rate = 0.05,SNP.FDR=1,
                Timmer=NULL,Memory=NULL,
                LD.chromosome=NULL,LD.location=NULL,LD.range=NULL,
                GP = NULL, 
                kinship.complement = TRUE, 
                bin.size = 1000, 
                inclosure.size = 100,
                SNP.CV=NULL ){
#Object: To unify genotype and calculate kinship and PC if required:
#       1.For G data, convert it to GD
#       2.For GD data return it back simply if no KI and PC required
#       3.Samling GD and create KI and PC
#       4.Go through multiple files
#       5.In any case, GD must be returned (for QC)
#Output: GD, GI, GT, KI and PC
#Authors: Zhiwu Zhang
#Last update: August 11, 2011
##############################################################################################

print("Genotyping: numericalization, sampling kinship, PCs and much more...")

#Create logical variables
byData=!is.null(G) | !is.null(GD)
byFile=!is.null(file.G) | !is.null(file.GD)
hasGenotype=(byData | byFile  )
needKinPC=(is.null(KI) | PCA.total>0)

if(!is.null(KI) & !byData & !byFile & !SNP.test) {
return (list(GD=NULL,GI=NULL,GT=NULL,hasGenotype=FALSE, genoFormat=NULL, KI=KI,PC=NULL,byFile=FALSE,fullGD=TRUE,Timmer=Timmer,Memory=Memory))
}

#Set indicator for full GD
fullGD=FALSE
if(byData) fullGD=TRUE
if(byFile & SNP.fraction==1 & needKinPC) fullGD=TRUE

#SET GT to NULL in case of no genotype
if(!byData & !byFile) {
if(is.null(KI)) stop("GAPIT says: Kinship has to be provided or estimated from genotype!!!")
return (list(GD=NULL,GI=NULL,GT=NULL,hasGenotype=FALSE, genoFormat=NULL, KI=KI,PC=NULL,byFile=FALSE,fullGD=TRUE,Timmer=Timmer,Memory=Memory))
}

genoFormat="hapmap"
if(is.null(G)&is.null(file.G)) genoFormat="EMMA"

#Multiple genotype files


#Diagnose user setteing

#agreement among file from, to and total
if(!is.null(file.from) &!is.null(file.to) &!is.null(file.total)){
if(file.total!=(file.to-file.from+1))  stop("GAPIT says: Conflict among file (from, to and total)")
}
if(!is.null(file.from) &!is.null(file.to)) {
  if(file.to<file.from)  stop("GAPIT says: file.from should smaller than file.to")
}
#file.from and file.to must be in pair
if(is.null(file.from) &!is.null(file.to) ) stop("GAPIT says: file.from and file.to must be in pair)")
if(!is.null(file.from) &is.null(file.to) ) stop("GAPIT says: file.from and file.to must be in pair)")

#assign file.total
if(!is.null(file.from) &!is.null(file.to) ) file.total=file.to-file.from+1
if(byFile& is.null(file.total)) stop("GAPIT says: file.from and file.to must be provided!)")


if(!is.null(KI) &!is.null(kinship.algorithm))  stop("GAPIT says: You can not specify kinship.algorithm and provide kinship at same time!!!")

if(!needKinPC &SNP.fraction<1)  stop("GAPIT says: You did not require calculate kinship or PCs. SNP.fraction should not be specified!!!")

if(!SNP.test & is.null(KI) & !byData & !byFile)  stop("GAPIT says: For SNP.test optioin, please input either use KI or use genotype")
 
#if(is.null(file.path) & !byData & byFile) stop("GAPIT Ssays: A path for genotype data should be provided!")
if( is.null(file.total) & !byData & byFile) stop("APIT Ssays: Number of file should be provided: >=1")
if(!is.null(G) & !is.null(GD)) stop("APIT Ssays: Both hapmap and EMMA format exist, choose one only.")

if(!is.null(file.GD) & is.null(file.GM)) stop("APIT Ssays: Genotype data and map files should be in pair")
if(is.null(file.GD) & !is.null(file.GM)) stop("APIT Ssays: Genotype data and map files should be in pair")

if(!is.null(GD) & is.null(GM)) stop("APIT Says: Genotype data and map files should be in pair")
if(is.null(GD) & !is.null(GM)) stop("APIT Says: Genotype data and map files should be in pair")

#if(!byData & !byFile) stop("APIT Ssays: Either genotype data or files should be given!")

#if(byData&(!is.null(file.path))) stop ("APIT Ssays: You have provided geotype data. file.path should not be provided!")

#Set defaul method for kinship.algorithm
if(is.null(kinship.algorithm)) kinship.algorithm="VanRaden"

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Genotype loaded")
Memory=GAPIT.Memory(Memory=Memory,Infor="Genotype loaded")

#Inital GLD
 GLD=NULL
 
#Handler of read data in numeric format (EMMA)
#Rename GM as GI
if(!is.null(GM))GI=GM
rm(GM)
gc()
#Extract GD and GT from read data GD
if(!is.null(GD) )
{
GT=as.matrix(GD[,1])  #get taxa
GD=as.matrix(GD[,-1]) #remove taxa column
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GT created from GD)")
Memory=GAPIT.Memory(Memory=Memory,Infor="GT created from GD")
}


#Hapmap format
if(!is.null(G))
{

#SUPER
mySUPER=GAPIT.SUPER(GI=G[-1,c(1,3,4)],GP=GP,bin.size=bin.size,inclosure.size=inclosure.size,SNP.CV=SNP.CV)
theIndex=mySUPER$index
SNP.QTN=mySUPER$SNP.QTN
if(!is.null(theIndex))G=G[c(TRUE,theIndex),]

#Convert HapMap to numerical
print(paste("Converting genotype...",sep=""))
hm=GAPIT.HapMap(G,SNP.effect=SNP.effect)

#Extracting SNP for LD plot
if(!is.null(LD.chromosome)){
print("Extracting SNP for LD plot...")
  chromosome=(G[,3]==LD.chromosome[1])
  bp=as.numeric(as.vector(G[,4]))
  deviation=abs(bp-as.numeric(as.vector(LD.location[1])) )
  location=deviation< as.numeric(as.vector(LD.range[1])  )
  index=chromosome&location
  GLD=G[index,]
  
}else{
#print("No data in GLD")
  GLD=NULL
}
        
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="HapMap")
Memory=GAPIT.Memory(Memory=Memory,Infor="HapMap")
print(paste("Converting genotype done.",sep=""))
rm(G)
gc()
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="G removed")
Memory=GAPIT.Memory(Memory=Memory,Infor="G removed")

GT=hm$GT
GD=hm$GD
GI=hm$GI
rm(hm)
gc()
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="hm removed")
Memory=GAPIT.Memory(Memory=Memory,Infor="hm removed")
}


#From files
if(!byData & byFile){
print("Loading genotype from files...")
  numFileUsed=file.to
  if(!needKinPC)numFileUsed=file.from
  
  #Initial GI as storage
  GD=NULL
  GT=NULL
  GI=NULL
  GLD=NULL
  
  #multiple fragments or files
  for (file in file.from:numFileUsed){

    frag=1
    numSNP=file.fragment
    myFRG=NULL
   #print(paste("numSNP  before while is ",numSNP))
    
    while(numSNP==file.fragment) {     #this is problematic if the read end at the last line
    print(paste("Reading file: ",file,"Fragment: ",frag))
      myFRG=GAPIT.Fragment( file.path=file.path,file.from=file.from, file.to=file.to,file.total=file.total,file.G=file.G,file.Ext.G=file.Ext.G,
                            seed=seed,SNP.fraction=SNP.fraction,SNP.effect=SNP.effect,genoFormat=genoFormat,
                            file.GD=file.GD,file.Ext.GD=file.Ext.GD,file.GM=file.GM,file.Ext.GM=file.Ext.GM,
                            file.fragment=file.fragment,file=file,frag=frag,
                            LD.chromosome=LD.chromosome,LD.location=LD.location,LD.range=LD.range)
   #print(paste("numSNP after while is ",numSNP))
     #print(paste("OK with file: ",file,"Fragment: ",frag))
    
      if(is.null(GT) & !is.null(myFRG$GT))GT= as.matrix(myFRG$GT)
            
      if(is.null(GD)){
        GD= myFRG$GD
      }else{
        if(!is.null(myFRG$GD))    { 
          GD=cbind(GD,myFRG$GD)
        }
      }
          
      if(is.null(GI)){
        GI= myFRG$GI
      }else{
        if(!is.null(myFRG$GI))    {
          colnames(myFRG$GI)=c("SNP","Chromosome","Position")        
          GI=as.data.frame(rbind(as.matrix(GI),as.matrix(myFRG$GI)))
        }
      }

      if(is.null(GLD)){
        GLD= myFRG$GLD
      }else{
        if(!is.null(myFRG$GLD))    {
          if(myFRG$heading){
          GLD=as.data.frame(rbind(as.matrix(GLD),as.matrix(myFRG$GLD[-1,])))
          }else{
          GLD=as.data.frame(rbind(as.matrix(GLD),as.matrix(myFRG$GLD)))          
          }
        }
      }
            
      #print("This fragment is joined")
  
      if(file==file.from & frag==1)GT=as.matrix(myFRG$GT)

      frag=frag+1
      if(!is.null(myFRG$GI))    {
        numSNP=myFRG$linesRead[1]
      }else{
       numSNP=0
      }
              
      if(!needKinPC)numSNP=0  #force to end the while loop
      if(is.null(myFRG))numSNP=0  #force to end the while loop

      
        
        
    } #end of repeat on fragment
   # print("This file is OK")
  } #end of file loop
  print("All files loaded")
} #end of if(!byData&byFile)


#print("file loaded")


Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Sampling genotype")
Memory=GAPIT.Memory(Memory=Memory,Infor="Sampling genotype")

#Plot thirt part kinship

if(!is.null(KI)) {
  if(nrow(KI)<1000){
    print("Plotting Kinship")
    theKin=as.matrix(KI[,-1])
    colnames(theKin)=KI[,1]
    rownames(theKin)=KI[,1]
    
    print("Creating heat map for kinship...")
    pdf(paste("GAPIT.Kin.thirdPart.pdf",sep=""), width = 12, height = 12)
    par(mar = c(25,25,25,25))
    heatmap.2(theKin,  cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none")
    dev.off()
    print("Kinship heat map PDF created!")
  } #end of if(nrow(KI)<1000)
} #end of if(!is.null(KI))

#Create kinship from genotype if not provide
if(is.null(KI)&!is.null(GD))
{
  
  print("Calculating kinship...")
  print(paste("Number of individuals and SNPs are ",nrow(GD)," and ",ncol(GD)))
  if(kinship.algorithm=="EMMA")theKin= emma.kinship(snps=t(as.matrix(.5*GD)), method="additive", use="all")
  if(kinship.algorithm=="Loiselle")theKin= GAPIT.kinship.loiselle(snps=t(as.matrix(.5*GD)), method="additive", use="all")
  if(kinship.algorithm=="VanRaden")theKin= GAPIT.kinship.VanRaden(snps=as.matrix(GD))
  
  colnames(theKin)=GT
  rownames(theKin)=GT
  
    
  if(length(GT)<1000){
    #Create heat map for kinship
    print("Creating heat map for kinship...")
 
    pdf(paste("GAPIT.Kin.",kinship.algorithm,".pdf",sep=""), width = 12, height = 12)
    par(mar = c(25,25,25,25))
    heatmap.2(theKin,  cexRow =.2, cexCol = 0.2, col=rev(heat.colors(256)), scale="none", symkey=FALSE, trace="none")
    dev.off()
    print("Kinship heat map created")
  }
  
  print("Adding IDs to kinship...")
  #Write the kinship into a text file
  KI=cbind(GT,as.data.frame(theKin)) #This require big memory. Need a way to solve it.
  
  print("Writing kinship to file...")
  write.table(KI, paste("GAPIT.Kin.",kinship.algorithm,".csv",sep=""), quote = FALSE, sep = ",", row.names = FALSE,col.names = FALSE)
  print("Kinship save as file")
  
  rm(theKin)
  gc()
  
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Estimating kinship")
  Memory=GAPIT.Memory(Memory=Memory,Infor="Estimating kinship")
  print("Kinship created!")
}  #end of if(is.null(KI)&!is.null(GD))


#Create PC
PC=NULL
if(PCA.total>0){
PC=GAPIT.PCA(X = GD, taxa = GT, PC.number = PCA.total)
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PCA")
Memory=GAPIT.Memory(Memory=Memory,Infor="PCA")
print("PC created")
}


#LD plot
#print("LD section")
if(!is.null(GLD)){

if(nrow(GLD)>500){
  GLD=GLD[1,]
  print("WARNING: The number of SNPs requested is beyond limitation. No LD plot created.")
}
if(nrow(GLD)>1){
print("Plot LD...")


hapmapgeno= data.frame(as.matrix(t(GLD[,-c(1:11)])))

hapmapgeno[hapmapgeno=="NN"]=NA
hapmapgeno[hapmapgeno=="XX"]=NA
hapmapgeno[hapmapgeno=="--"]=NA
hapmapgeno[hapmapgeno=="++"]=NA
hapmapgeno[hapmapgeno=="//"]=NA


LDdist=as.numeric(as.vector(GLD[,4]))
LDsnpName=GLD[,1]
colnames(hapmapgeno)=LDsnpName

#Prune SNM names
#LDsnpName=LDsnpName[GAPIT.Pruning(LDdist,DPP=7)]
LDsnpName=LDsnpName[c(1,length(LDsnpName))] #keep the first and last snp names only

#print(hapmapgeno)
print("Getting genotype object")

LDsnp=makeGenotypes(hapmapgeno,sep="",method=as.genotype)   #This need to be converted to genotype object

print("Caling LDheatmap...")
pdf(paste("GAPIT.LD.chromosom",LD.chromosome,"(",round(max(0,LD.location-LD.range)/1000000),"_",round((LD.location+LD.range)/1000000),"Mb)",".pdf",sep=""), width = 12, height = 12)
#pdf(paste("GAPIT.LD.pdf",sep=""), width = 12, height = 12)
par(mar = c(25,25,25,25))
MyHeatmap <- try(LDheatmap(LDsnp, LDdist, LDmeasure="r", add.map=TRUE,
  SNP.name=LDsnpName,color=rev(cm.colors(20)), name="myLDgrob", add.key=TRUE,geneMapLabelY=0.1) )

if(!inherits(MyHeatmap, "try-error")) { 
  #Modify the plot
  grid.edit(gPath("myLDgrob", "Key", "title"), gp=gpar(cex=.5, col="blue"))  #edit key title size and color
  grid.edit(gPath("myLDgrob", "geneMap", "title"), gp=gpar(just=c("center","bottom"), cex=0.8, col="black")) #Edit gene map title  
  grid.edit(gPath("myLDgrob", "geneMap","SNPnames"), gp = gpar(cex=0.3,col="black")) #Edit SNP name  
}else{
  print("Warning: error in converting genotype. No LD plot!")
}

dev.off()  
print("LD heatmap crated")    
#grid.edit(gPath("myLDgrob", "heatMap","title"), gp=gpar(cex=1.0))   #Make title smaler
#grid.edit(gPath("myLDgrob", "geneMap", "title"), gp=gpar(just=c("right","bottom"), cex=0.5, col="blue")) #Edit gene map title  
#grid.edit(gPath("myLDgrob", "Key", "labels"), gp=gpar(cex=.5, col="black"))  #edit key lable size and color
}else{ # alternative of if(nrow(GLD)>1)
  print("Warning: There are less than two SNPs on the region you sepcified. No LD plot!")
} #end of #if(nrow(GLD)>1)
}#end of if(!is.null(GLD))

print("Genotype successfully acomplished")
return (list(GD=GD,GI=GI,GT=GT,hasGenotype=hasGenotype, genoFormat=genoFormat, KI=KI,PC=PC,byFile=byFile,fullGD=fullGD,Timmer=Timmer,Memory=Memory,SNP.QTN=SNP.QTN))
}


##############################################################################################
GAPIT.kinship.VanRaden<- function(snps,hasInbred=TRUE) {
# Object: To calculate the kinship matrix using the method of VanRaden (2009, J. Dairy Sci. 91:4414?��C4423)
# Authors: Zhwiu Zhang
# Last update: August 15, 2011 
############################################################################################## 

print("Calculating kinship with VanRaden method...")

#snps=hm$GD 
nSNP=ncol(snps)
nInd=nrow(snps)
n=nInd
snpMean= apply(snps,2,mean)
snps=snps-snpMean
print("Getting X'X...")
K=tcrossprod((snps), (snps))

print("Adjusting...")
#Extract diagonals
i=1:n
j=(i-1)*n
index=i+j
d=K[index]
DL=min(d)
DU=max(d)
floor=min(K)

K=(K-floor)/(DL-floor)
MD=(DU-floor)/(DL-floor)

#Handler of diagonals over 2
#print("MD")
#print(MD)
#print(K[1:5,1:5])

if(is.na(K[1,1])) stop ("GAPIT says: Missing data is not allowed for numerical genotype data")
if(MD>2)K[index]=K[index]/(MD-1)+1

#Handler of inbred
if(MD<2 & hasInbred) K=2*K/((DU-floor)/(DL-floor))

print("Calculating kinship with VanRaden method: done")

return(K)
}

##############################################################################################
GAPIT.Fragment <- function(file.path=NULL,file.from=NULL, file.to=NULL,file.total=NULL,file.G=NULL,
                          file.Ext.G=NULL,seed=123,SNP.fraction=1,SNP.effect="Add",
                          genoFormat=NULL, file.GD=NULL, file.Ext.GD=NULL, file.GM=NULL, file.Ext.GM=NULL, file.fragment=NULL,
                          file=1,frag=1,LD.chromosome=NULL,LD.location=NULL,LD.range=NULL){
#Object: To load SNPs on a (frag)ment in file (this is to replace sampler)
#Output: genotype data sampled
#Authors: Alex Lipka and Zhiwu Zhang
# Last update: August 18, 2011
##############################################################################################

#print("Fragmental reading...")
genoFormat="hapmap"
if(!is.null(file.GD)&is.null(file.G)) genoFormat="EMMA"
  
if(genoFormat=="hapmap"){
        #Initical G
        #print("Reading file...")
        G=NULL
        if(frag==1){
          skip.1=0
          G <- try(read.delim(paste(file.path,file.G,file, ".",file.Ext.G,sep=""),
                          head = FALSE,skip = skip.1, nrows = file.fragment+1),silent=TRUE)
        }else{
          skip.1 <- (frag-1)*file.fragment +1
          G <- try(read.delim(paste(file.path,file.G,file, ".",file.Ext.G,sep=""),
                          head = FALSE,skip = skip.1, nrows = file.fragment),silent=TRUE )
        }
        
        #print("processing the data...")
        if(inherits(G, "try-error"))  {
          G=NULL
          #print("File end reached for G!!!")
        }

        if(is.null(G)){
        #print("The above error indicating reading after end of file (It is OK).")
        return(list(GD=NULL,GI=NULL,GT=NULL,linesRead=NULL,GLD=NULL,heading=NULL) )
        }

        #print("Calling hapmap...")
        heading=(frag==1)
        
        #Recording number of lineas read
        if(heading){
          n= nrow(G)-1
        }else{
          n= nrow(G)
        } 
       
       linesRead=n
               
        #Sampling
       if(SNP.fraction<1){

          #print("Number of SNP in this pragment:")
          #print(n)
          
           set.seed(seed+(file*1000)+frag)
          #mySample=sample(1:n,max(2,floor(n*as.numeric(as.vector(SNP.fraction)))))
          mySample=sample(1:n,max(2,floor(n*SNP.fraction)))
          
          #print(length(mySample))
          if(heading){
            G=G[c(1,(1+mySample)),]
          }else{
            G=G[mySample,]
          }
        } #end of if(SNP.fraction<1)
        

              
        hm=GAPIT.HapMap(G,SNP.effect=SNP.effect,heading=heading)

        #print("Extracting snps for LD plot...")
        #Extract SNPs for LD plot
        if(!is.null(LD.chromosome) & !is.null(hm$GD)){
          index=(G[,3]==LD.chromosome[1]) & abs((as.numeric(G[,4])-as.numeric(LD.location[1]))<(as.numeric(LD.range[1])/2))   
          GLD=G[index,]
        }else{
          GLD=NULL
        }
        
        rm(G)
        gc()
       # print("hapmap called sucesfuly from fragment")

        return(list(GD=hm$GD,GI=hm$GI,GT=hm$GT,linesRead=linesRead,GLD=GLD,heading=heading))

          print("ERROR: It should no get here!!!")        
} #end of "hapmap"



if(genoFormat=="EMMA"){
#print("The file is a numerical format!")
        #Initial GD
        GD=NULL
        skip.1 <- (frag-1)*file.fragment
        #Skip the remaining columns
        GD.temp <- try(read.table(paste(file.path,file.GD, file, ".", file.Ext.GD,sep=""), head = TRUE, nrows = 1),silent=TRUE)
        num.SNP <- ncol(GD.temp)-1
        rm(GD.temp)
        read.in <- min(file.fragment,(num.SNP-skip.1))
        skip.2 <- max((num.SNP - (skip.1 + read.in)),0)


        GD <- try(read.table(paste(file.path,file.GD,file, ".",file.Ext.GD,sep=""), head = TRUE,
                  colClasses = c("factor", rep("NULL", skip.1), rep("numeric", read.in),
                  rep("NULL", skip.2))) ,silent=TRUE)
        GI <- try(read.table(paste(file.path,file.GM,file, ".",file.Ext.GM,sep=""), head = TRUE,
                  skip=skip.1, nrows=file.fragment) ,silent=TRUE)
                  
        if(inherits(GD, "try-error"))  {
          GD=NULL
          print("File end reached for GD!!!")
        }
        if(inherits(GI, "try-error"))  {
          GI=NULL
          print("File end reached for GI!!!")
        }                          
                  
        if(is.null(GD)) return(list(GD=NULL, GI=NULL,GT=NULL,linesRead=NULL,GLD=NULL))
        
        GT=GD[,1]  #Extract infividual names

        GD=GD[,-1] #Remove individual names
#print("Numerical file read sucesfuly from fragment") 
        linesRead=ncol(GD)       
        if(SNP.fraction==1) return(list(GD=GD, GI=GI,GT=GT,linesRead=linesRead,GLD=NULL))
        
        if(SNP.fraction<1){
          n= ncol(GD)
          set.seed(seed+file)
          sample=sample(1:n,floor(n*SNP.fraction))
          return(list(GD=GD[,sample], GI=GI[sample,],GT=GT,linesRead=linesRead,GLD=NULL))
        }
    } # end of the "EMMA"
#print("fragment ended succesfully!")
}#End of fragment