# CROP545 LAB 11
# CODE PROVIDED BY ZHIWU ZHANG
# COMMENTED AND MODIFIED BY YUANHONG SONG

# In this lab we will discuss the SUPER algorithm in the GAPIT package


# 0. SETUP ----
# 0.1 Packages and functions ----
# Please consider put all your work in one folder, there might be
# multiple output files generated automatically.
silver = "/Users/song/Dropbox/COURSE/CROP_545/2018/labs/lab11"
setwd(silver)
# The following packages are required for this script.
# Please install the package if necessary.
library('MASS') # required for ginv
# 'multtest' package:
source("https://bioconductor.org/biocLite.R")
biocLite("multtest")
library("multtest")
library(gplots)
library(compiler) # required for cmpfun
# DO NOT install the 'compiler' package, simply load.
library("scatterplot3d")
# GAPIT package and functions
source("http://www.zzlab.net/GAPIT/emma.txt")
source("http://www.zzlab.net/GAPIT/gapit_functions.txt")


# 0.2 Data ----
myGD=read.table(file="http://zzlab.net/GAPIT/data/mdp_numeric.txt",head=T)
myGM=read.table(file="http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",head=T)

# 0.3 Simulate phenotype data
# Siultate 10 QTN on the first half chromosomes
X=myGD[,-1]
index1to5=myGM[,2]<6
X1to5 = X[,index1to5]
taxa=myGD[,1]
set.seed(99164)
# Save the markers on chromosomes 1-5 with individual names.
GD.candidate=cbind(taxa,X1to5)
# This time we use the simulation fuction in GAPIT to simulate the phynometric
# data. This function works in the same way as the G2P function.
mySim=GAPIT.Phenotype.Simulation(GD=GD.candidate,GM=myGM[index1to5,],h2=.5,NQTN=10,QTNDist="norm")

# 1. Calculate kinship from QTNS----
# The following calculate the kinship by VanRaden method.
# Multiple outputs files (plots and spresheet) will be generated automatically.
# It will be useful to keep the output in a seperate folder so we don't get confused
# Here we learn how to create folders directly in R:
kin = "kin"
dir.create(file.path(silver, kin))
setwd("./kin")
myGAPIT0=GAPIT(
  Y=mySim$Y, # phenotype
  GD=GD.candidate, # the genotype data
  GM=myGM[index1to5,], # gene map information
  SNP.test = F, # test SNPs or not
  group.from=5, # The starting number of groups of compression
  group.to=5, # the ending number of groups of compression
  group.by=10, # the interval for crompression groups
  memo="KbyQTN" # goes into the output, remind what you did
  )

# GLM, MLM, and CMLM in GAPIT
# The choice of method is indicated by the grouping number
# group.from = large number, group.to = large, then MLM  (100000, 1000000, 10)
# group.from = small number, group.to = large, then CMLM (1, 100000, 10)
# group.from = small number, group to = small, then GLM (1, 1, 10)
# Note that we were calculating the kinship in previous step, so the grouping 
# number was not indicating the GWAS method.

# 2. GWAS by MLM ----
# Similar thing as above, we setting a new folder for the new outputs.
MLM = "MLM"
dir.create(file.path(silver, MLM))
setwd(paste(silver, MLM, sep="/"))
# make sure you are in the MLM folder
getwd() 

myGAPIT1=GAPIT(
  Y=mySim$Y, #pheno
  GD=myGD,# 
  GM=myGM,
  KI=myGAPIT0$kinship,# including kinship in the algorithm
  QTN.position=mySim$QTN.position,
  PCA.total=3, # including PC
  #SNP.test = F,
  group.from=1000000, 
  group.to=1000000,
  group.by=10,
  memo="GWASbyMLM")

# observed vs fitted
# mySim$u -  the simulated phenotype
# myGAPIT0$Pred$BLUP - predicted phenotype by GAPIT
cor(mySim$u,myGAPIT0$Pred$BLUP)
cor(mySim$u,myGAPIT1$Pred$BLUP)
par(mfrow=c(1,2))
plot(mySim$u,myGAPIT0$Pred$BLUP)
plot(mySim$u,myGAPIT1$Pred$BLUP)


# 3. SUPER ----
SUPER = "SUPER"
dir.create(file.path(silver, SUPER))
setwd(paste(silver, SUPER, sep="/"))
getwd() 

# SUPER is not triggered by the choice of grouping number
# instead, we will indicate the sangwich.top and bottom command
myGAPIT=GAPIT(
  Y=mySim$Y,
  GD=myGD,
  GM=myGM,
  QTN.position=mySim$QTN.position,
  PCA.total=3,
  sangwich.top="MLM", #options are GLM,MLM,CMLM, FaST and SUPER
  sangwich.bottom="SUPER", #options are GLM,MLM,CMLM, FaST and SUPER
  LD=0.1,
  memo="SUPER")

# We will stop here today and pick it up next time

# # 4. Power analysis using the GAPID.FDR.TypeI function
# myStat=GAPIT.FDR.TypeI(WS=c(1e0,1e3,1e4,1e5), # window size, the inverse of resolution
#     GM=myGM,
#     seqQTN=mySim$QTN.position,
#     GWAS=myGAPIT$GWAS)
# 
# # 5. Replicate the 
# replicate = "replicate"
# dir.create(file.path(silver, replicate))
# setwd(paste(silver, replicate, sep="/"))
# getwd() 
# 
# nrep=3
# 
# statRep=replicate(nrep,{
#   mySim=GAPIT.Phenotype.Simulation(GD=GD.candidate,
#                                    GM=myGM[index1to5,],
#                                    h2=.5,NQTN=10,QTNDist="norm") # end of simulation
#   # GWAS by SUPER, you may choose any other method
#   myGAPIT=GAPIT(Y=mySim$Y,  GD=myGD,  GM=myGM,  
#                 QTN.position=mySim$QTN.position,  
#                 PCA.total=3,
#                 sangwich.top="MLM", #options are GLM,MLM,CMLM, FaST and SUPER
#                 sangwich.bottom="SUPER", #options are GLM,MLM,CMLM, FaST and SUPER
#                 LD=0.1,  
#                 memo="SUPER") # end of GWAS
#   # Power analysis
#   myStat=GAPIT.FDR.TypeI(WS=c(1e0,1e3,1e4,1e5),
#                          GM=myGM,
#                          seqQTN=mySim$QTN.position,
#                          GWAS=myGAPIT$GWAS)# end of power analysis
#     }# end of replicated command
#   ) 
# # you may use any other approach to replicate the process and collect the output you want
# 
# #FDR
# s.fdr=seq(3,length(statRep),7)
# fdr=statRep[s.fdr]
# fdr.mean=Reduce ("+", fdr) / length(fdr)
# 
# #AUC: power vs. FDR
# s.auc.fdr=seq(6,length(statRep),7)
# auc.fdr=statRep[s.auc.fdr]
# auc.fdr.mean=Reduce ("+", auc.fdr) / length(auc.fdr)
# 
# theColor=rainbow(4)
# plot(fdr.mean[,1],power , type="b", col=theColor [1],xlim=c(0,1))
# for(i in 2:ncol(fdr.mean)){
#   lines(fdr.mean[,i], power , type="b", col= theColor [i])
# }