## crop545 - lab 1
## Introduction to R

## Y.Song 1/9/2018 

# 000000000000000000000000000000000000000000000
#
#            ----  1. Primer ----
#
# 000000000000000000000000000000000000000000000

# --- 1.1. Use R as a calculator --------------

# arithmetic
1 + 2
56 - 20
2 * 2
10 / 4

# algebra
2^3             # 2 to the 3rd power
sqrt(25)        # square root of 25

sin(pi / 2)     # the argument is an expression built from pi
tan(pi / 4)

log(2.7182818)  # log() is the natural logarithm: ln(2.7182818)
log(10)         # ln(10)

pi              # built-in constant
# e             # NOT built in: typing a bare `e` stops with
#               # "object 'e' not found" (unless you created e yourself)
exp(1)          # Euler's number e is obtained as exp(1)


# --- 1.2. Getting help --------------

# what does a function do?
# `?` opens the help page of the function named after it
?log()

# try the examples provided by the help page
log(exp(3))
log10(1e7) # = 7


# how to do a specific task?
# `??` searches the installed help pages for a keyword
??mean

# most of the time, Google is your best friend





# 00000000000000000000000000000000000000000000000
#
#  --- 2. R Objects - Variables & Constants ----
#
# 00000000000000000000000000000000000000000000000

## Values
a <- 2
b <- 3
c <- a + b
c

# wrapping an assignment in parentheses also prints the result
(d <- a * b)


## Vectors
# c() combines single values into a vector
(x <- c(1, 2, 3, 5, 7))
# m:n gives a sequence of whole numbers in steps of 1
(y <- 6:10)
# seq() gives a sequence with a step size of your choice
(z <- seq(from = 1, to = 5, by = 0.5))


## Matrix
# the four values fill a 2 x 2 matrix column by column
(M <- matrix(c(11, 21, 12, 22), nrow = 2, ncol = 2))


## dataframe
# build a data frame from the vectors x and y
(data <- cbind(x, y))          # binding two vectors gives a matrix
is(data)
(data <- as.data.frame(data))  # convert that matrix into a data frame
is(data)
# columns of a data frame can be given names ...
names(data) <- c("XX", "YY")
# ... and a single column can then be extracted by its name
data[["XX"]]


## logic values
tt <- TRUE
ff <- FALSE
# logical values are very helpful and important,
# although we rarely store them in variables like this.

# a more typical use: a comparison gives one TRUE/FALSE per
# element, and that result can be used to pick elements
x
x > 3
x[x > 3]
# we will discuss this further in the subsampling part


## string values (text)
names <- c("ava", "leah", "emma")


## list
# a list can hold objects of different kinds side by side
(l.list <- list(data, M, names))

## check what type of object each one is
is(x)
is(data)
is(names)
is(l.list)


# 00000000000000000000000000000
#
#       ---- 3. Files----
#
# 00000000000000000000000000000

# It is important to know which folder on the computer
# R is currently working in (the "working directory").
# getwd() prints that folder.

getwd()

# ----------------- 3.1. Reading (Importing) -------------------

# copy Zhiwu's original code:
# (the code is provided on our course website)
# NOTE: the path below points into Zhiwu's own folders; unless the same
# folders exist on your computer this line stops with an error
myGD <- read.table(
  file = "~/Dropbox/Current/ZZLab/WSUCourse/CROPS545/Demo/mdp_numeric.txt",
  header = TRUE
)

# how to make it correct: point `file` at the place where the data
# is stored on YOUR computer (replace the paths below with your own)
myGD.1 <- read.table(
  file = "C:/Users/yuanh/Documents/Dropbox/COURSE/CROP_545/2018/R intro/mdp_numeric.txt",
  header = TRUE
)
myGM.1 <- read.table(
  file = "C:/Users/yuanh/Documents/Dropbox/COURSE/CROP_545/2018/R intro/mdp_SNP_information.txt",
  header = TRUE
)

# It is tedious to type the long folder path each time.
# We can set a 'working directory' to tell R which folder we are
# working in; file names are then looked up in that folder
setwd("C:/Users/yuanh/Documents/Dropbox/COURSE/CROP_545/2018/R intro")
myGD.2 <- read.table(file = "mdp_numeric.txt", header = TRUE)
myGM.2 <- read.table(file = "mdp_SNP_information.txt", header = TRUE)

# we can also let R read data directly from a web address
myGD.3 <- read.table(
  file = "http://zzlab.net/GAPIT/data/mdp_numeric.txt",
  header = TRUE
)
myGM.3 <- read.table(
  file = "http://zzlab.net/GAPIT/data/mdp_SNP_information.txt",
  header = TRUE
)

# make a copy of the data under shorter names to work with
myGD <- myGD.1
myGM <- myGM.1


# ------------------- 3.2. Writing (Exporting) ------------------ 
# let's save a new copy of the data on the hard drive
# (a file name without a folder is written to the working directory)
write.table(myGD, file = "mygd.txt", row.names = TRUE)





# 0000000000000000000000000000000000000000000000000000000
#
#             ---- 4. data manipulation ----
#
# 0000000000000000000000000000000000000000000000000000000

# -------------- 4.1. examine the data ------------------
(mx <- matrix(1:27, nrow = 9, ncol = 3))
(df <- as.data.frame(mx))

# what is the data type?
is(mx)
is(df)
is(myGD)
is(myGM)

# how many rows and columns?
dim(mx)
dim(df)
dim(myGD)
dim(myGM)

# preview
head(mx)    # head() shows the first 6 rows of the data
str(df)     # str() gives a brief description of the structure
View(df)    # View() opens the data viewer and displays the data
summary(df) # summary() gives basic descriptive statistics

# when a dataset gets too big, previewing the top 6 rows is not a
# good idea; let's display just a portion of the data instead

myGD[1:10, 1:5]  # first 10 rows and first 5 columns of the data


# -------------- 4.2. subsample ------------------


# SUBSAMPLING WITH LOCATION
df

# positions are written as [row, column]:
# [1, ]  means the first row
# [ , 2] means the second column
# [i, j] means the element at the i-th row, j-th column

df[1, ]     # the first row of df
df[, 2]     # the second column of df
df[1, 2]    # the element in the first row, second column

df[, -2]    # everything but the second column
df[-1, -3]  # everything but the first row and the third column

x
x[3]        # the third element of x

# SUBSAMPLING WITH VARIABLE
df

# observe the output, notice the column names "V1, V2, V3";
# we can use them to select data
df$V1

df$V1 >= 4
df[df$V1 >= 4, ]  # mind the comma: the condition goes in the row position


# -------------- 4.3. combine and merge ------------------

# create a text string vector for the example
(type <- c("a", "b", "c"))
(type <- rep(type, 3))


# combine df (the 9X3 dataframe) with new elements
(big.df <- cbind(df, type)) # add type as a new column

# add a new row. The row is built as a one-row data frame so that each
# value keeps its own type: c(109, 118, 127, "c") would turn all four
# values into text, and rbind() would then convert V1-V3 to character
new.row <- data.frame(V1 = 109, V2 = 118, V3 = 127, type = "c")
(big.df <- rbind(big.df, new.row))


# -------------- 4.4. basic operations ------------------

# vector operation
# we will use the x and y vectors as example.
x
y

x + y     # add the elements pairwise
x * y     # multiply the elements pairwise (element-wise product)
2 * x     # scalar multiplication

mean(x)   # the mean of x
sum(x)    # the sum of all elements

# matrix operation
# we use the matrix M we created before
M
t(M)      # transpose
solve(M)  # inverse





# 0000000000000000000000000000000000000000000000000000000
#
#             ---- 5. Functions ----
#
# 0000000000000000000000000000000000000000000000000000000


# ---------- 5.1. define a function of your own ----------

# define a function called "poi.p" that takes the input values
# lambda and k, carries out the calculation
# lambda^k * exp(-lambda) / k!, and returns the result.
poi.p <- function(lambda, k) {
  # lambda^k * exp(-lambda) / k!, computed in two named pieces
  numerator <- (lambda^k) * exp(-lambda)
  denominator <- factorial(k)
  p <- numerator / denominator
  return(p)
}

# call the function with named arguments
poi.p(lambda = 3, k = 2)
poi.p(lambda = 5, k = 2)
poi.p(lambda = 5, k = 10)

# some part of the function is optional. In the following
# example, we calculate the same thing, and the optional 
# part has been taken out:
poi.p2 <- function(lambda, k) {
  # the value of the last expression is what the function returns,
  # so no explicit return() is needed
  exp(-lambda) * (lambda^k) / factorial(k)
}
# arguments given without names are matched by position
poi.p2(3, 2)








# ----------        5.2. IF condition          ----------

x <- c("what", "is", "truth")

# IF: if (condition) {
#       code that runs only when the condition is TRUE
#     }

if ("Truth" %in% x) {
  print("Truth is found")
}
# nothing is printed because "Truth" is not an element of x;
# note that "Truth" and "truth" are not the same in R



# IF ELSE: if (condition) {
#            code that runs when the condition is TRUE
#          } else {
#            code that runs otherwise
#          }

if ("Truth" %in% x) {
  print("Truth is found")
} else {
  print("Truth is not found")
}

# IF / ELSE IF / ELSE: if (condition 1) {
#                        code for condition 1
#                      } else if (condition 2) {
#                        code for condition 2
#                      } else {
#                        code when neither condition holds
#                      }

if ("Truth" %in% x) {
  print("Truth is found the first time")
} else if ("truth" %in% x) {
  print("truth is found the second time")
} else {
  print("No truth found")
}

# Example found at https://www.tutorialspoint.com/r/r_if_else_statement.htm



# ----------           5.3. for loops      ------------------
# create a numeric vector of the final length first, so the loop
# fills existing slots instead of growing the vector on every pass
z <- numeric(5)
for (i in seq_along(z)) {
  z[i] <- i + 5
}
z

# combine an if condition and a for loop to find the
# number of even numbers in the vector x.
# x was overwritten with text in section 5.2 and %% only works on
# numbers, so restore the numeric vector from section 2 first
x <- c(1, 2, 3, 5, 7)
x
count <- 0
for (v in x) {
  # what does %% do? it gives the remainder of a division,
  # so v %% 2 is 0 exactly when v is even
  if (v %% 2 == 0) {
    count <- count + 1
  }
}
print(count)
# the same count without a loop: sum(x %% 2 == 0)








# ---------- 5.4. packages

# packages are collections of functions.

# chisq.test() belongs to the 'stats' package, which R attaches at
# startup, so it works without loading anything
chisq.test(data)

# fractions() belongs to the MASS package: until that package is
# loaded R cannot find the function. try() shows the error message
# and lets the script continue
try(fractions(0.75))

# install MASS only when it is missing, then load it
if (!requireNamespace("MASS", quietly = TRUE)) {
  install.packages("MASS")
}
library(MASS)

# now try the same thing again
fractions(0.75)