# Cran R
# http://www.r-project.org/
# R environment and base packages
# Text editor to R (Notepad ++)
# R libraries (packages): C:\Program Files\R\R-3.0.3\library
# Introduction to R: http://cran.r-project.org/doc/manuals/r-release/R-intro.pdf
# R reference card : http://cran.r-project.org/doc/contrib/Short-refcard.pdf

# R statistical environment
# Script based interface
# Everything in R is an object
# Object: data structure, attributes, methods
# R requires minimal object declaration: object type will change with content

# RStudio
# http://www.rstudio.com/
# R IDE (integrated development environment)
# 	Script editor, R interface (build configuration), debugging, project management, version control
# 	(Tinn-R, Eclipse (StatET))

# RStudio components
# Script editor, R command line (console), Environment (+ history), Files (+plots, packages, help)


#####################################
# Using RStudio
#####################################
# File, New Project
# File, New File, R Script

# Controlling the location of data and output
# Explicit path description: the folder and the file are kept in separate
# objects, because read.csv() needs a file path while setwd() needs a directory.
# Alternative location of the lab folder (edit to match your own machine):
# dataPath <- "C:/Users/Che/UNBC_work/Courses/NRES-798/NRES_lab_1"
dataPath <- "H:/che/NRES_798_lab_1" # directory that holds the lab data
dataFile <- file.path(dataPath, "test_data.csv") # full path of the data file
dataReadIn <- read.csv(dataFile, header = TRUE) # first row holds the column names

# Implicit, using the project workspace (working directory)
# RStudio, Session, Set Working Directory
getwd() # Reports the path of the current working directory
setwd(dataPath) # Sets the working directory; the argument must be a directory

dir() # Outputs the files in the current working directory

# TODO: change the directory to the root directory of your H drive
# TODO: list the files in your H drive root directory
# TODO: change the directory back to your NRES-798 directory (project folder)

#####################################
# Assignment in R
#####################################

# R offers three assignment operators ("<-", "->", "=")
# Be consistent and only use "<-"

a1 <- 15         # numeric scalar stored in a1
a2 <- 3          # numeric scalar stored in a2
b1 <- TRUE       # logical value TRUE (T), written without quotation marks
b2 <- FALSE      # logical value FALSE
d <- "ecology"   # character string stored in d

# R as a calculator: simple numeric operations
3 + 4            # the result is printed directly
e <- 16 / 4      # the result is stored in an object instead
# Arithmetic operators: + - * / ^ %%

# TODO: what does %% do (e.g. 17 %% 5)

# TODO: what is produced by
#   a1 + b1
#   a1 + b2
#   a1 / b1
#   a1 / b2
#   b2 / a1
#   b2 / b2
#   a2 + d

# mode() reveals the type of an object
mode(a1) # "numeric"
mode(b2) # "logical"
mode(d)  # "character"

# Working directory vs. working environment
# Only objects that have been brought into R's working environment can be used
ls() # names of all objects in the working environment

# rm() removes individual objects from the environment
rm(a1) # a1 is no longer in the workspace
# TODO: what now happens with
	# e <- a1 + a2
	
#####################################
# vectors objects
#####################################

# A vector is a set of scalars arranged in a one-dimensional array
# Vectors are built with c(); read c as "concatenate" or "combine"
v1 <- c(1, 3, 2, 5, 4)
v2 <- c(2, 3)
v3 <- c(v1, v2, v1) # existing vectors can be combined in the same way

length(v1) # number of elements in v1
length(v3) # number of elements in v3

# Vector indexing: picking out the components to look at or operate on
v3
v3[4]                # the fourth element
v3[1:4]              # elements 1 through 4
v3[c(1, 3, 5)]       # the elements at the listed positions
v3[v3 > 4]           # every element greater than 4
v3[4:length(v3)]     # from the fourth element to the last one (length(v3))
v3[v3 == 3]          # every element equal (==) to 3
v3[v3 != 3]          # every element not equal (!=) to 3
v3[v3 != 3 & v3 > 2] # elements that are not 3 and are also greater than 2
v3[-2]               # everything except element 2
v3[-c(1, 4, 6)]      # everything except the listed elements

# Arithmetic with vectors
a2 # print the scalar used below
v1 # print the vector used below
nv1 <- v1 + a2 # vector plus scalar
nv2 <- v2 * a2 # vector times scalar
nv3 <- v1 + v2 # TODO: what is happening here?
nv4 <- v1 * v2 # TODO: what is happening here?

# Changing the order of the elements in a vector
v4 <- c(1, 6, 5, 3, 4, 7, 2, 9, 10, 8)
v4s <- sort(v4)  # the vector in sorted order
v4o <- order(v4) # TODO: what is returned and how does this differ from sort

# Building vectors with a regular structure
v4 <- rep(1, times = 10)            # ten 1's
v5 <- rep(1:5, times = 3)           # the sequence 1:5 repeated 3 times
v6 <- rep(1:5, each = 3)            # each number of 1:5 repeated 3 times in a row
v7 <- rep(1:5, each = 3, times = 2) # both kinds of repetition combined

v8 <- 1:20
v9 <- seq(from = 1, to = 20, by = 2.34)

# Factors: vectors whose elements are category levels, used to code experiments
v10 <- rep(c("high", "med", "low", "veryLow"), each = 2) # character vector first
v10 <- factor(x = v10) # then converted into a factor

#####################################
# Matrix objects
#####################################

m1 <- matrix(1:12, nrow = 3, ncol = 4)               # 3 by 4 matrix, filled column by column
m2 <- matrix(1:12, nrow = 3, ncol = 4, byrow = TRUE) # TODO: how does this differ?

# dim() reports the size of a matrix
dim(m2)

# Matrix indexing: [row, column]
m1
m1e1 <- m1[2, ]   # row 2 only
m1e2 <- m1[, 3]   # column 3 only
m1e3 <- m1[3, 1]  # the single element in row 3, column 1

# TODO: when extracting a subset of data from a matrix what is returned?

is.matrix(m1e1)
is.vector(m1e1)
is.numeric(m1e1)
# The is.type() family tests the type of an object (is.vector(), is.array()...)

# Matrix operations
s1 <- 3
v1 <- c(1, 2, 3)
m1 <- matrix(1:12, nrow = 3, ncol = 4)

# TODO: what happens with each of these operations
m1a <- m1 + s1
m1b <- m1 * s1
m1c <- m1 + v1
m1d <- m1 * v1

# Transposing a matrix
m1
m2 <- t(m1)

# TODO: how does the operator * differ from %*%
m3 <- m2 %*% v1

#####################################
# List objects
#####################################

# Lists are the "everything" data objects: each element can be a different kind of object
myList <- list(5, 4:8, c("not", "to", "hard"), m1)

# Many stats functions return a list, so list indexing is worth understanding
# [[ ]] selects a list element; a following [ ] selects components of that element

myList[2]        # second element, still wrapped in a list
myList[[2]]      # second element itself, returned as a vector
myList[[2]][3]   # third component of the second element

myList[[4]][2, ] # row 2 of the fourth element (the matrix)

myList[[3]][3]

# str(object) shows the structure of an object
str(myList)
# TODO: what does the returned information mean?

#####################################
# Data frame objects
#####################################

# Data frames are equivalent to a spreadsheet, and are a very common data storage type
# Each column of the data frame is a vector
# Each vector can be of a different mode (e.g. numeric, factor)

f1 <- rep(c("blue", "red", "green"), each = 3) # character vector of group labels
f2 <- c(1, 5, 3, 7, 4, 8, 5, 2, 4) # numeric vector of data

# Create a data frame by binding the vectors together as columns.
# stringsAsFactors = TRUE converts the character labels into a factor; this was
# the default before R 4.0 but has to be requested explicitly in newer versions.
df1 <- data.frame(heatType = f1, age = f2, stringsAsFactors = TRUE)

dim(df1) # returns the size of the data frame (rows, columns)
str(df1) # returns the structure of the data frame
names(df1) # returns the column "names" of the data frame
names(df1)[2] # a specific name can be selected by its position

# indexing data frames like a matrix
df1[2, ] # data frame components from row 2
df1[, 2] # data frame components from column 2

# indexing data frames using column names
df1$heatType # "$" operator specifies variables (column names) in the data frame
df1$heatType[3] # specifies the third element from variable heatType

# Saving data frames (and other data objects) from R-intro
# Writing everything out as .csv data files is a very robust way to save data

# General form: write.csv(x, file = "", row.names = FALSE)
write.csv(
  df1,
  file = "myDataTest.csv",
  row.names = FALSE
)

# file = "myData.csv" # With no path given, the data is saved to the set working directory
# file = "D:/Che/work/movement_data/spring2014.csv"  # Spelling out the full path can be less risky

# A .csv file can then be read back into the R environment
# General form: read.csv(file, header = TRUE)

df2 <- read.csv(file = "myDataTest.csv", header = TRUE)


#####################################
# Working with Data frames
#####################################

# Load the built-in data set "cars"
data(cars)

# TODO: determine the size of the data set
# TODO: determine the variable names

# Parts of a data frame are extracted with the same tools used for vectors
cars[cars$speed > 15, ] # rows where speed > 15, all columns
cars[-(1:12), 2]        # drop the first 12 rows, keep only the second column

# TODO: extract only the rows where the distance is exactly double the speed

# The function subset() can also extract part of a data frame
#   subset(data, condition selecting the rows to keep)
cars1 <- subset(cars, speed > 19)

#####################################
# Basic data exploration
#####################################

# Load the built-in data set "trees"
data("trees")

# Some of the common summary statistic functions are...
# mean(), min(), max(), range(), var(), sd(), median(), quantile(,...)

# TODO: calculate the mean tree volume
# TODO: determine the minimum and maximum tree height
# TODO: determine the range of tree heights.
# TODO: calculate the sum of all tree Girths
# TODO: calculate the variance and standard deviation of tree volume

# help(): is a very powerful tool in R that provides information and examples for all functions
# e.g. help(mean)

# TODO: use help() to find out what additional arguments need to be included when using quantile().

# TODO: calculate the 25% (first) and 75% (third) quantiles for tree height

# TODO: what does the function summary(trees) return?

# The precision of data can be altered using round(), floor(), and ceiling()

round(trees$Girth, digits = 0) # Round the values to the nearest integer

# Column names are case-sensitive: the column is "Volume"; trees$volume is NULL
ceiling(trees$Volume) # Move each value up to the next highest integer


#####################################
# Creating new functions
#####################################

# The generic structure for creating a new function is...
	# functionName <- function(arg1, arg2..){
	#	function calculations
	# }

# Sum of squares: the sum of the squared differences between each data point
# and the mean of the data
#   x: numeric vector of data
#   returns: a single number, the sum of squares of x
sumSquares <- function(x) {
  deviations <- x - mean(x) # difference between each data point and the mean
  sum(deviations^2)         # square the differences and add them up
}

# TODO: create a function that takes a numerical vector, calculates the median, and returns the square root of the median

# TODO: create a function that takes a 5 by 5 matrix, and a numerical scalar, and returns a vector that contains all of the matrix values that are fully divisible by the value (no remainder)

#####################################
# Initial data exploration and plotting
#####################################

# Work with the built-in data set iris
data("iris")   # load the data set
names(iris)    # variable names in iris
summary(iris)  # overview summary of every variable

str(iris)            # more detail on how iris is structured
unique(iris$Species) # every unique level of the factor Species

# plot() is the base-level function for plotting data
# What plot() produces depends on the type of object it is given
plot(iris) # a data frame object selects the data frame version of plot

plot(x = iris$Sepal.Length, y = iris$Sepal.Width) # plot of x vs. y

# Modify the number of plots that are shown in a figure
# par() returns the previous settings, which are kept so the layout can be
# restored afterwards; otherwise every later plot is also drawn in the grid.
oldPar <- par(mfrow = c(2, 2)) # Change the layout to a 2 by 2 grid of plots
hist(iris$Sepal.Length) # Histogram of sepal length
hist(iris$Sepal.Width)  # Histogram of sepal width
hist(iris$Petal.Length) # Histogram of petal length
hist(iris$Petal.Width)  # Histogram of petal width
par(oldPar) # Restore the layout that was in place before the histograms

# Box plot of sepal length for each species
plot(x = iris$Species, y = iris$Sepal.Length) # the data types make plot() draw a box plot

boxplot(iris$Sepal.Length ~ iris$Species) # the explicit way of producing the same thing

# TODO: plot Sepal.Width vs. Petal.Width
# TODO: rename the X and Y axis to be more informative
# TODO: add a main title at the top of the figure 

# Exercises

# TODO: create a data file in excel, 
	# save as a .csv file, 
	# read the file into R,
	# output the size of the data frame,
	# list all of the data frame components,
	# create a new data frame that contains a subset of the original data frame,
	# save the new data frame as a .csv file