# Notes for Hands-On Programming with R

## 2020/02/09

This post contains all the functions from Hands-On Programming with R by Garrett Grolemund link.

library(ggplot2)
library(Cairo)

# Project one - Weighted Dice

roll <- function(){
die <- 1:6
dice <- sample(die, size = 2, replace = TRUE)
sum(dice)
}

roll()
##  5
rolls <- replicate(10000, roll())

qplot(rolls, binwidth = 1) The dice are fair right now.
We want to bias dice towards high numbers.

roll <- function(){
die <- 1:6
dice <- sample(die, size = 2, replace = TRUE,
prob = c(1/8, 1/8, 1/8, 1/8, 1/8, 3/8))
sum(dice)
}

roll()
##  8
rolls <- replicate(10000, roll())

qplot(rolls, binwidth = 1) # Project two - Playing cards

• task 1: build the desk
• task 2: write functions that deal and shuffle
• task 4: manage the state of the desk
deck <- read.csv("../../static/csv/deck.csv")

head(deck, 13)
##     face   suit value
## 13   ace spades     1
# modify values
deck$value[deck$face == "ace"] <- 14
deck$value[deck$suit == "hearts"] <- 1

Closure
Since there are lots of going on in the global environment, it is possible that the deck dataset could be modified. We would like to store deck dataset in a safe, out-if-the-way-place. We made function deal and shuffle have theiroriginal environment to be differnet from the global environment. Their original environment becomes the runtime environment. In addition, instead of having each function reference the global environment to update deck envir = globalenv(), you can have them reference their parent environment at runtime envir = parent.env(environment()).

setup <- function(deck) {
DECK <- deck

DEAL <- function() {
card <- deck[1, ]
assign("deck", deck[-1, ], envir = parent.env(environment()))
card
}

SHUFFLE <- function(){
random <- sample(1:52, size = 52)
assign("deck", DECK[random, ], envir = parent.env(environment()))
}

list(deal = DEAL, shuffle = SHUFFLE)
}

cards <- setup(deck)
deal <- cards$deal shuffle <- cards$shuffle

# check the environment of deal adn shuffle
environment(deal)
## <environment: 0x0000000016378f08>
environment(shuffle)
## <environment: 0x0000000016378f08>
# deal and shuffle
deal()  
##   face   suit value
## 1 king spades    13
shuffle()

deal()
##    face  suit value
## 16 jack clubs    11

# Project three - Slot machine

aim - to create a play() function to randomly generate three symbols and to calculate a prize based on those symbols

get_symbols <- function(){
wheel <- c("DD", "7", "BBB", "BB", "B", "C", "0")
sample(wheel, size = 3, replace = TRUE,
prob = c(0.03, 0.03, 0.06, 0.1, 0.25, 0.01, 0.52))
}

get_symbols()
##  "0"  "0"  "BB"
score <- function (symbols) {
# identify case
same <- symbols == symbols && symbols == symbols
bars <- symbols %in% c("B", "BB", "BBB")

# get prize
if (same) {
payouts <- c("DD" = 100, "7" = 80, "BBB" = 40, "BB" = 25,
"B" = 10, "C" = 10, "0" = 0)
prize <- unname(payouts[symbols])
# unname returns a copy of an project with the names attribute removed
} else if (all(bars)) {
prize <- 5
} else {
cherries <- sum(symbols == "C")
prize <- c(0, 2, 5)[cherries + 1]
}

diamonds <- sum(symbols == "DD")
prize * 2 ^ diamonds
}

play <- function() {
symbols <- get_symbols()
print(symbols)
score(symbols)
}

play()
##  "B" "0" "0"
##  0
play()
##  "B" "7" "B"
##  0

remake play function - make symbols as an attribute of the score

play <- function(){
symbols <- get_symbols()
prize <- score(symbols)
attr(prize, "symbols") <- symbols
prize
}

play()
##  0
## attr(,"symbols")
##  "BB" "0"  "7"
# another way using structure
play <- function(){
symbols <- get_symbols()
structure(score(symbols), symbols = symbols)
}

play()
##  0
## attr(,"symbols")
##  "0"  "BB" "B"

create a slot_display function to make results prettier

slot_display <- function(prize){

# extract symbols
symbols <- attr(prize, "symbols")

# collapse symbols into single string
symbols <- paste(symbols, collapse = " ")

# combine symbol with prize as a regular expression
#\n is regular expression for new line (i.e. return or enter)
string <- paste(symbols, prize, sep = "\n$") # display regular expression in console without quotes cat(string) } slot_display(play()) ## B B 0 ##$0
# let's run it step by step
one_play <- play()

symbols <- attr(one_play, "symbols")
symbols  
##  "B" "B" "0"
symbols <- paste(symbols, collapse = " ")
symbols
##  "B B 0"
string <- paste(symbols, one_play, sep = "\n$") string ##  "B B 0\n$0"
cat(string)
## B B 0
## $0 # cat does not surround its output with quotation marks # cat also replaces every \n with a new line or new break  There is one function that can automatically clean up the output of play each time it is displayed. This function is print. # what's inside print function ? print ## function (x, ...) ## UseMethod("print") ## <bytecode: 0x0000000015828118> ## <environment: namespace:base> UseMethod function examines the class of the input that you provide for the first arugument of print and then passes all of your arguments to a new function designed to handle that class of input. check which methods exist for a generic function by calling methods on the function. head(methods(print)) ##  "print.acf" "print.AES" "print.all_vars" "print.anova" ##  "print.any_vars" "print.aov" This system of generic functions, methods, and class-based dispatch is known as S3 because it is originated in the third version of S language. Let’s give one-play a class of its own. class(one_play) <- "slots" Let’s write an S3 method for the slots class. The method does need to be named print.slots; otherwise UseMethod will not find it. It does have to take the same arguments as print. # check print argument args(print)  ## function (x, ...) ## NULL print.slots <- function(x, ...) { cat("I am using the print.slots method") } # check if it works print(one_play) ## I am using the print.slots method print.slots <- function(x, ...) { slot_display(x) } Now, R will automatically use slot_display to display objects of class slots and only objects of class “slots”. one_play ## B B 0 ##$0

Modify the play functon to make it assign slots to the class attributes of its output

play <- function(){
symbols <- get_symbols()
structure(score(symbols), symbols = symbols, class = "slots")
}

# check the class of the output
class(play())
##  "slots"
play()
## B 0 B
## $0 play() ## BB 0 0 ##$0

## Vectorized code

abs_loop <- function(vec){
for (i in 1:length(vec)) {
if (vec[i] < 0) {
vec[i] <- -vec[i]
}
}
vec
}

# the second way
abs_set <- function(vec){
negs <- vec <0
vec[negs] <- vec[negs]*-1
vec
}

long <- rep(c(-1, 1), 5000000)

system.time(abs_loop(long))
##    user  system elapsed
##    1.01    0.02    1.03
system.time(abs_set(long))
##    user  system elapsed
##    0.23    0.04    0.27
system.time(abs(long))
##    user  system elapsed
##    0.00    0.01    0.01

### speed compare

change_symbols <- function(vec){
for (i in 1:length(vec)){
if (vec[i] == "DD") {
vec[i] <- "joker"
} else if (vec[i] == "C") {
vec[i] <- "ace"
} else if (vec[i] == "7") {
vec[i] <- "king"
}else if (vec[i] == "B") {
vec[i] <- "queen"
} else if (vec[i] == "BB") {
vec[i] <- "jack"
} else if (vec[i] == "BBB") {
vec[i] <- "ten"
} else {
vec[i] <- "nine"
}
}
vec
}

vec <- c("DD", "C", "7", "B", "BB", "BBB", "0")

change_symbols(vec)
##  "joker" "ace"   "king"  "queen" "jack"  "ten"   "nine"
many <- rep(vec, 1000000)

system.time(change_symbols(many))
##    user  system elapsed
##   10.75    0.05   11.31
# the second way
change_vec <- function (vec) {
vec[vec == "DD"] <- "joker"
vec[vec == "C"] <- "ace"
vec[vec == "7"] <- "king"
vec[vec == "B"] <- "queen"
vec[vec == "BB"] <- "jack"
vec[vec == "BBB"] <- "ten"
vec[vec == "0"] <- "nine"

vec
}

system.time(change_vec(many))
##    user  system elapsed
##    0.42    0.03    0.45
# the third way
# use a lookup table
change_vec2 <- function(vec){
tb <- c("DD" = "joker", "C" = "ace", "7" = "king", "B" = "queen",
"BB" = "jack", "BBB" = "ten", "0" = "nine")
unname(tb[vec])
}

system.time(change_vec(many))
##    user  system elapsed
##    0.39    0.01    0.42

## write faster foor loops

• Do as much as you can outside of the for loop. Every line of code that you place inside of the for loop will be run many, many times. If a line of code only needs to be run once, place it outside of the loop to avoid repetition.
• Make sure that any storage objects that you use with the loop are large enough to contain all of the results of the loop.
# faster way
system.time({
output <- rep(NA, 1000000)
for (i in 1:1000000) {
output[i] <- i + 1
}
})
##    user  system elapsed
##    0.06    0.00    0.06
# slow way
system.time({
output <- NA
# do not set a length for the output vector
for (i in 1:1000000) {
output[i] <- i + 1
}
})
##    user  system elapsed
##    0.36    0.02    0.40

rewrite get_symbols function

get_many_symbols <- function(n) {
wheel <- c("DD", "7", "BBB", "BB", "B", "C", "0")
vec <- sample(wheel, size = 3 * n, replace = TRUE,
prob = c(0.03, 0.03, 0.06, 0.1, 0.25, 0.01, 0.52))
matrix(vec, ncol = 3)
}

get_many_symbols(5)
##      [,1] [,2] [,3]
## [1,] "B"  "DD" "0"
## [2,] "0"  "C"  "0"
## [3,] "0"  "0"  "B"
## [4,] "7"  "0"  "0"
## [5,] "B"  "0"  "BB"

rewrite play function

play_many <- function(n) {
symb_mat <- get_many_symbols(n = n)
data.frame(w1 = symb_mat[,1],
w2 = symb_mat[,2],
w3 = symb_mat[,3],
prize = score_many(symb_mat))
}

score_many function

# symbols should be a matrix with a column for each slot machine window
score_many <- function(symbols) {

# Step 1: Assign base prize based on cherries and diamonds ---------
## Count the number of cherries and diamonds in each combination
cherries <- rowSums(symbols == "C")
diamonds <- rowSums(symbols == "DD")

## Wild diamonds count as cherries
prize <- c(0, 2, 5)[cherries + diamonds + 1]

## ...but not if there are zero real cherries
### (cherries is coerced to FALSE where cherries == 0)
prize[!cherries] <- 0

# Step 2: Change prize for combinations that contain three of a kind
same <- symbols[, 1] == symbols[, 2] &
symbols[, 2] == symbols[, 3]
payoffs <- c("DD" = 100, "7" = 80, "BBB" = 40,
"BB" = 25, "B" = 10, "C" = 10, "0" = 0)
prize[same] <- payoffs[symbols[same, 1]]

# Step 3: Change prize for combinations that contain all bars ------
bars <- symbols == "B" | symbols ==  "BB" | symbols == "BBB"
all_bars <- bars[, 1] & bars[, 2] & bars[, 3] & !same
prize[all_bars] <- 5

# Step 4: Handle wilds ---------------------------------------------

## combos with two diamonds
two_wilds <- diamonds == 2

### Identify the nonwild symbol
one <- two_wilds & symbols[, 1] != symbols[, 2] &
symbols[, 2] == symbols[, 3]
two <- two_wilds & symbols[, 1] != symbols[, 2] &
symbols[, 1] == symbols[, 3]
three <- two_wilds & symbols[, 1] == symbols[, 2] &
symbols[, 2] != symbols[, 3]

### Treat as three of a kind
prize[one] <- payoffs[symbols[one, 1]]
prize[two] <- payoffs[symbols[two, 2]]
prize[three] <- payoffs[symbols[three, 3]]

## combos with one wild
one_wild <- diamonds == 1

### Treat as all bars (if appropriate)
wild_bars <- one_wild & (rowSums(bars) == 2)
prize[wild_bars] <- 5

### Treat as three of a kind (if appropriate)
one <- one_wild & symbols[, 1] == symbols[, 2]
two <- one_wild & symbols[, 2] == symbols[, 3]
three <- one_wild & symbols[, 3] == symbols[, 1]
prize[one] <- payoffs[symbols[one, 1]]
prize[two] <- payoffs[symbols[two, 2]]
prize[three] <- payoffs[symbols[three, 3]]

# Step 5: Double prize for every diamond in combo ------------------
unname(prize * 2^diamonds)

}

system.time(play_many(10000000))
##    user  system elapsed
##    7.86    1.78    9.81