Notes for Hands-On Programming with R

2020/02/09

This post contains all the functions from Hands-On Programming with R by Garrett Grolemund link.

library(ggplot2)
library(Cairo)

Project one - Weighted Dice

roll <- function(){
  die <- 1:6  
  dice <- sample(die, size = 2, replace = TRUE) 
  sum(dice)
}

roll()
## [1] 5
rolls <- replicate(10000, roll())

qplot(rolls, binwidth = 1)

The dice are fair right now.
We want to bias dice towards high numbers.

roll <- function(){
  die <- 1:6  
  dice <- sample(die, size = 2, replace = TRUE,
                 prob = c(1/8, 1/8, 1/8, 1/8, 1/8, 3/8)) 
  sum(dice)
}

roll()
## [1] 8
rolls <- replicate(10000, roll())

qplot(rolls, binwidth = 1)

Project two - Playing cards

deck <- read.csv("../../static/csv/deck.csv")

head(deck, 13)
##     face   suit value
## 1   king spades    13
## 2  queen spades    12
## 3   jack spades    11
## 4    ten spades    10
## 5   nine spades     9
## 6  eight spades     8
## 7  seven spades     7
## 8    six spades     6
## 9   five spades     5
## 10  four spades     4
## 11 three spades     3
## 12   two spades     2
## 13   ace spades     1
# modify values  
deck$value[deck$face == "ace"] <- 14
deck$value[deck$suit == "hearts"] <- 1

Closure
Since there are lots of going on in the global environment, it is possible that the deck dataset could be modified. We would like to store deck dataset in a safe, out-if-the-way-place. We made function deal and shuffle have theiroriginal environment to be differnet from the global environment. Their original environment becomes the runtime environment. In addition, instead of having each function reference the global environment to update deck envir = globalenv(), you can have them reference their parent environment at runtime envir = parent.env(environment()).

setup <- function(deck) {
  DECK <- deck

  DEAL <- function() {
    card <- deck[1, ]
    assign("deck", deck[-1, ], envir = parent.env(environment()))
    card
  }

  SHUFFLE <- function(){
    random <- sample(1:52, size = 52)
    assign("deck", DECK[random, ], envir = parent.env(environment()))
 }

 list(deal = DEAL, shuffle = SHUFFLE)
}

cards <- setup(deck)
deal <- cards$deal
shuffle <- cards$shuffle

# check the environment of deal adn shuffle  
environment(deal)
## <environment: 0x0000000016378f08>
environment(shuffle)
## <environment: 0x0000000016378f08>
# deal and shuffle  
deal()  
##   face   suit value
## 1 king spades    13
shuffle()

deal()
##    face  suit value
## 16 jack clubs    11

Project three - Slot machine

aim - to create a play() function to randomly generate three symbols and to calculate a prize based on those symbols

get_symbols <- function(){
  wheel <- c("DD", "7", "BBB", "BB", "B", "C", "0")
  sample(wheel, size = 3, replace = TRUE, 
         prob = c(0.03, 0.03, 0.06, 0.1, 0.25, 0.01, 0.52))
}

get_symbols()
## [1] "0"  "0"  "BB"
score <- function (symbols) {
  # identify case
  same <- symbols[1] == symbols[2] && symbols[2] == symbols[3]
  bars <- symbols %in% c("B", "BB", "BBB")
  
  # get prize
  if (same) {
    payouts <- c("DD" = 100, "7" = 80, "BBB" = 40, "BB" = 25, 
      "B" = 10, "C" = 10, "0" = 0)
    prize <- unname(payouts[symbols[1]]) 
    # unname returns a copy of an project with the names attribute removed  
  } else if (all(bars)) {
    prize <- 5
  } else {
    cherries <- sum(symbols == "C")
    prize <- c(0, 2, 5)[cherries + 1]
  }
  
  # adjust for diamonds
  diamonds <- sum(symbols == "DD")
  prize * 2 ^ diamonds
}

play <- function() {
  symbols <- get_symbols()
  print(symbols)
  score(symbols)
}

play()
## [1] "B" "0" "0"
## [1] 0
play()
## [1] "B" "7" "B"
## [1] 0

remake play function - make symbols as an attribute of the score

play <- function(){
  symbols <- get_symbols()
  prize <- score(symbols)
  attr(prize, "symbols") <- symbols 
  prize
}

play()
## [1] 0
## attr(,"symbols")
## [1] "BB" "0"  "7"
# another way using structure  
play <- function(){
  symbols <- get_symbols()
  structure(score(symbols), symbols = symbols)
}

play()
## [1] 0
## attr(,"symbols")
## [1] "0"  "BB" "B"

create a slot_display function to make results prettier

slot_display <- function(prize){
  
  # extract symbols  
  symbols <- attr(prize, "symbols")
  
  # collapse symbols into single string  
  symbols <- paste(symbols, collapse = " ")
  
  # combine symbol with prize as a regular expression  
  #\n is regular expression for new line (i.e. return or enter) 
  string <- paste(symbols, prize, sep = "\n$")
  
  # display regular expression in console without quotes  
  cat(string)
}

slot_display(play())
## B B 0
## $0
# let's run it step by step 
one_play <- play()

symbols <- attr(one_play, "symbols")
symbols  
## [1] "B" "B" "0"
symbols <- paste(symbols, collapse = " ")
symbols
## [1] "B B 0"
string <- paste(symbols, one_play, sep = "\n$")
string
## [1] "B B 0\n$0"
cat(string)
## B B 0
## $0
# cat does not surround its output with quotation marks
# cat also replaces every \n with a new line or new break 

There is one function that can automatically clean up the output of play each time it is displayed. This function is print.

# what's inside print function ? 
print
## function (x, ...) 
## UseMethod("print")
## <bytecode: 0x0000000015828118>
## <environment: namespace:base>

UseMethod function examines the class of the input that you provide for the first arugument of print and then passes all of your arguments to a new function designed to handle that class of input.

check which methods exist for a generic function by calling methods on the function.

head(methods(print))
## [1] "print.acf"      "print.AES"      "print.all_vars" "print.anova"   
## [5] "print.any_vars" "print.aov"

This system of generic functions, methods, and class-based dispatch is known as S3 because it is originated in the third version of S language.

Let’s give one-play a class of its own.

class(one_play) <- "slots"

Let’s write an S3 method for the slots class. The method does need to be named print.slots; otherwise UseMethod will not find it. It does have to take the same arguments as print.

# check print argument  
args(print)   
## function (x, ...) 
## NULL
print.slots <- function(x, ...) {
  cat("I am using the print.slots method")
}

# check if it works  
print(one_play)
## I am using the print.slots method
print.slots <- function(x, ...) {
  slot_display(x)
}

Now, R will automatically use slot_display to display objects of class slots and only objects of class “slots”.

one_play
## B B 0
## $0

Modify the play functon to make it assign slots to the class attributes of its output

play <- function(){
  symbols <- get_symbols()
  structure(score(symbols), symbols = symbols, class = "slots")
}

# check the class of the output  
class(play())
## [1] "slots"
play()
## B 0 B
## $0
play()
## BB 0 0
## $0

Vectorized code

abs_loop <- function(vec){
  for (i in 1:length(vec)) {
    if (vec[i] < 0) {
      vec[i] <- -vec[i]
    }
  }
  vec 
}

# the second way
abs_set <- function(vec){
  negs <- vec <0
  vec[negs] <- vec[negs]*-1
  vec
}

long <- rep(c(-1, 1), 5000000)

system.time(abs_loop(long))
##    user  system elapsed 
##    1.01    0.02    1.03
system.time(abs_set(long))
##    user  system elapsed 
##    0.23    0.04    0.27
system.time(abs(long))
##    user  system elapsed 
##    0.00    0.01    0.01

speed compare

change_symbols <- function(vec){
  for (i in 1:length(vec)){
    if (vec[i] == "DD") {
      vec[i] <- "joker"
    } else if (vec[i] == "C") {
      vec[i] <- "ace"
    } else if (vec[i] == "7") {
      vec[i] <- "king"
    }else if (vec[i] == "B") {
      vec[i] <- "queen"
    } else if (vec[i] == "BB") {
      vec[i] <- "jack"
    } else if (vec[i] == "BBB") {
      vec[i] <- "ten"
    } else {
      vec[i] <- "nine"
    } 
  }
  vec
}

vec <- c("DD", "C", "7", "B", "BB", "BBB", "0")

change_symbols(vec)
## [1] "joker" "ace"   "king"  "queen" "jack"  "ten"   "nine"
many <- rep(vec, 1000000)

system.time(change_symbols(many))
##    user  system elapsed 
##   10.75    0.05   11.31
# the second way  
change_vec <- function (vec) {
  vec[vec == "DD"] <- "joker"
  vec[vec == "C"] <- "ace"
  vec[vec == "7"] <- "king"
  vec[vec == "B"] <- "queen"
  vec[vec == "BB"] <- "jack"
  vec[vec == "BBB"] <- "ten"
  vec[vec == "0"] <- "nine"
  
  vec
}

system.time(change_vec(many))
##    user  system elapsed 
##    0.42    0.03    0.45
# the third way 
# use a lookup table  
change_vec2 <- function(vec){
  tb <- c("DD" = "joker", "C" = "ace", "7" = "king", "B" = "queen", 
    "BB" = "jack", "BBB" = "ten", "0" = "nine")
  unname(tb[vec])
}

system.time(change_vec(many))
##    user  system elapsed 
##    0.39    0.01    0.42

write faster foor loops

  • Do as much as you can outside of the for loop. Every line of code that you place inside of the for loop will be run many, many times. If a line of code only needs to be run once, place it outside of the loop to avoid repetition.
  • Make sure that any storage objects that you use with the loop are large enough to contain all of the results of the loop.
# faster way   
system.time({
  output <- rep(NA, 1000000) 
  for (i in 1:1000000) {
    output[i] <- i + 1
  }
})
##    user  system elapsed 
##    0.06    0.00    0.06
# slow way  
system.time({
  output <- NA  
  # do not set a length for the output vector
  for (i in 1:1000000) {
    output[i] <- i + 1
  }
})
##    user  system elapsed 
##    0.36    0.02    0.40

Return to the project

rewrite get_symbols function

get_many_symbols <- function(n) {
  wheel <- c("DD", "7", "BBB", "BB", "B", "C", "0")
  vec <- sample(wheel, size = 3 * n, replace = TRUE,
    prob = c(0.03, 0.03, 0.06, 0.1, 0.25, 0.01, 0.52))
  matrix(vec, ncol = 3)
}

get_many_symbols(5)
##      [,1] [,2] [,3]
## [1,] "B"  "DD" "0" 
## [2,] "0"  "C"  "0" 
## [3,] "0"  "0"  "B" 
## [4,] "7"  "0"  "0" 
## [5,] "B"  "0"  "BB"

rewrite play function

play_many <- function(n) {
  symb_mat <- get_many_symbols(n = n)
  data.frame(w1 = symb_mat[,1], 
             w2 = symb_mat[,2],
             w3 = symb_mat[,3], 
             prize = score_many(symb_mat))
}

score_many function

# symbols should be a matrix with a column for each slot machine window
score_many <- function(symbols) {

  # Step 1: Assign base prize based on cherries and diamonds ---------
  ## Count the number of cherries and diamonds in each combination
  cherries <- rowSums(symbols == "C")
  diamonds <- rowSums(symbols == "DD") 
  
  ## Wild diamonds count as cherries
  prize <- c(0, 2, 5)[cherries + diamonds + 1]
  
  ## ...but not if there are zero real cherries 
  ### (cherries is coerced to FALSE where cherries == 0)
  prize[!cherries] <- 0
  
  # Step 2: Change prize for combinations that contain three of a kind 
  same <- symbols[, 1] == symbols[, 2] & 
    symbols[, 2] == symbols[, 3]
  payoffs <- c("DD" = 100, "7" = 80, "BBB" = 40, 
    "BB" = 25, "B" = 10, "C" = 10, "0" = 0)
  prize[same] <- payoffs[symbols[same, 1]]
  
  # Step 3: Change prize for combinations that contain all bars ------
  bars <- symbols == "B" | symbols ==  "BB" | symbols == "BBB"
  all_bars <- bars[, 1] & bars[, 2] & bars[, 3] & !same
  prize[all_bars] <- 5
  
  # Step 4: Handle wilds ---------------------------------------------
  
  ## combos with two diamonds
  two_wilds <- diamonds == 2

  ### Identify the nonwild symbol
  one <- two_wilds & symbols[, 1] != symbols[, 2] & 
    symbols[, 2] == symbols[, 3]
  two <- two_wilds & symbols[, 1] != symbols[, 2] & 
    symbols[, 1] == symbols[, 3]
  three <- two_wilds & symbols[, 1] == symbols[, 2] & 
    symbols[, 2] != symbols[, 3]
  
  ### Treat as three of a kind
  prize[one] <- payoffs[symbols[one, 1]]
  prize[two] <- payoffs[symbols[two, 2]]
  prize[three] <- payoffs[symbols[three, 3]]
  
  ## combos with one wild
  one_wild <- diamonds == 1
  
  ### Treat as all bars (if appropriate)
  wild_bars <- one_wild & (rowSums(bars) == 2)
  prize[wild_bars] <- 5
  
  ### Treat as three of a kind (if appropriate)
  one <- one_wild & symbols[, 1] == symbols[, 2]
  two <- one_wild & symbols[, 2] == symbols[, 3]
  three <- one_wild & symbols[, 3] == symbols[, 1]
  prize[one] <- payoffs[symbols[one, 1]]
  prize[two] <- payoffs[symbols[two, 2]]
  prize[three] <- payoffs[symbols[three, 3]]
 
  # Step 5: Double prize for every diamond in combo ------------------
  unname(prize * 2^diamonds)
  
}

system.time(play_many(10000000))
##    user  system elapsed 
##    7.86    1.78    9.81