library(purrr)

Part I. Multiple choice and multiple answer questions

Question 1.

Answer: hw4.Rmd; ./hw4.Rmd; ../hw4.

Question 2.

Answer:

  • The recycling rule of R is invoked here.
  • The odd entries of x are multiplied by 1, and the even entries are multiplied by -1.
x <- 1:8
x <- x * c(1, -1)
x
[1]  1 -2  3 -4  5 -6  7 -8
length(x)
[1] 8

Question 3.

Answer:

(pets <- c(cat = 10, dog = 15, lizard = 2))
   cat    dog lizard 
    10     15      2 
pets[-1] ### Correct
   dog lizard 
    15      2 
pets[2:3] ### Correct
   dog lizard 
    15      2 
pets[c("dog", "lizard")] ### Correct
   dog lizard 
    15      2 
pets[1:2]
cat dog 
 10  15 

Question 4.

Answer:

x <- iris$Species
mean(x)
[1] NA
median(x)
Error in median.default(x): need numeric data
table(x)  ### Correct
x
    setosa versicolor  virginica 
        50         50         50 
quantile(x)
Error in quantile.default(x): (unordered) factors are not allowed

Question 5.

Answer:

(x <- c(82, 93, 64, 80, 95))
[1] 82 93 64 80 95
sort(x)[1] ### Correct
[1] 64
sort(x, decreasing=TRUE)[1]
[1] 95
rank(x)[1]
[1] 3
order(x)[1]
[1] 3

Question 6.

Answer:

writeLines("\n") ### Correct
writeLines("A\nB")
A
B
writeLines("")
writeLines("AB")
AB
writeLines(" ")
writeLines("A B")
A B
writeLines("<CR>")
<CR>
writeLines("A<CR>B")
A<CR>B

Question 7.

Answer:

x <- c("Junior", "Junior", "Freshman", "Freshman", "Freshman", "Sophomore", "Sophomore", "Sophomore", "Sophomore", "Junior", "Freshman", "Junior", "Freshman", "Freshman")
(x <- as.integer(as.character(x)))
 [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA
x <- c("Junior", "Junior", "Freshman", "Freshman", "Freshman", "Sophomore", "Sophomore", "Sophomore", "Sophomore", "Junior", "Freshman", "Junior", "Freshman", "Freshman")
(x <- factor(x))
 [1] Junior    Junior    Freshman  Freshman  Freshman  Sophomore Sophomore
 [8] Sophomore Sophomore Junior    Freshman  Junior    Freshman  Freshman 
Levels: Freshman Junior Sophomore
x <- c("Junior", "Junior", "Freshman", "Freshman", "Freshman", "Sophomore", "Sophomore", "Sophomore", "Sophomore", "Junior", "Freshman", "Junior", "Freshman", "Freshman")
(x <- as.factor(x))
 [1] Junior    Junior    Freshman  Freshman  Freshman  Sophomore Sophomore
 [8] Sophomore Sophomore Junior    Freshman  Junior    Freshman  Freshman 
Levels: Freshman Junior Sophomore
x <- c("Junior", "Junior", "Freshman", "Freshman", "Freshman", "Sophomore", "Sophomore", "Sophomore", "Sophomore", "Junior", "Freshman", "Junior", "Freshman", "Freshman")
(x <- factor(x, c("Freshman", "Sophomore", "Junior", "Senior"))) ### Correct
 [1] Junior    Junior    Freshman  Freshman  Freshman  Sophomore Sophomore
 [8] Sophomore Sophomore Junior    Freshman  Junior    Freshman  Freshman 
Levels: Freshman Sophomore Junior Senior

Question 8.

Answer:

(x <- c(1, 15, 3, 20, 5, 8, 9, 10, 1, 3))
 [1]  1 15  3 20  5  8  9 10  1  3
x >= 13 && x < 10
[1] FALSE
x >= 13 || x < 10
[1] TRUE
x >= 13 | x < 10 ### Correct
 [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
x >= 13 & x < 10
 [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

Question 9.

Answer:

(x <- c(NA, 123))
[1]  NA 123
x == NA
[1] NA NA
(x <- c(NA, 123))
[1]  NA 123
complete(x)
Error in complete(x): could not find function "complete"
(x <- c(NA, 123))
[1]  NA 123
x = NA

(x <- c(NA, 123))
[1]  NA 123
is.na(x) ### Correct
[1]  TRUE FALSE

Question 10.

Answer:

### Example y and yHat
n <- 10
y <- matrix(rnorm(n))
yHat <- matrix(rnorm(n))
str(y)
 num [1:10, 1] 1.263 -0.326 1.33 1.272 0.415 ...
str(yHat)
 num [1:10, 1] 0.764 -0.799 -1.148 -0.289 -0.299 ...
### Desired result
sum((y - yHat)^2)
[1] 26.04449
(y - yHat) %*% (y - yHat)
Error in (y - yHat) %*% (y - yHat): non-conformable arguments
(y - yHat) * (y - yHat)
            [,1]
 [1,]  0.2493612
 [2,]  0.2235170
 [3,]  6.1377896
 [4,]  2.4395032
 [5,]  0.5095912
 [6,]  1.2733750
 [7,]  1.3942662
 [8,]  0.3566487
 [9,]  0.1948785
[10,] 13.2655612
y %*% yHat
Error in y %*% yHat: non-conformable arguments
t(y - yHat) %*% (y - yHat) ### Correct
         [,1]
[1,] 26.04449

Question 11.

Answer:

l <- list(A = c("Red", "Green", "Black"),
          B = matrix(1:6, 2),
          C = FALSE)
str(l)
List of 3
 $ A: chr [1:3] "Red" "Green" "Black"
 $ B: int [1:2, 1:3] 1 2 3 4 5 6
 $ C: logi FALSE
l[2]
$B
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
l$B ### Correct
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
l[names(l) == "B"]
$B
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
l["B"]
$B
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6

Question 12.

Answer:

### Example dat
dat <- data.frame(A = 1:10, B = rep("A", 10), C = rep(TRUE, 10))
str(dat)
'data.frame':   10 obs. of  3 variables:
 $ A: int  1 2 3 4 5 6 7 8 9 10
 $ B: chr  "A" "A" "A" "A" ...
 $ C: logi  TRUE TRUE TRUE TRUE TRUE TRUE ...
map(dat, class) ### Correct
$A
[1] "integer"

$B
[1] "character"

$C
[1] "logical"
dim(dat)
[1] 10  3
class(dat)
[1] "data.frame"
class(dat[1:ncol(dat)])
[1] "data.frame"

Question 13.

Answer:

### correct
### test1: returns "yes" when gpa lies in the closed interval [3, 3.5] and
### "no" otherwise. The condition uses the scalar operator `&&`, so the
### function is meant to be called with one gpa value at a time.
test1 <- function(gpa) {
  ### Both bounds must hold for the applicant to be admitted.
  if (gpa >= 3 && gpa <= 3.5) {
    admit <- "yes"
  } else {
    admit <- "no"
  }
  ### The last evaluated expression is the return value.
  admit
}

### test2 (incorrect option): the two bounds are joined with `||` instead of
### `&&`. Every non-missing number is either >= 3 or <= 3.5, so the condition
### is always TRUE and the function returns "yes" for any such gpa.
test2 <- function(gpa) {
  if (gpa >= 3 || gpa <= 3.5) {
    admit <- "yes"
  } else {
    ### Never reached for a non-missing numeric gpa.
    admit <- "no"
  }
  admit
}
### test3 (incorrect option): the assignment admit <- "no" sits after the if
### block rather than in an else branch, so it always runs and overwrites any
### "yes" set inside the if. The function therefore always returns "no".
test3 <- function(gpa) {
  if (gpa >= 3 && gpa <= 3.5) {
    admit <- "yes"
  }
  ### Unconditional: executed whether or not the if branch was taken.
  admit <- "no"
  admit
}

### test4 (incorrect option): the first branch only checks the lower bound, so
### every gpa >= 3 (including values above 3.5) returns "yes". The else-if
### branch is only reached when gpa < 3, where gpa <= 3.5 is always TRUE, so
### those values return "no".
test4 <- function(gpa) {
  if (gpa >= 3) {
    ### No upper-bound check here: gpa > 3.5 is admitted as well.
    admit <- "yes"
  } else if (gpa <= 3.5) {
    admit <- "no"
  }
  admit
}


### Sample gpa
(gpa <- seq(1, 4, by = 0.2))
 [1] 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2 3.4 3.6 3.8 4.0
### Desired output
(admit0 <- ifelse((gpa >= 3) & (gpa <= 3.5), "yes", "no"))
 [1] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "yes" "yes"
[13] "yes" "no"  "no"  "no" 
identical(map_chr(gpa, test1), admit0)
[1] TRUE
identical(map_chr(gpa, test2), admit0)
[1] FALSE
identical(map_chr(gpa, test3), admit0)
[1] FALSE
identical(map_chr(gpa, test4), admit0)
[1] FALSE

Question 14.

Answer:

  • A while loop is used when the number of iterations is undetermined before starting the loop, e.g., to calculate the smallest n required for \(\sum_{i = 1}^{n} i^2\) to exceed 100000
  • while (TRUE) is equivalent to repeat
  • A for loop is used when the number of iterations is determined before starting the loop, e.g., to calculate a summary statistic for each file in a given list
  • When using a repeat loop, you must use a break statement within the loop because otherwise there will be an infinite loop

Question 15.

Answer:

  • When performing a repetitive task, it is good practice to write a function that performs the task once, and then call that function repeatedly.
  • The length of a function should be manageable. If a function is too long, it should be broken up into smaller functions.

Question 16.

Answer:

l <- split(iris[, c("Sepal.Length", "Sepal.Width")], iris$Species)
str(l)
List of 3
 $ setosa    :'data.frame': 50 obs. of  2 variables:
  ..$ Sepal.Length: num [1:50] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
  ..$ Sepal.Width : num [1:50] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
 $ versicolor:'data.frame': 50 obs. of  2 variables:
  ..$ Sepal.Length: num [1:50] 7 6.4 6.9 5.5 6.5 5.7 6.3 4.9 6.6 5.2 ...
  ..$ Sepal.Width : num [1:50] 3.2 3.2 3.1 2.3 2.8 2.8 3.3 2.4 2.9 2.7 ...
 $ virginica :'data.frame': 50 obs. of  2 variables:
  ..$ Sepal.Length: num [1:50] 6.3 5.8 7.1 6.3 6.5 7.6 4.9 7.3 6.7 7.2 ...
  ..$ Sepal.Width : num [1:50] 3.3 2.7 3 2.9 3 3 2.5 2.9 2.5 3.6 ...
pmap(list(l, 1:2), function(x, y) median(x[[y]]))
Error in `stop_bad_length()`:
! Element 2 of `.l` must have length 1 or 3, not 2
imap(l, function(x, y) median(x[[y]]))
$setosa
NULL

$versicolor
NULL

$virginica
NULL
map2(l, 1:2, function(x, y) median(x[[y]]))
Error: Mapped vectors must have consistent lengths:
* `.x` has length 3
* `.y` has length 2
map(l, function(x) map_dbl(x, median)) ### Correct
$setosa
Sepal.Length  Sepal.Width 
         5.0          3.4 

$versicolor
Sepal.Length  Sepal.Width 
         5.9          2.8 

$virginica
Sepal.Length  Sepal.Width 
         6.5          3.0 

Question 17.

Answer:

l <- list(matrix(2:5, 2, 2),
          matrix(c(1, 0, 0, 2), 2, 2),
          matrix(c(-2, 0, 0, 1), 2, 2))
str(l)
List of 3
 $ : int [1:2, 1:2] 2 3 4 5
 $ : num [1:2, 1:2] 1 0 0 2
 $ : num [1:2, 1:2] -2 0 0 1
### Desired output
reduce(l, `+`)
     [,1] [,2]
[1,]    1    4
[2,]    3    8
sum(l[[1]], l[[2]], l[[3]])
[1] 16
l[[1]] + l[[2]] + l[[3]] ### Correct
     [,1] [,2]
[1,]    1    4
[2,]    3    8
list(l[[1]], l[[1]] + l[[2]], l[[1]] + l[[2]] + l[[3]])
[[1]]
     [,1] [,2]
[1,]    2    4
[2,]    3    5

[[2]]
     [,1] [,2]
[1,]    3    4
[2,]    3    7

[[3]]
     [,1] [,2]
[1,]    1    4
[2,]    3    8
list(l[[1]], l[[2]], l[[3]])
[[1]]
     [,1] [,2]
[1,]    2    4
[2,]    3    5

[[2]]
     [,1] [,2]
[1,]    1    0
[2,]    0    2

[[3]]
     [,1] [,2]
[1,]   -2    0
[2,]    0    1

Question 18.

Answer:

  • The output is 10. f() has no local variable named x, so it looks x up in the global environment at the time it is called; by then x has been reassigned from 1 to 10.
### Initial global binding of x.
x <- 1
### f() has no parameter and no local variable named x, so the x in its body
### is looked up outside the function (here: the global environment) at the
### moment f() is called, not when f is defined.
f <- function() {
  x
}
### Rebind the global x before f() is called.
x <- 10
### At call time the global x is 10, so this returns 10.
f()
[1] 10

Question 19.

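Answer: Only tripple(1) will be evaluated, and it will be evaluated only once. The argument tripple(y) is evaluated with the global y = 1 (not the y <- 2 inside f()), and its value is reused for both occurrences of x.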

### tripple(): multiplies its argument by 3.
tripple <- function(x) x * 3
### f(): returns its argument twice, as c(x, x).
f <- function(x) {
  ### Local y; it is not referenced by c(x, x) and does not change the value
  ### of the argument expression supplied by the caller.
  y <- 2
  ### x is used twice here, but R evaluates an argument expression at most
  ### once (on first use) and reuses the value afterwards.
  c(x, x)
}
### Global y, the one seen by the argument expression tripple(y) below.
y <- 1
### The argument tripple(y) is evaluated in the calling environment, where
### y is 1, giving 3; hence the result 3 3.
f(tripple(y))
[1] 3 3

Part II. Coding questions

Question 20.

### Keep only the even-numbered rows of iris (rows 2, 4, 6, ...), selected
### with a logical mask over the row positions.
dat <- iris[seq_len(nrow(iris)) %% 2 == 0, ]
head(dat)

Question 21.

### Example x
x <- c("unknown", "female", "unknown", "male", "male", "female")

### Recode every "unknown" entry as a missing value with one vectorised
### assignment. %in% never returns NA, so this also works when x is empty or
### already contains NA; an element-wise `if (x[i] == "unknown")` loop over
### 1:length(x) raises an error in both of those cases.
x[x %in% "unknown"] <- NA
x
[1] NA       "female" NA       "male"   "male"   "female"

Question 22.

### Example z
z <- c("cat", "cat", "dog", "cat", "dog", "dog")

### Method 1
### ifelse() is vectorised and builds the complete result itself, so w does
### not need to be preallocated first: 1 where z is "cat", 2 elsewhere.
w <- ifelse(z == "cat", 1, 2)
w
[1] 1 1 2 1 2 2
### Method 2
### Apply a small scalar rule to each element of z and collect the results
### in a double vector: 1 for "cat", 2 for anything else.
map_dbl(z, function(animal) if (animal == "cat") 1 else 2)
[1] 1 1 2 1 2 2
### Method 3
### Preallocate the numeric result, then fill it one element at a time.
w <- numeric(length(z))
### seq_along(z) is an empty sequence when z is empty, so the loop body is
### skipped; 1:length(z) would instead iterate over c(1, 0) and fail.
for (i in seq_along(z)) {
  w[i] <- if (z[i] == "cat") 1 else 2
}
w
[1] 1 1 2 1 2 2

Question 23.

### SampleVar: variance of x computed with denominator n, i.e. the mean of
### the squared deviations of x from its mean.
###   x: a numeric vector.
###   Returns a single numeric value.
SampleVar <- function(x) {
  centered <- x - mean(x)
  mean(centered * centered)
}
### Check SampleVar() against base R. var() divides by n - 1, so multiplying
### it by (n - 1) / n gives the divide-by-n value that SampleVar() computes.
x <- 1:10
n <- length(x)
### expect_equal() compares with a numeric tolerance and errors on mismatch.
testthat::expect_equal(SampleVar(x), (n - 1) / n * var(x))