library(purrr)
library(stringr)   ### For str_c

Applying functions

1.

(Not graded)

a.

n <- 100

sum(map_dbl(1: n, function(x){
  1 / x
}))

[1] 5.187378

b.

n <- 100
m <- 50

sum(unlist(map(1:m, function(j) {
  map(1:n, function(i) {
    2^(-i * j)
  })
})))

[1] 1.606695

c.

Part a.

### Set a large n
n <- 1e7

### map time
system.time({
  sum(map_dbl(1:n, function(x) {
    1 / x
  }))
})

   user  system elapsed 
   4.94    0.01    4.95

### for loop time
system.time({
  out <- 0

  for (i in 1:n){
    out <- out + 1 / i
  }
  out
})

   user  system elapsed 
   0.18    0.00    0.18

### Vectorized operation time
system.time({
  i <- 1:n
  sum(1 / i)
})

   user  system elapsed 
   0.03    0.00    0.03

Part b.

### Initialization
n <- 1e4
m <- 1e3

### map time
system.time({
  sum(unlist(map(1:m, function(j) {
    map(1:n, function(i) {
      2^(-i * j)
    })
  })))
})

   user  system elapsed 
   9.62    0.20    9.83

### Nested for loop time
system.time({
  out <- 0
  
  for (i in 1:n){
    for (j in 1:m){
      out <- out + 2^(- i * j)
    }
  }
  out
})

   user  system elapsed 
   1.14    0.00    1.14

### Vectorized operation 1 time
system.time({
  ind <- matrix(1:n) %*% matrix(1:m, 1)
  sum(2^(- ind))
})

   user  system elapsed 
   0.88    0.00    0.87

### Vectorized operation 2 time
system.time({
  ind <- matrix(1:n, nrow = n, ncol = m) * matrix(1:m, nrow = n, ncol = m, byrow = TRUE)
  sum(2^(- ind))
})

   user  system elapsed 
   0.96    0.00    0.97

### Vectorized operation 3 time
system.time({
  ind <- outer(1:n, 1:m, "*")
  sum(2^(- ind))
})

   user  system elapsed 
   0.91    0.00    0.91

As we can see, surprisingly, map is the slowest among these three, while the vectorized operation is the fastest. The map is useful for functional programming. We use it for its consistency in the format of input and output, as well as its concision when coding, which sacrifices a little bit of computation time. That is not a really big problem for usual simulations.

2.

(Not graded)

a.

str(mtcars)

'data.frame':   32 obs. of  11 variables:
 $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
 $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
 $ disp: num  160 160 108 258 360 ...
 $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
 $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
 $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
 $ qsec: num  16.5 17 18.6 19.4 17 ...
 $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
 $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
 $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
 $ carb: num  4 4 1 1 2 1 4 2 2 4 ...

map_dbl(mtcars, median)

    mpg     cyl    disp      hp    drat      wt    qsec      vs      am    gear 
 19.200   6.000 196.300 123.000   3.695   3.325  17.710   0.000   0.000   4.000 
   carb 
  2.000

b.

do.call(rbind, map(split(mtcars, mtcars$cyl), colMeans))

       mpg cyl     disp        hp     drat       wt     qsec        vs
4 26.66364   4 105.1364  82.63636 4.070909 2.285727 19.13727 0.9090909
6 19.74286   6 183.3143 122.28571 3.585714 3.117143 17.97714 0.5714286
8 15.10000   8 353.1000 209.21429 3.229286 3.999214 16.77214 0.0000000
         am     gear     carb
4 0.7272727 4.090909 1.545455
6 0.4285714 3.857143 3.428571
8 0.1428571 3.285714 3.500000

c.

str(classdata::cities)

'data.frame':   1207 obs. of  14 variables:
 $ City               : chr  "Adel" "Albia" "Ames" "Amosa" ...
 $ Population         : int  4067 3699 52541 5646 36876 2464 32059 2611 12899 25571 ...
 $ Violent.crime      : int  3 9 147 5 36 0 75 2 33 120 ...
 $ Murder             : int  0 0 0 0 0 0 1 0 0 1 ...
 $ Rape               : int  0 0 24 0 6 0 9 1 15 8 ...
 $ Robbery            : int  0 0 17 0 6 0 13 0 0 13 ...
 $ Aggravated.assault : int  3 9 106 5 24 0 52 1 18 98 ...
 $ Property.crime     : int  114 67 1465 99 806 18 832 23 412 1226 ...
 $ Burglary           : int  13 11 363 16 140 10 156 7 112 296 ...
 $ Larceny.theft      : int  93 53 1061 81 626 4 644 15 285 881 ...
 $ Motor.vehicle.theft: int  8 3 41 2 40 4 32 1 15 49 ...
 $ Arson              : int  0 2 5 3 4 0 6 0 3 23 ...
 $ State              : chr  "iowa" "iowa" "iowa" "iowa" ...
 $ Year               : int  2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 ...

map_chr(classdata::cities, class)

               City          Population       Violent.crime              Murder 
        "character"           "integer"           "integer"           "integer" 
               Rape             Robbery  Aggravated.assault      Property.crime 
          "integer"           "integer"           "integer"           "integer" 
           Burglary       Larceny.theft Motor.vehicle.theft               Arson 
          "integer"           "integer"           "integer"           "integer" 
              State                Year 
        "character"           "integer"

d.

str(iris)

'data.frame':   150 obs. of  5 variables:
 $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
 $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
 $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
 $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
 $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

map_dbl(map(iris, unique), length)

Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
          35           23           43           22            3

e.

map2_dbl(iris$Sepal.Length, iris$Sepal.Width, `*`)

  [1] 17.85 14.70 15.04 14.26 18.00 21.06 15.64 17.00 12.76 15.19 19.98 16.32
 [13] 14.40 12.90 23.20 25.08 21.06 17.85 21.66 19.38 18.36 18.87 16.56 16.83
 [25] 16.32 15.00 17.00 18.20 17.68 15.04 14.88 18.36 21.32 23.10 15.19 16.00
 [37] 19.25 17.64 13.20 17.34 17.50 10.35 14.08 17.50 19.38 14.40 19.38 14.72
 [49] 19.61 16.50 22.40 20.48 21.39 12.65 18.20 15.96 20.79 11.76 19.14 14.04
 [61] 10.00 17.70 13.20 17.69 16.24 20.77 16.80 15.66 13.64 14.00 18.88 17.08
 [73] 15.75 17.08 18.56 19.80 19.04 20.10 17.40 14.82 13.20 13.20 15.66 16.20
 [85] 16.20 20.40 20.77 14.49 16.80 13.75 14.30 18.30 15.08 11.50 15.12 17.10
 [97] 16.53 17.98 12.75 15.96 20.79 15.66 21.30 18.27 19.50 22.80 12.25 21.17
[109] 16.75 25.92 20.80 17.28 20.40 14.25 16.24 20.48 19.50 29.26 20.02 13.20
[121] 22.08 15.68 21.56 17.01 22.11 23.04 17.36 18.30 17.92 21.60 20.72 30.02
[133] 17.92 17.64 15.86 23.10 21.42 19.84 18.00 21.39 20.77 21.39 15.66 21.76
[145] 22.11 20.10 15.75 19.50 21.08 17.70

f.

pmap_dbl(iris[-5], sum)

  [1] 10.2  9.5  9.4  9.4 10.2 11.4  9.7 10.1  8.9  9.6 10.8 10.0  9.3  8.5 11.2
 [16] 12.0 11.0 10.3 11.5 10.7 10.7 10.7  9.4 10.6 10.3  9.8 10.4 10.4 10.2  9.7
 [31]  9.7 10.7 10.9 11.3  9.7  9.6 10.5 10.0  8.9 10.2 10.1  8.4  9.1 10.7 11.2
 [46]  9.5 10.7  9.4 10.7  9.9 16.3 15.6 16.4 13.1 15.4 14.3 15.9 11.6 15.4 13.2
 [61] 11.5 14.6 13.2 15.1 13.4 15.6 14.6 13.6 14.4 13.1 15.7 14.2 15.2 14.8 14.9
 [76] 15.4 15.8 16.4 14.9 12.8 12.8 12.6 13.6 15.4 14.4 15.5 16.0 14.3 14.0 13.3
 [91] 13.7 15.1 13.6 11.6 13.8 14.1 14.1 14.7 11.7 13.9 18.1 15.5 18.1 16.6 17.5
[106] 19.3 13.6 18.3 16.8 19.4 16.8 16.3 17.4 15.2 16.1 17.2 16.8 20.4 19.5 14.7
[121] 18.1 15.3 19.2 15.7 17.8 18.2 15.6 15.8 16.9 17.6 18.2 20.1 17.0 15.7 15.7
[136] 19.1 17.7 16.8 15.6 17.5 17.8 17.4 15.5 18.2 18.2 17.2 15.7 16.7 17.3 15.8

pmap_dbl(iris[-5], function(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width){
  Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
})

  [1] 10.2  9.5  9.4  9.4 10.2 11.4  9.7 10.1  8.9  9.6 10.8 10.0  9.3  8.5 11.2
 [16] 12.0 11.0 10.3 11.5 10.7 10.7 10.7  9.4 10.6 10.3  9.8 10.4 10.4 10.2  9.7
 [31]  9.7 10.7 10.9 11.3  9.7  9.6 10.5 10.0  8.9 10.2 10.1  8.4  9.1 10.7 11.2
 [46]  9.5 10.7  9.4 10.7  9.9 16.3 15.6 16.4 13.1 15.4 14.3 15.9 11.6 15.4 13.2
 [61] 11.5 14.6 13.2 15.1 13.4 15.6 14.6 13.6 14.4 13.1 15.7 14.2 15.2 14.8 14.9
 [76] 15.4 15.8 16.4 14.9 12.8 12.8 12.6 13.6 15.4 14.4 15.5 16.0 14.3 14.0 13.3
 [91] 13.7 15.1 13.6 11.6 13.8 14.1 14.1 14.7 11.7 13.9 18.1 15.5 18.1 16.6 17.5
[106] 19.3 13.6 18.3 16.8 19.4 16.8 16.3 17.4 15.2 16.1 17.2 16.8 20.4 19.5 14.7
[121] 18.1 15.3 19.2 15.7 17.8 18.2 15.6 15.8 16.9 17.6 18.2 20.1 17.0 15.7 15.7
[136] 19.1 17.7 16.8 15.6 17.5 17.8 17.4 15.5 18.2 18.2 17.2 15.7 16.7 17.3 15.8

temp <- iris
### Remove the entry names, otherwise call by names
names(temp) <- NULL
pmap_dbl(temp[-5], function(a, b, c, d){
  a + b + c + d
})

  [1] 10.2  9.5  9.4  9.4 10.2 11.4  9.7 10.1  8.9  9.6 10.8 10.0  9.3  8.5 11.2
 [16] 12.0 11.0 10.3 11.5 10.7 10.7 10.7  9.4 10.6 10.3  9.8 10.4 10.4 10.2  9.7
 [31]  9.7 10.7 10.9 11.3  9.7  9.6 10.5 10.0  8.9 10.2 10.1  8.4  9.1 10.7 11.2
 [46]  9.5 10.7  9.4 10.7  9.9 16.3 15.6 16.4 13.1 15.4 14.3 15.9 11.6 15.4 13.2
 [61] 11.5 14.6 13.2 15.1 13.4 15.6 14.6 13.6 14.4 13.1 15.7 14.2 15.2 14.8 14.9
 [76] 15.4 15.8 16.4 14.9 12.8 12.8 12.6 13.6 15.4 14.4 15.5 16.0 14.3 14.0 13.3
 [91] 13.7 15.1 13.6 11.6 13.8 14.1 14.1 14.7 11.7 13.9 18.1 15.5 18.1 16.6 17.5
[106] 19.3 13.6 18.3 16.8 19.4 16.8 16.3 17.4 15.2 16.1 17.2 16.8 20.4 19.5 14.7
[121] 18.1 15.3 19.2 15.7 17.8 18.2 15.6 15.8 16.9 17.6 18.2 20.1 17.0 15.7 15.7
[136] 19.1 17.7 16.8 15.6 17.5 17.8 17.4 15.5 18.2 18.2 17.2 15.7 16.7 17.3 15.8

### Check
rowSums(iris[-5])

  [1] 10.2  9.5  9.4  9.4 10.2 11.4  9.7 10.1  8.9  9.6 10.8 10.0  9.3  8.5 11.2
 [16] 12.0 11.0 10.3 11.5 10.7 10.7 10.7  9.4 10.6 10.3  9.8 10.4 10.4 10.2  9.7
 [31]  9.7 10.7 10.9 11.3  9.7  9.6 10.5 10.0  8.9 10.2 10.1  8.4  9.1 10.7 11.2
 [46]  9.5 10.7  9.4 10.7  9.9 16.3 15.6 16.4 13.1 15.4 14.3 15.9 11.6 15.4 13.2
 [61] 11.5 14.6 13.2 15.1 13.4 15.6 14.6 13.6 14.4 13.1 15.7 14.2 15.2 14.8 14.9
 [76] 15.4 15.8 16.4 14.9 12.8 12.8 12.6 13.6 15.4 14.4 15.5 16.0 14.3 14.0 13.3
 [91] 13.7 15.1 13.6 11.6 13.8 14.1 14.1 14.7 11.7 13.9 18.1 15.5 18.1 16.6 17.5
[106] 19.3 13.6 18.3 16.8 19.4 16.8 16.3 17.4 15.2 16.1 17.2 16.8 20.4 19.5 14.7
[121] 18.1 15.3 19.2 15.7 17.8 18.2 15.6 15.8 16.9 17.6 18.2 20.1 17.0 15.7 15.7
[136] 19.1 17.7 16.8 15.6 17.5 17.8 17.4 15.5 18.2 18.2 17.2 15.7 16.7 17.3 15.8

3.

library(repurrrsive)

a.

### Character names
(chr.name <- map_chr(got_chars, `[[`, "name"))

 [1] "Theon Greyjoy"      "Tyrion Lannister"   "Victarion Greyjoy" 
 [4] "Will"               "Areo Hotah"         "Chett"             
 [7] "Cressen"            "Arianne Martell"    "Daenerys Targaryen"
[10] "Davos Seaworth"     "Arya Stark"         "Arys Oakheart"     
[13] "Asha Greyjoy"       "Barristan Selmy"    "Varamyr"           
[16] "Brandon Stark"      "Brienne of Tarth"   "Catelyn Stark"     
[19] "Cersei Lannister"   "Eddard Stark"       "Jaime Lannister"   
[22] "Jon Connington"     "Jon Snow"           "Aeron Greyjoy"     
[25] "Kevan Lannister"    "Melisandre"         "Merrett Frey"      
[28] "Quentyn Martell"    "Samwell Tarly"      "Sansa Stark"

### Actor names
(act.name <- map_chr(map(got_chars, `[[`, "playedBy"), str_c, collapse = ", "))

 [1] "Alfie Allen"                               
 [2] "Peter Dinklage"                            
 [3] ""                                          
 [4] "Bronson Webb"                              
 [5] "DeObia Oparei"                             
 [6] ""                                          
 [7] "Oliver Ford"                               
 [8] ""                                          
 [9] "Emilia Clarke"                             
[10] "Liam Cunningham"                           
[11] "Maisie Williams"                           
[12] ""                                          
[13] "Gemma Whelan"                              
[14] "Ian McElhinney"                            
[15] ""                                          
[16] "Isaac Hempstead-Wright"                    
[17] "Gwendoline Christie"                       
[18] "Michelle Fairley"                          
[19] "Lena Headey"                               
[20] "Sean Bean, Sebastian Croft, Robert Aramayo"
[21] "Nikolaj Coster-Waldau"                     
[22] ""                                          
[23] "Kit Harington"                             
[24] "Michael Feast"                             
[25] "Ian Gelder"                                
[26] "Carice van Houten"                         
[27] ""                                          
[28] ""                                          
[29] "John Bradley-West"                         
[30] "Sophie Turner"

b.

(Not graded)

### File names
Q3bfile.name <- str_c(chr.name, ".txt", sep = "")

### Save txt files
walk2(act.name, Q3bfile.name, writeLines)

c.

### File names
Q3cfile.name <- str_c(chr.name, "All.txt", sep = "")

### Method 1 (with imap)
cont <- map(got_chars, function(x) {
  ### Connect with formatting
  str_c(
    ### Connect text without "\n" in each entry
    imap_chr(x, function(x, i) {
      str_c(i, str_c(x, collapse = ", "), sep = ": ")
    }),
    collapse = "\n"
  )
})
### Save txt files
walk2(cont, Q3cfile.name, writeLines)


### Method 2 (without imap)
### Connect vectors in each entries
cont.dat <- map(got_chars, map_chr, str_c, collapse = ", ")
### Get sublist entry name
name.dat <- map(got_chars, names)
### Format the result
cont <- map2(name.dat, cont.dat, str_c, sep = ": ", collapse="\n")

### Save txt files
walk2(cont, Q3cfile.name, writeLines)

4.

(Not graded)

l <- list(
  matrix(c(2, 3,
           4, 5), 2, 2),
  diag(c(1, 2)), 
  diag(c(-2, 1))
)

a.

reduce(l, `%*%`)

     [,1] [,2]
[1,]   -4    8
[2,]   -6   10

b.

accumulate(l, `+`)

[[1]]
     [,1] [,2]
[1,]    2    4
[2,]    3    5

[[2]]
     [,1] [,2]
[1,]    3    4
[2,]    3    7

[[3]]
     [,1] [,2]
[1,]    1    4
[2,]    3    8

Advanced functions

1.

a.

x <- 10
f1 <- function(x) {
  function() {
    x + 10
  }
}
g <- f1(1)
g()

[1] 11

Masking. x assigned in f1, which is 1, mask x defined outside f1 (in the global environment), which is 10.

b.

y <- 2
i1<- function() {
  z <- 3
  c(y, z)
}
y <- 3
i1()

[1] 3 3

Dynamic lookup. R looks up the value of y when i1 runs. The y in the global environment has been updated to 3 at that step.

c.

(Not graded)

f <- function(x) print("ha")
g <- function(y) print("hello")
y = f()

[1] "ha"

g(y)

[1] "hello"

Functions are ordinary variables and lazy evaluation. Functions can be assigned. y in g is not evaluated because it is not accessed in g.

d.

f <- function(x) print("ha")
g <- function(y) print("hello")
g(y = f())

[1] "hello"

Lazy evaluation. Only g is evaluated, y in g is not evaluated because it is not accessed in g.

2.

a.

### Method 1
getf <- function(a) {
  ### Force evaluate a
  force(a)

  function(x) {
    ### Apply to vector x
    map_dbl(x, function(x) {
      ### Create polynomial
      do.call(
        sum,
        ### Create summand
        imap(a, function(a, i) {
          a * x^(i - 1)
        })
      )
    })
  }
}

### Method 2 (simpler)
getf <- function(a){
  ### Force evaluate a
  force(a)
  
  function(x){
    ### Apply to vector x
    map_dbl(x, function(x){
      ### Create polynomial
      sum(a * x ^ (0: (length(a) - 1)))
    })
  }
}

b.

(Not graded)

i.

### Scaler input
testthat::expect_equal(class(getf(1)), "function")
### Vector input
testthat::expect_equal(class(getf(1:10)), "function")

ii.

### Vector input
a <- 2
x <- 1: 10
getf(a)(x)

 [1] 2 2 2 2 2 2 2 2 2 2

### Unit test
testthat::expect_equal(getf(a)(x), rep(a, length(x)))

### Scaler input
a <- 2
x <- 10
getf(a)(x)

[1] 2

### Unit test
testthat::expect_equal(getf(a)(x), rep(a, length(x)))

iii.

### Vector input
a <- c(1, -1)
x <- 1: 10
getf(a)(x)

 [1]  0 -1 -2 -3 -4 -5 -6 -7 -8 -9

### Unit test
testthat::expect_equal(getf(a)(x), 1 - x)

### Scaler input
a <- c(1, -1)
x <- 10
getf(a)(x)

[1] -9

### Unit test
testthat::expect_equal(getf(a)(x), 1 - x)

c.

a <- c(1, 0, 1)
f <- getf(a)
l <- 0
u <- 10
integrate(f, lower = l, upper = u)

343.3333 with absolute error < 3.8e-12

### Unit test
### Expected value from calculus
testthat::expect_equal(integrate(f, lower = l, upper = u)$value, u + 1/3 * u ^ 3)

d.

(Not graded)

getint <- function(a, lower, upper){
  ### Force evaluate a
  force(a)
  
  f <- function(x){
    ### Apply to vector x
    map_dbl(x, function(x){
      ### Create polynomial
      sum(a * x ^ (0: (length(a) - 1)))
    })
  }
  
  ### Integrate
  integrate(f, lower = l, upper = u)
}

### Unit test
a <- c(1, 0, 1)
l <- 0
u <- 10
getint(a = a, lower = l, upper = u)

343.3333 with absolute error < 3.8e-12

### Unit test
### Expected value from calculus
testthat::expect_equal(getint(a = a, lower = l, upper = u)$value, u + 1/3 * u ^ 3)

3. (Optional)

(Not graded)

a.

getl <- function(x) {
  force(x)
  
  function(par) {
    alp <- par[1]
    bet <- par[2]
    
    ### Joint log-likelihood
    sum(alp * log (bet) - lgamma(alp) + (alp - 1) * log (x) - bet * x)
    # dgamma
  }
}

b.

x <- c(0.289, 0.468, 0.035, 2.114, 0.281)
l <- getl(x)

c.

### Method 1
alp <- 1
bet <- seq(0.1, 2, length.out = 1000)
### Each column is a pair for par
temp <- data.frame(rbind(alp, bet))
l.val <- map(temp, l)
plot(bet, l.val, type="l", xlab = "beta", ylab = "Joint log-likelihood", main = "Joint log-likelihood against beta with alpha = 1")

### Method 2
alp <- 1
bet <- seq(0.1, 2, length.out = 1000)

l.val <- map(bet, function(x){
  l(c(alp, x))
})
plot(bet, l.val, type="l", xlab = "beta", ylab = "Joint log-likelihood", main = "Joint log-likelihood against beta with alpha = 1")

4. (Optional)

(Not graded)

x <- c(0.104, 0.069, 0.034, 0.906, 0.75, 
       0.484, 0.574, 1.747, 0.254, 0.591, 
       0.088, 0.643, 0.754, 0.004, 0.495, 
       1.499, 0.802, 0.495, 0.105, 0.64)
l <- getl(x)
(out <- optim(par = c(1, 1), fn = l, control = list(fnscale = -1)))

$par
[1] 0.9635267 1.7461073

$value
[1] -8.103168

$counts
function gradient 
      59       NA 

$convergence
[1] 0

$message
NULL

### Maximum likelihood estimates
MLE <- out$par
names(MLE) <- c("alpha", "beta")
MLE

    alpha      beta 
0.9635267 1.7461073

Convergence code 0 indicates successful completion.

Homework 6

STAT 486/586 – Introduction to Statistical Computing

Yuhang (Tom) LIN

Due: March 10, 2022

Applying functions

1.

a.

b.

c.

Part a.

Part b.

2.

a.

b.

c.

d.

e.

f.

3.

a.

b.

c.

4.

a.

b.

Advanced functions

1.

a.

b.

c.

d.

2.

a.

b.

i.

ii.

iii.

c.

d.

3. (Optional)

a.

b.

c.

4. (Optional)