library(purrr)
library(stringr) ### For str_c
(Not graded)
n <- 100
sum(map_dbl(1: n, function(x){
1 / x
}))
[1] 5.187378
n <- 100
m <- 50
sum(unlist(map(1:m, function(j) {
map(1:n, function(i) {
2^(-i * j)
})
})))
[1] 1.606695
### Set a large n
n <- 1e7
### map time
system.time({
sum(map_dbl(1:n, function(x) {
1 / x
}))
})
user system elapsed
4.94 0.01 4.95
### for loop time
system.time({
out <- 0
for (i in 1:n){
out <- out + 1 / i
}
out
})
user system elapsed
0.18 0.00 0.18
### Vectorized operation time
system.time({
i <- 1:n
sum(1 / i)
})
user system elapsed
0.03 0.00 0.03
### Initialization
n <- 1e4
m <- 1e3
### map time
system.time({
sum(unlist(map(1:m, function(j) {
map(1:n, function(i) {
2^(-i * j)
})
})))
})
user system elapsed
9.62 0.20 9.83
### Nested for loop time
system.time({
out <- 0
for (i in 1:n){
for (j in 1:m){
out <- out + 2^(- i * j)
}
}
out
})
user system elapsed
1.14 0.00 1.14
### Vectorized operation 1 time
system.time({
ind <- matrix(1:n) %*% matrix(1:m, 1)
sum(2^(- ind))
})
user system elapsed
0.88 0.00 0.87
### Vectorized operation 2 time
system.time({
ind <- matrix(1:n, nrow = n, ncol = m) * matrix(1:m, nrow = n, ncol = m, byrow = TRUE)
sum(2^(- ind))
})
user system elapsed
0.96 0.00 0.97
### Vectorized operation 3 time
system.time({
ind <- outer(1:n, 1:m, "*")
sum(2^(- ind))
})
user system elapsed
0.91 0.00 0.91
As we can see, surprisingly, map
is the slowest among these three, while the vectorized operation is the fastest. The map
is useful for functional programming. We use it for its consistency in the format of input and output, as well as its concision when coding, which sacrifices a little bit of computation time. That is not a really big problem for usual simulations.
(Not graded)
str(mtcars)
'data.frame': 32 obs. of 11 variables:
$ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
$ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
$ disp: num 160 160 108 258 360 ...
$ hp : num 110 110 93 110 175 105 245 62 95 123 ...
$ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
$ wt : num 2.62 2.88 2.32 3.21 3.44 ...
$ qsec: num 16.5 17 18.6 19.4 17 ...
$ vs : num 0 0 1 1 0 1 0 1 1 1 ...
$ am : num 1 1 1 0 0 0 0 0 0 0 ...
$ gear: num 4 4 4 3 3 3 3 4 4 4 ...
$ carb: num 4 4 1 1 2 1 4 2 2 4 ...
map_dbl(mtcars, median)
mpg cyl disp hp drat wt qsec vs am gear
19.200 6.000 196.300 123.000 3.695 3.325 17.710 0.000 0.000 4.000
carb
2.000
do.call(rbind, map(split(mtcars, mtcars$cyl), colMeans))
mpg cyl disp hp drat wt qsec vs
4 26.66364 4 105.1364 82.63636 4.070909 2.285727 19.13727 0.9090909
6 19.74286 6 183.3143 122.28571 3.585714 3.117143 17.97714 0.5714286
8 15.10000 8 353.1000 209.21429 3.229286 3.999214 16.77214 0.0000000
am gear carb
4 0.7272727 4.090909 1.545455
6 0.4285714 3.857143 3.428571
8 0.1428571 3.285714 3.500000
str(classdata::cities)
'data.frame': 1207 obs. of 14 variables:
$ City : chr "Adel" "Albia" "Ames" "Amosa" ...
$ Population : int 4067 3699 52541 5646 36876 2464 32059 2611 12899 25571 ...
$ Violent.crime : int 3 9 147 5 36 0 75 2 33 120 ...
$ Murder : int 0 0 0 0 0 0 1 0 0 1 ...
$ Rape : int 0 0 24 0 6 0 9 1 15 8 ...
$ Robbery : int 0 0 17 0 6 0 13 0 0 13 ...
$ Aggravated.assault : int 3 9 106 5 24 0 52 1 18 98 ...
$ Property.crime : int 114 67 1465 99 806 18 832 23 412 1226 ...
$ Burglary : int 13 11 363 16 140 10 156 7 112 296 ...
$ Larceny.theft : int 93 53 1061 81 626 4 644 15 285 881 ...
$ Motor.vehicle.theft: int 8 3 41 2 40 4 32 1 15 49 ...
$ Arson : int 0 2 5 3 4 0 6 0 3 23 ...
$ State : chr "iowa" "iowa" "iowa" "iowa" ...
$ Year : int 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 ...
map_chr(classdata::cities, class)
City Population Violent.crime Murder
"character" "integer" "integer" "integer"
Rape Robbery Aggravated.assault Property.crime
"integer" "integer" "integer" "integer"
Burglary Larceny.theft Motor.vehicle.theft Arson
"integer" "integer" "integer" "integer"
State Year
"character" "integer"
str(iris)
'data.frame': 150 obs. of 5 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
map_dbl(map(iris, unique), length)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
35 23 43 22 3
map2_dbl(iris$Sepal.Length, iris$Sepal.Width, `*`)
[1] 17.85 14.70 15.04 14.26 18.00 21.06 15.64 17.00 12.76 15.19 19.98 16.32
[13] 14.40 12.90 23.20 25.08 21.06 17.85 21.66 19.38 18.36 18.87 16.56 16.83
[25] 16.32 15.00 17.00 18.20 17.68 15.04 14.88 18.36 21.32 23.10 15.19 16.00
[37] 19.25 17.64 13.20 17.34 17.50 10.35 14.08 17.50 19.38 14.40 19.38 14.72
[49] 19.61 16.50 22.40 20.48 21.39 12.65 18.20 15.96 20.79 11.76 19.14 14.04
[61] 10.00 17.70 13.20 17.69 16.24 20.77 16.80 15.66 13.64 14.00 18.88 17.08
[73] 15.75 17.08 18.56 19.80 19.04 20.10 17.40 14.82 13.20 13.20 15.66 16.20
[85] 16.20 20.40 20.77 14.49 16.80 13.75 14.30 18.30 15.08 11.50 15.12 17.10
[97] 16.53 17.98 12.75 15.96 20.79 15.66 21.30 18.27 19.50 22.80 12.25 21.17
[109] 16.75 25.92 20.80 17.28 20.40 14.25 16.24 20.48 19.50 29.26 20.02 13.20
[121] 22.08 15.68 21.56 17.01 22.11 23.04 17.36 18.30 17.92 21.60 20.72 30.02
[133] 17.92 17.64 15.86 23.10 21.42 19.84 18.00 21.39 20.77 21.39 15.66 21.76
[145] 22.11 20.10 15.75 19.50 21.08 17.70
pmap_dbl(iris[-5], sum)
[1] 10.2 9.5 9.4 9.4 10.2 11.4 9.7 10.1 8.9 9.6 10.8 10.0 9.3 8.5 11.2
[16] 12.0 11.0 10.3 11.5 10.7 10.7 10.7 9.4 10.6 10.3 9.8 10.4 10.4 10.2 9.7
[31] 9.7 10.7 10.9 11.3 9.7 9.6 10.5 10.0 8.9 10.2 10.1 8.4 9.1 10.7 11.2
[46] 9.5 10.7 9.4 10.7 9.9 16.3 15.6 16.4 13.1 15.4 14.3 15.9 11.6 15.4 13.2
[61] 11.5 14.6 13.2 15.1 13.4 15.6 14.6 13.6 14.4 13.1 15.7 14.2 15.2 14.8 14.9
[76] 15.4 15.8 16.4 14.9 12.8 12.8 12.6 13.6 15.4 14.4 15.5 16.0 14.3 14.0 13.3
[91] 13.7 15.1 13.6 11.6 13.8 14.1 14.1 14.7 11.7 13.9 18.1 15.5 18.1 16.6 17.5
[106] 19.3 13.6 18.3 16.8 19.4 16.8 16.3 17.4 15.2 16.1 17.2 16.8 20.4 19.5 14.7
[121] 18.1 15.3 19.2 15.7 17.8 18.2 15.6 15.8 16.9 17.6 18.2 20.1 17.0 15.7 15.7
[136] 19.1 17.7 16.8 15.6 17.5 17.8 17.4 15.5 18.2 18.2 17.2 15.7 16.7 17.3 15.8
pmap_dbl(iris[-5], function(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width){
Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
})
[1] 10.2 9.5 9.4 9.4 10.2 11.4 9.7 10.1 8.9 9.6 10.8 10.0 9.3 8.5 11.2
[16] 12.0 11.0 10.3 11.5 10.7 10.7 10.7 9.4 10.6 10.3 9.8 10.4 10.4 10.2 9.7
[31] 9.7 10.7 10.9 11.3 9.7 9.6 10.5 10.0 8.9 10.2 10.1 8.4 9.1 10.7 11.2
[46] 9.5 10.7 9.4 10.7 9.9 16.3 15.6 16.4 13.1 15.4 14.3 15.9 11.6 15.4 13.2
[61] 11.5 14.6 13.2 15.1 13.4 15.6 14.6 13.6 14.4 13.1 15.7 14.2 15.2 14.8 14.9
[76] 15.4 15.8 16.4 14.9 12.8 12.8 12.6 13.6 15.4 14.4 15.5 16.0 14.3 14.0 13.3
[91] 13.7 15.1 13.6 11.6 13.8 14.1 14.1 14.7 11.7 13.9 18.1 15.5 18.1 16.6 17.5
[106] 19.3 13.6 18.3 16.8 19.4 16.8 16.3 17.4 15.2 16.1 17.2 16.8 20.4 19.5 14.7
[121] 18.1 15.3 19.2 15.7 17.8 18.2 15.6 15.8 16.9 17.6 18.2 20.1 17.0 15.7 15.7
[136] 19.1 17.7 16.8 15.6 17.5 17.8 17.4 15.5 18.2 18.2 17.2 15.7 16.7 17.3 15.8
temp <- iris
### Remove the entry names, otherwise call by names
names(temp) <- NULL
pmap_dbl(temp[-5], function(a, b, c, d){
a + b + c + d
})
[1] 10.2 9.5 9.4 9.4 10.2 11.4 9.7 10.1 8.9 9.6 10.8 10.0 9.3 8.5 11.2
[16] 12.0 11.0 10.3 11.5 10.7 10.7 10.7 9.4 10.6 10.3 9.8 10.4 10.4 10.2 9.7
[31] 9.7 10.7 10.9 11.3 9.7 9.6 10.5 10.0 8.9 10.2 10.1 8.4 9.1 10.7 11.2
[46] 9.5 10.7 9.4 10.7 9.9 16.3 15.6 16.4 13.1 15.4 14.3 15.9 11.6 15.4 13.2
[61] 11.5 14.6 13.2 15.1 13.4 15.6 14.6 13.6 14.4 13.1 15.7 14.2 15.2 14.8 14.9
[76] 15.4 15.8 16.4 14.9 12.8 12.8 12.6 13.6 15.4 14.4 15.5 16.0 14.3 14.0 13.3
[91] 13.7 15.1 13.6 11.6 13.8 14.1 14.1 14.7 11.7 13.9 18.1 15.5 18.1 16.6 17.5
[106] 19.3 13.6 18.3 16.8 19.4 16.8 16.3 17.4 15.2 16.1 17.2 16.8 20.4 19.5 14.7
[121] 18.1 15.3 19.2 15.7 17.8 18.2 15.6 15.8 16.9 17.6 18.2 20.1 17.0 15.7 15.7
[136] 19.1 17.7 16.8 15.6 17.5 17.8 17.4 15.5 18.2 18.2 17.2 15.7 16.7 17.3 15.8
### Check
rowSums(iris[-5])
[1] 10.2 9.5 9.4 9.4 10.2 11.4 9.7 10.1 8.9 9.6 10.8 10.0 9.3 8.5 11.2
[16] 12.0 11.0 10.3 11.5 10.7 10.7 10.7 9.4 10.6 10.3 9.8 10.4 10.4 10.2 9.7
[31] 9.7 10.7 10.9 11.3 9.7 9.6 10.5 10.0 8.9 10.2 10.1 8.4 9.1 10.7 11.2
[46] 9.5 10.7 9.4 10.7 9.9 16.3 15.6 16.4 13.1 15.4 14.3 15.9 11.6 15.4 13.2
[61] 11.5 14.6 13.2 15.1 13.4 15.6 14.6 13.6 14.4 13.1 15.7 14.2 15.2 14.8 14.9
[76] 15.4 15.8 16.4 14.9 12.8 12.8 12.6 13.6 15.4 14.4 15.5 16.0 14.3 14.0 13.3
[91] 13.7 15.1 13.6 11.6 13.8 14.1 14.1 14.7 11.7 13.9 18.1 15.5 18.1 16.6 17.5
[106] 19.3 13.6 18.3 16.8 19.4 16.8 16.3 17.4 15.2 16.1 17.2 16.8 20.4 19.5 14.7
[121] 18.1 15.3 19.2 15.7 17.8 18.2 15.6 15.8 16.9 17.6 18.2 20.1 17.0 15.7 15.7
[136] 19.1 17.7 16.8 15.6 17.5 17.8 17.4 15.5 18.2 18.2 17.2 15.7 16.7 17.3 15.8
library(repurrrsive)
### Character names
(chr.name <- map_chr(got_chars, `[[`, "name"))
[1] "Theon Greyjoy" "Tyrion Lannister" "Victarion Greyjoy"
[4] "Will" "Areo Hotah" "Chett"
[7] "Cressen" "Arianne Martell" "Daenerys Targaryen"
[10] "Davos Seaworth" "Arya Stark" "Arys Oakheart"
[13] "Asha Greyjoy" "Barristan Selmy" "Varamyr"
[16] "Brandon Stark" "Brienne of Tarth" "Catelyn Stark"
[19] "Cersei Lannister" "Eddard Stark" "Jaime Lannister"
[22] "Jon Connington" "Jon Snow" "Aeron Greyjoy"
[25] "Kevan Lannister" "Melisandre" "Merrett Frey"
[28] "Quentyn Martell" "Samwell Tarly" "Sansa Stark"
### Actor names
(act.name <- map_chr(map(got_chars, `[[`, "playedBy"), str_c, collapse = ", "))
[1] "Alfie Allen"
[2] "Peter Dinklage"
[3] ""
[4] "Bronson Webb"
[5] "DeObia Oparei"
[6] ""
[7] "Oliver Ford"
[8] ""
[9] "Emilia Clarke"
[10] "Liam Cunningham"
[11] "Maisie Williams"
[12] ""
[13] "Gemma Whelan"
[14] "Ian McElhinney"
[15] ""
[16] "Isaac Hempstead-Wright"
[17] "Gwendoline Christie"
[18] "Michelle Fairley"
[19] "Lena Headey"
[20] "Sean Bean, Sebastian Croft, Robert Aramayo"
[21] "Nikolaj Coster-Waldau"
[22] ""
[23] "Kit Harington"
[24] "Michael Feast"
[25] "Ian Gelder"
[26] "Carice van Houten"
[27] ""
[28] ""
[29] "John Bradley-West"
[30] "Sophie Turner"
(Not graded)
### File names
Q3bfile.name <- str_c(chr.name, ".txt", sep = "")
### Save txt files
walk2(act.name, Q3bfile.name, writeLines)
### File names
Q3cfile.name <- str_c(chr.name, "All.txt", sep = "")
### Method 1 (with imap)
cont <- map(got_chars, function(x) {
### Connect with formatting
str_c(
### Connect text without "\n" in each entry
imap_chr(x, function(x, i) {
str_c(i, str_c(x, collapse = ", "), sep = ": ")
}),
collapse = "\n"
)
})
### Save txt files
walk2(cont, Q3cfile.name, writeLines)
### Method 2 (without imap)
### Connect vectors in each entries
cont.dat <- map(got_chars, map_chr, str_c, collapse = ", ")
### Get sublist entry name
name.dat <- map(got_chars, names)
### Format the result
cont <- map2(name.dat, cont.dat, str_c, sep = ": ", collapse="\n")
### Save txt files
walk2(cont, Q3cfile.name, writeLines)
(Not graded)
l <- list(
matrix(c(2, 3,
4, 5), 2, 2),
diag(c(1, 2)),
diag(c(-2, 1))
)
reduce(l, `%*%`)
[,1] [,2]
[1,] -4 8
[2,] -6 10
accumulate(l, `+`)
[[1]]
[,1] [,2]
[1,] 2 4
[2,] 3 5
[[2]]
[,1] [,2]
[1,] 3 4
[2,] 3 7
[[3]]
[,1] [,2]
[1,] 1 4
[2,] 3 8
x <- 10
f1 <- function(x) {
function() {
x + 10
}
}
g <- f1(1)
g()
[1] 11
Masking. x
assigned in f1
, which is 1
, mask x
defined outside f1
(in the global environment), which is 10
.
y <- 2
i1<- function() {
z <- 3
c(y, z)
}
y <- 3
i1()
[1] 3 3
Dynamic lookup. R
looks up the value of y
when i1
runs. The y
in the global environment has been updated to 3
at that step.
(Not graded)
f <- function(x) print("ha")
g <- function(y) print("hello")
y = f()
[1] "ha"
g(y)
[1] "hello"
Functions are ordinary variables and lazy evaluation. Functions can be assigned. y
in g
is not evaluated because it is not accessed in g
.
f <- function(x) print("ha")
g <- function(y) print("hello")
g(y = f())
[1] "hello"
Lazy evaluation. Only g
is evaluated, y
in g
is not evaluated because it is not accessed in g
.
### Method 1
getf <- function(a) {
### Force evaluate a
force(a)
function(x) {
### Apply to vector x
map_dbl(x, function(x) {
### Create polynomial
do.call(
sum,
### Create summand
imap(a, function(a, i) {
a * x^(i - 1)
})
)
})
}
}
### Method 2 (simpler)
getf <- function(a){
### Force evaluate a
force(a)
function(x){
### Apply to vector x
map_dbl(x, function(x){
### Create polynomial
sum(a * x ^ (0: (length(a) - 1)))
})
}
}
(Not graded)
### Scaler input
testthat::expect_equal(class(getf(1)), "function")
### Vector input
testthat::expect_equal(class(getf(1:10)), "function")
### Vector input
a <- 2
x <- 1: 10
getf(a)(x)
[1] 2 2 2 2 2 2 2 2 2 2
### Unit test
testthat::expect_equal(getf(a)(x), rep(a, length(x)))
### Scaler input
a <- 2
x <- 10
getf(a)(x)
[1] 2
### Unit test
testthat::expect_equal(getf(a)(x), rep(a, length(x)))
### Vector input
a <- c(1, -1)
x <- 1: 10
getf(a)(x)
[1] 0 -1 -2 -3 -4 -5 -6 -7 -8 -9
### Unit test
testthat::expect_equal(getf(a)(x), 1 - x)
### Scaler input
a <- c(1, -1)
x <- 10
getf(a)(x)
[1] -9
### Unit test
testthat::expect_equal(getf(a)(x), 1 - x)
a <- c(1, 0, 1)
f <- getf(a)
l <- 0
u <- 10
integrate(f, lower = l, upper = u)
343.3333 with absolute error < 3.8e-12
### Unit test
### Expected value from calculus
testthat::expect_equal(integrate(f, lower = l, upper = u)$value, u + 1/3 * u ^ 3)
(Not graded)
getint <- function(a, lower, upper){
### Force evaluate a
force(a)
f <- function(x){
### Apply to vector x
map_dbl(x, function(x){
### Create polynomial
sum(a * x ^ (0: (length(a) - 1)))
})
}
### Integrate
integrate(f, lower = l, upper = u)
}
### Unit test
a <- c(1, 0, 1)
l <- 0
u <- 10
getint(a = a, lower = l, upper = u)
343.3333 with absolute error < 3.8e-12
### Unit test
### Expected value from calculus
testthat::expect_equal(getint(a = a, lower = l, upper = u)$value, u + 1/3 * u ^ 3)
(Not graded)
getl <- function(x) {
force(x)
function(par) {
alp <- par[1]
bet <- par[2]
### Joint log-likelihood
sum(alp * log (bet) - lgamma(alp) + (alp - 1) * log (x) - bet * x)
# dgamma
}
}
x <- c(0.289, 0.468, 0.035, 2.114, 0.281)
l <- getl(x)
### Method 1
alp <- 1
bet <- seq(0.1, 2, length.out = 1000)
### Each column is a pair for par
temp <- data.frame(rbind(alp, bet))
l.val <- map(temp, l)
plot(bet, l.val, type="l", xlab = "beta", ylab = "Joint log-likelihood", main = "Joint log-likelihood against beta with alpha = 1")
### Method 2
alp <- 1
bet <- seq(0.1, 2, length.out = 1000)
l.val <- map(bet, function(x){
l(c(alp, x))
})
plot(bet, l.val, type="l", xlab = "beta", ylab = "Joint log-likelihood", main = "Joint log-likelihood against beta with alpha = 1")
(Not graded)
x <- c(0.104, 0.069, 0.034, 0.906, 0.75,
0.484, 0.574, 1.747, 0.254, 0.591,
0.088, 0.643, 0.754, 0.004, 0.495,
1.499, 0.802, 0.495, 0.105, 0.64)
l <- getl(x)
(out <- optim(par = c(1, 1), fn = l, control = list(fnscale = -1)))
$par
[1] 0.9635267 1.7461073
$value
[1] -8.103168
$counts
function gradient
59 NA
$convergence
[1] 0
$message
NULL
### Maximum likelihood estimates
MLE <- out$par
names(MLE) <- c("alpha", "beta")
MLE
alpha beta
0.9635267 1.7461073
Convergence code 0
indicates successful completion.