(Not graded)
(A <- diag(1, 4))
[,1] [,2] [,3] [,4]
[1,] 1 0 0 0
[2,] 0 1 0 0
[3,] 0 0 1 0
[4,] 0 0 0 1
### Diagonal vector
b <- c(3.1, 2.9, 1.2)
(B <- diag(b, length(b)))
[,1] [,2] [,3]
[1,] 3.1 0.0 0.0
[2,] 0.0 2.9 0.0
[3,] 0.0 0.0 1.2
(C <- matrix(3.1))
[,1]
[1,] 3.1
(D <- matrix(1:3))
[,1]
[1,] 1
[2,] 2
[3,] 3
(E <- matrix(1:10, nrow=2, byrow=TRUE))
[,1] [,2] [,3] [,4] [,5]
[1,] 1 2 3 4 5
[2,] 6 7 8 9 10
(F <- matrix(-3:2, nrow=3))
[,1] [,2]
[1,] -3 0
[2,] -2 1
[3,] -1 2
A[1, , drop=FALSE]
[,1] [,2] [,3] [,4]
[1,] 1 0 0 0
### Method 1
c(A[1, ])
[1] 1 0 0 0
### Method 2
as.vector(A[1, ])
[1] 1 0 0 0
B[2, 2] <- 0
B
[,1] [,2] [,3]
[1,] 3.1 0 0.0
[2,] 0.0 0 0.0
[3,] 0.0 0 1.2
A + 1:4
[,1] [,2] [,3] [,4]
[1,] 2 1 1 1
[2,] 2 3 2 2
[3,] 3 3 4 3
[4,] 4 4 4 5
(G <- cbind(E, t(F)))
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 1 2 3 4 5 -3 -2 -1
[2,] 6 7 8 9 10 0 1 2
(H <- rbind(t(E), F))
[,1] [,2]
[1,] 1 6
[2,] 2 7
[3,] 3 8
[4,] 4 9
[5,] 5 10
[6,] -3 0
[7,] -2 1
[8,] -1 2
F %*% E
[,1] [,2] [,3] [,4] [,5]
[1,] -3 -6 -9 -12 -15
[2,] 4 3 2 1 0
[3,] 11 12 13 14 15
(Q1cii <- c(t(D) %*% D))
[1] 14
is.matrix(Q1cii)
[1] FALSE
5*B + c(t(D) %*% D)
[,1] [,2] [,3]
[1,] 29.5 14 14
[2,] 14.0 14 14
[3,] 14.0 14 20
B + c(D)
[,1] [,2] [,3]
[1,] 4.1 1 1.0
[2,] 2.0 2 2.0
[3,] 3.0 3 4.2
diag(B)
[1] 3.1 0.0 1.2
solve(t(F) %*% F) %*% E
[,1] [,2] [,3] [,4] [,5]
[1,] 0.537037 0.7037037 0.8703704 1.037037 1.203704
[2,] 1.629630 1.9629630 2.2962963 2.629630 2.962963
eigen(t(F) %*% F)$values
[1] 15.520797 3.479203
c(det(t(F) %*% F))
[1] 54
str(iris)
'data.frame': 150 obs. of 5 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
### Method 1
dat <- subset(iris, Species %in% c("setosa", "versicolor"))
str(dat)
'data.frame': 100 obs. of 5 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
### Method 2
dat <- iris[iris$Species %in% c("setosa", "versicolor"), ]
str(dat)
'data.frame': 100 obs. of 5 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
head(dat)
### Method 1
Y <- matrix((dat$Species=="setosa") + 0)
str(Y)
num [1:100, 1] 1 1 1 1 1 1 1 1 1 1 ...
### Method 2
Y <- matrix(ifelse(dat$Species=="setosa", 1, 0))
str(Y)
num [1:100, 1] 1 1 1 1 1 1 1 1 1 1 ...
head(Y)
[,1]
[1,] 1
[2,] 1
[3,] 1
[4,] 1
[5,] 1
[6,] 1
X <- as.matrix(cbind(Intercept = 1, dat[, c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")]))
str(X)
num [1:100, 1:5] 1 1 1 1 1 1 1 1 1 1 ...
- attr(*, "dimnames")=List of 2
..$ : chr [1:100] "1" "2" "3" "4" ...
..$ : chr [1:5] "Intercept" "Sepal.Length" "Sepal.Width" "Petal.Length" ...
head(X)
Intercept Sepal.Length Sepal.Width Petal.Length Petal.Width
1 1 5.1 3.5 1.4 0.2
2 1 4.9 3.0 1.4 0.2
3 1 4.7 3.2 1.3 0.2
4 1 4.6 3.1 1.5 0.2
5 1 5.0 3.6 1.4 0.2
6 1 5.4 3.9 1.7 0.4
is.matrix(X)
[1] TRUE
is.data.frame(X)
[1] FALSE
(Not graded)
kappa(t(X) %*% X)
[1] 5932
The condition number of \(X^{T} X\) at this moment is not large, and we can have the inverse as follows.
solve(t(X) %*% X)
Intercept Sepal.Length Sepal.Width Petal.Length Petal.Width
Intercept 1.6361800 -0.24398598 -0.11687224 -0.04186220 0.24252185
Sepal.Length -0.2439860 0.11782921 -0.07210655 -0.07528896 0.04860338
Sepal.Width -0.1168722 -0.07210655 0.11434112 0.07076812 -0.05781627
Petal.Length -0.0418622 -0.07528896 0.07076812 0.17424619 -0.33595503
Petal.Width 0.2425219 0.04860338 -0.05781627 -0.33595503 0.80395551
X <- cbind(X, "Ave" = (X[, "Sepal.Length"] + X[, "Sepal.Width"]) / 2)
str(X)
num [1:100, 1:6] 1 1 1 1 1 1 1 1 1 1 ...
- attr(*, "dimnames")=List of 2
..$ : chr [1:100] "1" "2" "3" "4" ...
..$ : chr [1:6] "Intercept" "Sepal.Length" "Sepal.Width" "Petal.Length" ...
head(X)
Intercept Sepal.Length Sepal.Width Petal.Length Petal.Width Ave
1 1 5.1 3.5 1.4 0.2 4.30
2 1 4.9 3.0 1.4 0.2 3.95
3 1 4.7 3.2 1.3 0.2 3.95
4 1 4.6 3.1 1.5 0.2 3.85
5 1 5.0 3.6 1.4 0.2 4.30
6 1 5.4 3.9 1.7 0.4 4.65
(Not graded)
kappa(t(X) %*% X)
[1] 1.658751e+16
The condition number of \(X^{T} X\) is very large, which means the matrix is non-invertible.
### Use R chunk option "error = TRUE" to show error messages
solve(t(X) %*% X) %*% t(X) %*% Y
Error in solve.default(t(X) %*% X): system is computationally singular: reciprocal condition number = 4.09163e-17
We can also check the determinant of \(X^{T} X\).
det(t(X) %*% X)
[1] -2.591e-06
We cannot calculate the least square estimate \(\widehat{\beta} = (X^T X) X^T Y\) in this case, as \((X^{T} X)\) is non-invertible with a small determinant and large condition number.
A more specific reason is that we add a dependent column in part e, which causes multicolinearity and makes \(X\) not full column rank.
First, let’s use \(\lambda = 1\).
### lambda
lam.1 <- 1
### Identity matrix
I <- diag(1, ncol(X))
### Ridge regression coefficient estimate
(bet.lam.1 <- solve(t(X) %*% X + lam.1*I) %*% t(X) %*% Y)
[,1]
Intercept 0.26759328
Sepal.Length 0.04538613
Sepal.Width 0.14255224
Petal.Length -0.23673066
Petal.Width -0.23628206
Ave 0.09396919
### Fitted values
Y.lam.1 <- X %*% bet.lam.1
### RSS
RSS.1 <- c(t(Y - Y.lam.1) %*% (Y - Y.lam.1))
### Resulting list
fit <- list(coefficients=bet.lam.1, fitted.values=Y.lam.1, RSS=RSS.1)
str(fit)
List of 3
$ coefficients : num [1:6, 1] 0.2676 0.0454 0.1426 -0.2367 -0.2363 ...
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:6] "Intercept" "Sepal.Length" "Sepal.Width" "Petal.Length" ...
.. ..$ : NULL
$ fitted.values: num [1:100, 1] 1.023 0.91 0.953 0.878 1.033 ...
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:100] "1" "2" "3" "4" ...
.. ..$ : NULL
$ RSS : num 1.01
### Show the head of each entry in the list
lapply(fit, head)
$coefficients
[,1]
Intercept 0.26759328
Sepal.Length 0.04538613
Sepal.Width 0.14255224
Petal.Length -0.23673066
Petal.Width -0.23628206
Ave 0.09396919
$fitted.values
[,1]
1 1.0233836
2 0.9101410
3 0.9532473
4 0.8777104
5 1.0331002
6 1.0086339
$RSS
[1] 1.009922
Further try \(\lambda = 2\) and \(3\).
### Compute the desired quantities again
lam.2 <- 2
I <- diag(rep(1, ncol(X)))
bet.lam.2 <- solve(t(X) %*% X + lam.2*I) %*% t(X) %*% Y
Y.lam.2 <- X %*% bet.lam.2
RSS.2 <- c(t(Y - Y.lam.2) %*% (Y - Y.lam.2))
lam.3 <- 3
I <- diag(rep(1, ncol(X)))
bet.lam.3 <- solve(t(X) %*% X + lam.3*I) %*% t(X) %*% Y
Y.lam.3 <- X %*% bet.lam.3
RSS.3 <- c(t(Y - Y.lam.3) %*% (Y - Y.lam.3))
### Create a ouput matrix
out <- matrix(c(lam.1, lam.2, lam.3, RSS.1, RSS.2, RSS.3), nrow=3)
colnames(out) <- c("lambda", "RSS")
out
lambda RSS
[1,] 1 1.009922
[2,] 2 1.070387
[3,] 3 1.107401
### Find the lambda with smallest RSS
out[which.min(out[, "RSS"]), ]
lambda RSS
1.000000 1.009922
### Use the lambda with smallest RSS
(lam <- out[which.min(out[, "RSS"]), "lambda"])
lambda
1
### Repeat again
I <- diag(rep(1, ncol(X)))
bet.lam <- solve(t(X) %*% X + lam*I) %*% t(X) %*% Y
Y.lam <- X %*% bet.lam
RSS <- c(t(Y - Y.lam) %*% (Y - Y.lam))
### Get predicted labels
Y.Predict <- (Y.lam >= 0.5) + 0
### Number of misclassification
sum(Y.Predict != Y)
[1] 0
(Not graded)
(Q3a <- list(String="Hello World", Number=1:10, Logical.value=TRUE, List.in.list=list(string="Hello World", number=1:10, logical.val=TRUE)))
$String
[1] "Hello World"
$Number
[1] 1 2 3 4 5 6 7 8 9 10
$Logical.value
[1] TRUE
$List.in.list
$List.in.list$string
[1] "Hello World"
$List.in.list$number
[1] 1 2 3 4 5 6 7 8 9 10
$List.in.list$logical.val
[1] TRUE
### Initialize an empty list
Q3b <- vector("list", 10)
### One number each entry
Q3b[1:10] <- 1:10
str(Q3b)
List of 10
$ : int 1
$ : int 2
$ : int 3
$ : int 4
$ : int 5
$ : int 6
$ : int 7
$ : int 8
$ : int 9
$ : int 10
l <- list(A = c("Red", "Green", "Black"),
B = matrix(1:6, nrow = 2),
C = list("Python", "PHP", "Java"))
length(l)
[1] 3
l$C
[[1]]
[1] "Python"
[[2]]
[1] "PHP"
[[3]]
[1] "Java"
str(l$C)
List of 3
$ : chr "Python"
$ : chr "PHP"
$ : chr "Java"
l[["C"]]
[[1]]
[1] "Python"
[[2]]
[1] "PHP"
[[3]]
[1] "Java"
str(l[["C"]])
List of 3
$ : chr "Python"
$ : chr "PHP"
$ : chr "Java"
l[[3]]
[[1]]
[1] "Python"
[[2]]
[1] "PHP"
[[3]]
[1] "Java"
str(l[[3]])
List of 3
$ : chr "Python"
$ : chr "PHP"
$ : chr "Java"
l["C"]
$C
$C[[1]]
[1] "Python"
$C[[2]]
[1] "PHP"
$C[[3]]
[1] "Java"
str(l["C"])
List of 1
$ C:List of 3
..$ : chr "Python"
..$ : chr "PHP"
..$ : chr "Java"
l[3]
$C
$C[[1]]
[1] "Python"
$C[[2]]
[1] "PHP"
$C[[3]]
[1] "Java"
str(l[3])
List of 1
$ C:List of 3
..$ : chr "Python"
..$ : chr "PHP"
..$ : chr "Java"
l[["B"]][2, , drop=FALSE]
[,1] [,2] [,3]
[1,] 2 4 6
l[["B"]] <- l[["B"]] + 1
str(l)
List of 3
$ A: chr [1:3] "Red" "Green" "Black"
$ B: num [1:2, 1:3] 2 3 4 5 6 7
$ C:List of 3
..$ : chr "Python"
..$ : chr "PHP"
..$ : chr "Java"
### Use negative index also works here
l["A"] <- NULL
str(l)
List of 2
$ B: num [1:2, 1:3] 2 3 4 5 6 7
$ C:List of 3
..$ : chr "Python"
..$ : chr "PHP"
..$ : chr "Java"
### c also works here
### iris is a data frame (list) of multiple entries
l <- append(l, list(D=iris))
str(l)
List of 3
$ B: num [1:2, 1:3] 2 3 4 5 6 7
$ C:List of 3
..$ : chr "Python"
..$ : chr "PHP"
..$ : chr "Java"
$ D:'data.frame': 150 obs. of 5 variables:
..$ Sepal.Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
..$ Sepal.Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
..$ Petal.Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
..$ Petal.Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
..$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
library(lattice)
dat <- barley
str(dat)
'data.frame': 120 obs. of 4 variables:
$ yield : num 27 48.9 27.4 39.9 33 ...
$ variety: Factor w/ 10 levels "Svansota","No. 462",..: 3 3 3 3 3 3 7 7 7 7 ...
$ year : Factor w/ 2 levels "1932","1931": 2 2 2 2 2 2 2 2 2 2 ...
$ site : Factor w/ 6 levels "Grand Rapids",..: 3 6 4 5 1 2 3 6 4 5 ...
dim(dat)
[1] 120 4
ncol(dat)
[1] 4
dat <- dat[-1, ]
str(dat)
'data.frame': 119 obs. of 4 variables:
$ yield : num 48.9 27.4 39.9 33 29 ...
$ variety: Factor w/ 10 levels "Svansota","No. 462",..: 3 3 3 3 3 7 7 7 7 7 ...
$ year : Factor w/ 2 levels "1932","1931": 2 2 2 2 2 2 2 2 2 2 ...
$ site : Factor w/ 6 levels "Grand Rapids",..: 6 4 5 1 2 3 6 4 5 1 ...
### Correct
dat1 <- rbind(dat, data.frame(yield=50, variety="Svansota", year=1931, site="Duluth"))
### Note that the type for yield is num
str(dat1)
'data.frame': 120 obs. of 4 variables:
$ yield : num 48.9 27.4 39.9 33 29 ...
$ variety: Factor w/ 10 levels "Svansota","No. 462",..: 3 3 3 3 3 7 7 7 7 7 ...
$ year : Factor w/ 2 levels "1932","1931": 2 2 2 2 2 2 2 2 2 2 ...
$ site : Factor w/ 6 levels "Grand Rapids",..: 6 4 5 1 2 3 6 4 5 1 ...
tail(dat1)
### Partially correct
dat2 <- rbind(dat, c(yield=50, variety="Svansota", year=1931, site="Duluth"))
### Note that the type for yield is chr
str(dat2)
'data.frame': 120 obs. of 4 variables:
$ yield : chr "48.86667" "27.43334" "39.93333" "32.96667" ...
$ variety: Factor w/ 10 levels "Svansota","No. 462",..: 3 3 3 3 3 7 7 7 7 7 ...
$ year : Factor w/ 2 levels "1932","1931": 2 2 2 2 2 2 2 2 2 2 ...
$ site : Factor w/ 6 levels "Grand Rapids",..: 6 4 5 1 2 3 6 4 5 1 ...
### The row name would also be different if you didn't specify
tail(dat2)
### Change back to numeric
dat2$yield <- as.numeric(dat2$yield)
str(dat2)
'data.frame': 120 obs. of 4 variables:
$ yield : num 48.9 27.4 39.9 33 29 ...
$ variety: Factor w/ 10 levels "Svansota","No. 462",..: 3 3 3 3 3 7 7 7 7 7 ...
$ year : Factor w/ 2 levels "1932","1931": 2 2 2 2 2 2 2 2 2 2 ...
$ site : Factor w/ 6 levels "Grand Rapids",..: 6 4 5 1 2 3 6 4 5 1 ...
### Update dat
dat <- dat1
head(dat, 3)
tail(dat, 6)
dat$year <- as.numeric(as.character(dat$year))
str(dat)
'data.frame': 120 obs. of 4 variables:
$ yield : num 48.9 27.4 39.9 33 29 ...
$ variety: Factor w/ 10 levels "Svansota","No. 462",..: 3 3 3 3 3 7 7 7 7 7 ...
$ year : num 1931 1931 1931 1931 1931 ...
$ site : Factor w/ 6 levels "Grand Rapids",..: 6 4 5 1 2 3 6 4 5 1 ...
is.numeric(dat$year)
[1] TRUE
dat$newYield <- as.numeric(dat$yield)/0.4047
str(dat)
'data.frame': 120 obs. of 5 variables:
$ yield : num 48.9 27.4 39.9 33 29 ...
$ variety : Factor w/ 10 levels "Svansota","No. 462",..: 3 3 3 3 3 7 7 7 7 7 ...
$ year : num 1931 1931 1931 1931 1931 ...
$ site : Factor w/ 6 levels "Grand Rapids",..: 6 4 5 1 2 3 6 4 5 1 ...
$ newYield: num 120.7 67.8 98.7 81.5 71.6 ...
dat$yield <- NULL
str(dat)
'data.frame': 120 obs. of 4 variables:
$ variety : Factor w/ 10 levels "Svansota","No. 462",..: 3 3 3 3 3 7 7 7 7 7 ...
$ year : num 1931 1931 1931 1931 1931 ...
$ site : Factor w/ 6 levels "Grand Rapids",..: 6 4 5 1 2 3 6 4 5 1 ...
$ newYield: num 120.7 67.8 98.7 81.5 71.6 ...
library(stringr)
I <- str_detect(as.character(dat$variety), "^No.")
dat <- dat[I, c("newYield", "variety", "site")]
str(dat)
'data.frame': 36 obs. of 3 variables:
$ newYield: num 106.9 143.6 70.9 112.8 79.5 ...
$ variety : Factor w/ 10 levels "Svansota","No. 462",..: 8 8 8 8 8 8 2 2 2 2 ...
$ site : Factor w/ 6 levels "Grand Rapids",..: 3 6 4 5 1 2 3 6 4 5 ...