library(stringr)
### Quote with embedded double quotes and a blank line before the attribution
Q1 <- "\"Age is an issue of mind over matter. If you don't mind, it doesn't matter.\"\n\n-- Mark Twain"
### Print with the escape sequences interpreted
writeLines(text = Q1)
"Age is an issue of mind over matter. If you don't mind, it doesn't matter."
-- Mark Twain
### Candidate strings: some are a word written twice, some are not
Q2 <- c("apple", "appleapple", "BananaBanana", "Banana", "cherryCherry")
### Leading half of each string (for odd lengths the middle character is left out)
first.half <- str_sub(Q2, start = 1, end = str_count(Q2) %/% 2)
print(first.half)
[1] "ap" "apple" "Banana" "Ban" "cherry"
### Trailing half of each string, addressed from the end with negative positions
second.half <- str_sub(Q2, start = -(str_count(Q2) %/% 2), end = -1)
print(second.half)
[1] "le" "apple" "Banana" "ana" "Cherry"
### Check if they are equal (case-insensitively).
### An odd-length string can never be a word written twice, but its two
### truncated halves may still coincide (e.g. "aba" -> "a" and "a"),
### so an even length is required as well.
(str_count(Q2) %% 2 == 0) & (str_to_lower(first.half) == str_to_lower(second.half))
[1] FALSE TRUE TRUE FALSE TRUE
### Holiday greeting used for the word-count exercise; paragraphs are separated by blank lines
Q3 <- "\"Dear Friends,\n\nThe exuberance of the change of seasons at Iowa State University never grows old — from the blooms of spring to the joys of another graduating class. During this holiday season, let your spirits be lifted by family and friends — and by this short video of captivating images of this magnificent campus.\n\nOur best wishes to you and yours for a happy, hope-filled 2022!\n\nWendy Wintersteen and Robert Waggoner\""
### Print with the escape sequences interpreted
writeLines(text = Q3)
"Dear Friends,
The exuberance of the change of seasons at Iowa State University never grows old — from the blooms of spring to the joys of another graduating class. During this holiday season, let your spirits be lifted by family and friends — and by this short video of captivating images of this magnificent campus.
Our best wishes to you and yours for a happy, hope-filled 2022!
Wendy Wintersteen and Robert Waggoner"
### Method 1
### Split on runs of non-alphanumeric characters and count the non-empty pieces.
### Q3 begins and ends with a quote, so the split produces an empty piece at
### each end; dropping empty pieces replaces the hard-coded "- 2" correction,
### which would be wrong for text that does not start/end with a separator.
sum(str_split(Q3, pattern = "[^[:alnum:]]+")[[1]] != "")
[1] 71
### Method 2
### Pull out every maximal run of alphanumeric characters and count the runs
length(str_extract_all(Q3, pattern = "[:alnum:]+")[[1]])
[1] 71
### Method 3
### Count runs of regex "word" characters
str_count(string = Q3, pattern = "\\w+")
[1] 71
(Not graded)
### Read matrix_script.txt into a character vector, one element per line
script <- readLines(con = "matrix_script.txt")
### Compact overview of the vector (type, length, first elements)
str(object = script)
chr [1:7166] "FADE IN:" "" "ON COMPUTER SCREEN" ...
### First six lines of the script
head(script, n = 6L)
[1] "FADE IN:"
[2] ""
[3] "ON COMPUTER SCREEN"
[4] "so close it has no boundaries."
[5] ""
[6] "A blinding cursor pulses in the electric darkness like a"
### Number of lines that consist of exactly "NEO"
sum(str_detect(script, "^NEO$"))
[1] 212
### Total number of occurrences of "Trin" across all lines
length(unlist(str_extract_all(script, "Trin")))
[1] 128
### Same count when each match may take one optional following character
sum(str_count(string = script, pattern = "Trin.?"))
[1] 128
### Same count when each match extends over the remaining word characters
length(unlist(str_extract_all(script, "Trin\\w*")))
[1] 128
### Greedy ".*" swallows the rest of the line, so a line holding more than one
### "Trin" yields a single match; the count therefore drops by one compared
### with the patterns above (line 640 is presumably such a line -- verify).
sum(str_count(string = script, pattern = "Trin.*"))
[1] 127
### Read happy.csv
happy <- readr::read_csv(file = "happy.csv")
### happy: distinct values in order of first appearance (NA included)
lv.happy <- unique(happy$happy)
print(lv.happy)
[1] "not too happy" "pretty happy" "very happy" NA
### State the ordinal levels explicitly rather than relying on the order in
### which unique() happened to meet them in the file.
ord.happy <- c("not too happy", "pretty happy", "very happy")
### Guard: every observed non-NA value must be a declared level, otherwise
### factor() would silently convert it to NA.
stopifnot(all(lv.happy[!is.na(lv.happy)] %in% ord.happy))
happy$happy <- factor(happy$happy, levels = ord.happy)
levels(happy$happy)
[1] "not too happy" "pretty happy" "very happy"
### marital: distinct values in order of first appearance (NA included)
lv.marital <- unique(happy$marital)
print(lv.marital)
[1] "never married" "separated" "divorced" "widowed"
[5] "married" NA
### Set the level order explicitly. Permuting unique() output by position
### (c(1, 5, 2, 3, 4)) only works for one particular row order of the file.
ord.marital <- c("never married", "married", "separated", "divorced", "widowed")
### Guard: every observed non-NA value must be a declared level, otherwise
### factor() would silently convert it to NA.
stopifnot(all(lv.marital[!is.na(lv.marital)] %in% ord.marital))
happy$marital <- factor(happy$marital, levels = ord.marital)
levels(happy$marital)
[1] "never married" "married" "separated" "divorced"
[5] "widowed"
### degree: distinct values in order of first appearance (NA included)
lv.degree <- unique(happy$degree)
print(lv.degree)
[1] "bachelor" "lt high school" "high school" "graduate"
[5] "junior college" NA
### Set the level order explicitly (lowest to highest degree). Permuting
### unique() output by position only works for one particular row order.
ord.degree <- c("lt high school", "high school", "junior college", "bachelor", "graduate")
### Guard: every observed non-NA value must be a declared level, otherwise
### factor() would silently convert it to NA.
stopifnot(all(lv.degree[!is.na(lv.degree)] %in% ord.degree))
happy$degree <- factor(happy$degree, levels = ord.degree)
levels(happy$degree)
[1] "lt high school" "high school" "junior college" "bachelor"
[5] "graduate"
### health: distinct values in order of first appearance (NA included)
lv.health <- unique(happy$health)
print(lv.health)
[1] "good" "fair" "excellent" "poor" NA
### Set the level order explicitly (worst to best health). Permuting
### unique() output by position only works for one particular row order.
ord.health <- c("poor", "fair", "good", "excellent")
### Guard: every observed non-NA value must be a declared level, otherwise
### factor() would silently convert it to NA.
stopifnot(all(lv.health[!is.na(lv.health)] %in% ord.health))
happy$health <- factor(happy$health, levels = ord.health)
levels(happy$health)
[1] "poor" "fair" "good" "excellent"
### Frequency of each happiness level (NA not counted)
table(happy[["happy"]])
not too happy pretty happy very happy
7332 32256 18122
### Frequency of each health level (NA not counted)
table(happy[["health"]])
poor fair good excellent
2638 8768 20788 13827
### Cross-tabulation: happiness in rows, health in columns
table(happy[["happy"]], happy[["health"]])
poor fair good excellent
not too happy 900 1785 2040 888
pretty happy 1141 4758 11744 6045
very happy 424 1651 5563 5959
(Not graded)
### Counts: marital status in rows, happiness in columns
marital.happy <- table(happy$marital, happy$happy)
### Row proportions: each marital status sums to 1.
### Same as dividing the table by its row sums and relying on column-major recycling.
marital.happy <- prop.table(marital.happy, margin = 1)
### Type I: one stacked bar per marital status, segments are happiness levels
barplot(
  t(marital.happy),
  legend.text = TRUE,
  args.legend = list(bg = "transparent")
)
### Flatten the table column by column for a plain bar chart
vec.plot <- as.vector(marital.happy)
### Label each value "<row prefix>.<column prefix>" (first three letters of each),
### in the same column-major order as the flattened values
names(vec.plot) <- paste(
  rep(str_sub(rownames(marital.happy), 1, 3), times = ncol(marital.happy)),
  rep(str_sub(colnames(marital.happy), 1, 3), each = nrow(marital.happy)),
  sep = "."
)
### Type II: one bar per (marital status, happiness) combination
barplot(vec.plot)
### Combine happy levels
### happy: pretty happy, very happy
temp <- happy
levels(temp$happy) <- c("not too happy", "happy", "happy")
### Exclude NA happiness explicitly: an NA in the logical row index would
### otherwise add all-NA rows to the result.
is.happy <- !is.na(temp$happy) & temp$happy == "happy"
dat.tab <- temp[is.happy, "marital"]
### table() drops NA marital values, so normalise by the tabulated total
### (not nrow(dat.tab)) to make the bars sum to 1.
barplot(prop.table(table(dat.tab)))
(Not graded)
### "Married" = any marital level other than never married
### "Happy"   = any happiness level other than not too happy
### Only rows with NA in marital or happy are dropped; removing every row
### with an NA in any column would give a different result.
### Q4a: married AND happy
Q4a <- happy[
  with(
    happy,
    !is.na(marital) & !is.na(happy) &
      marital != "never married" & happy != "not too happy"
  ),
]
str(Q4a)
tibble [40,211 x 10] (S3: tbl_df/tbl/data.frame)
$ happy : Factor w/ 3 levels "not too happy",..: 2 2 2 2 2 2 3 3 3 3 ...
$ year : num [1:40211] 1972 1972 1972 1972 1972 ...
$ age : num [1:40211] 48 61 30 30 56 54 41 24 62 46 ...
$ sex : chr [1:40211] "female" "female" "female" "female" ...
$ marital: Factor w/ 5 levels "never married",..: 3 3 3 3 3 3 3 3 3 3 ...
$ degree : Factor w/ 5 levels "lt high school",..: 2 2 2 2 1 1 1 2 1 4 ...
$ finrela: chr [1:40211] "average" "above average" "below average" "average" ...
$ health : Factor w/ 4 levels "poor","fair",..: 4 3 2 3 2 4 4 2 2 4 ...
$ partyid: chr [1:40211] "independent" "strong democrat" "strong democrat" "strong democrat" ...
$ wtssall: num [1:40211] 0.889 0.889 0.889 0.889 1.334 ...
### Q4b: married OR happy, restricted to rows where both marital and happy are known.
### The OR is parenthesised because `&` binds tighter than `|`: without the
### parentheses the NA guards applied only to the happy condition, so rows
### with NA happy (and all-NA rows from NA marital) were included.
Q4b <- happy[
  ((happy$marital != "never married") | (happy$happy != "not too happy")) &
    !is.na(happy$marital) & !is.na(happy$happy),
]
str(Q4b)
tibble [59,452 x 10] (S3: tbl_df/tbl/data.frame)
$ happy : Factor w/ 3 levels "not too happy",..: 1 2 1 2 2 1 2 2 2 2 ...
$ year : num [1:59452] 1972 1972 1972 1972 1972 ...
$ age : num [1:59452] 70 48 27 61 26 28 21 30 30 56 ...
$ sex : chr [1:59452] "male" "female" "female" "female" ...
$ marital: Factor w/ 5 levels "never married",..: 3 3 3 3 1 4 1 3 3 3 ...
$ degree : Factor w/ 5 levels "lt high school",..: 1 2 4 2 2 2 2 2 2 1 ...
$ finrela: chr [1:59452] "above average" "average" "average" "above average" ...
$ health : Factor w/ 4 levels "poor","fair",..: 2 4 3 3 3 4 4 2 3 2 ...
$ partyid: chr [1:59452] "not str democrat" "independent" "not str democrat" "strong democrat" ...
$ wtssall: num [1:59452] 0.889 0.889 0.889 0.889 0.445 ...
### Rows satisfying "married OR happy" minus rows satisfying "married AND happy"
### (comparisons that evaluate to NA are ignored in both sums)
with(
  happy,
  sum(marital != "never married" | happy != "not too happy", na.rm = TRUE) -
    sum(marital != "never married" & happy != "not too happy", na.rm = TRUE)
)
[1] 19230
### Rows that are both never married and not too happy (NA comparisons ignored)
with(
  happy,
  sum(marital == "never married" & happy == "not too happy", na.rm = TRUE)
)
[1] 1920
### Collapse the three democrat-leaning categories into "democrat"
happy$partyid <- replace(
  happy$partyid,
  happy$partyid %in% c("ind,near dem", "not str democrat", "strong democrat"),
  "democrat"
)
### Collapse the three republican-leaning categories into "republican"
happy$partyid <- replace(
  happy$partyid,
  happy$partyid %in% c("ind,near rep", "not str republican", "strong republican"),
  "republican"
)
### Counts per party after recoding; NA is not tabulated, so totals depend on NA handling
table(happy[["partyid"]])
democrat independent other party republican
30426 9474 995 21186