### Score vector
scores <- c(9.3, 8.7, 9.5, 10, 9, 8.9)
### Positions of the smallest and the largest score (first occurrence of each)
I <- match(range(scores), scores)
### Average of the scores that remain once both extremes are dropped
ave <- mean(scores[-I])
print(ave)
[1] 9.175
### Method 1: Rep only
### The pattern 366, 365, 365, 365 is repeated until 21 values exist, and
### each value is labelled with its year.
ndays <- setNames(rep_len(c(366, 365, 365, 365), 21), 2000:2020)
2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
366 365 365 365 366 365 365 365 366 365 365 365 366 365 365 365
2016 2017 2018 2019 2020
366 365 365 365 366
### Method 2: Rep with the recycling rule
### The scalar 365 is recycled against a 21-element extra-day indicator.
### The indicator is built at full length with length.out: adding the bare
### length-4 pattern c(1, 0, 0, 0) to a length-21 vector still gives the right
### values, but R warns that 21 is not a multiple of 4.
ndays.recy <- 365 + rep(c(1, 0, 0, 0), length.out=21)
names(ndays.recy) <- 2000:2020
2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
366 365 365 365 366 365 365 365 366 365 365 365 366 365 365 365
2016 2017 2018 2019 2020
366 365 365 365 366
### Check that both methods give exactly the same object (values and names)
identical(ndays, ndays.recy)
[1] TRUE
### Index range of the sum
i <- seq_len(100000)
### Terms of the series: +1/i when i is odd, -1/i when i is even
x <- ifelse(i %% 2 == 1, 1, -1) / i
### Show the leading ten terms
head(x, 10)
[1] 1.0000000 -0.5000000 0.3333333 -0.2500000 0.2000000 -0.1666667
[7] 0.1428571 -0.1250000 0.1111111 -0.1000000
### Add up all the terms and display the total
result <- sum(x)
print(result)
[1] 0.6931422
Increase the length of i to see whether the summation result changes much. If the results are similar, the approximation is good. For example, we can increase the maximum of i from 100,000 to 1,000,000.
### Longer index range
i.new <- seq_len(1000000)
### Terms of the series over the longer range
x.new <- ifelse(i.new %% 2 == 1, 1, -1) / i.new
### Add up the terms and display the total
result.new <- sum(x.new)
print(result.new)
[1] 0.6931467
### Absolute difference between the two summation results
abs(result.new - result)
[1] 4.499975e-06
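The difference is small. Because the terms alternate in sign and shrink toward 0, this indicates that our approximation is good (the exact limit of the series is log(2), approximately 0.6931472).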
### Heights, labelled by name
height <- c(1.8, 1.65)
names(height) <- c("Chris", "Mary")
### Weights, in the same order as the heights
weight <- c(70, 49)
### BMI: weight divided by squared height (the names carry over from height)
bmi <- weight / height^2
print(bmi)
Chris Mary
21.60494 17.99816
### Create the vector
x <- c(1, 1, 4, 6, 4, 7, 5, 9)
### Sort the vector
[1] 1 1 4 4 5 6 7 9
### Default rank
[1] 1.5 1.5 3.5 6.0 3.5 7.0 5.0 8.0
Note that ranks ending in .5 indicate ties: by default, tied elements all receive the average of the ranks they occupy.
### Rank in which tied elements all receive the smallest rank of their group
rank(x, ties.method="min")
[1] 1 1 3 6 3 7 5 8
### The number of unique elements in x
[1] 6
### Method 1: Use diff function
### diff(x) is negative wherever an element is smaller than the one before it,
### so the sum counts those positions
sum(diff(x) < 0)
[1] 2
### Method 2: Compare subsequences
### x[-1] drops the first element and x[-length(x)] drops the last, so the
### comparison pairs every element with the one just before it
sum(x[-1] < x[-length(x)])
[1] 2
### Positions of x ordered by how far each value is from 5
I <- order(abs(x - 5))
### Keep the four values that come first in that ordering
z <- x[I[1:4]]
print(z)
[1] 5 4 6 4
### Average
[1] 4.75
### Keep the values whose distance from 5 is no more than 2
w <- x[abs(x - 5) <= 2]
print(w)
[1] 4 6 4 7 5
### Average
[1] 5.2
### Truncate the values -1.5, -0.5, ..., 4.5 (the fractional part is dropped)
trunc(.5 + -2:4)
[1] -1 0 0 1 2 3 4
### Round the same values -1.5, -0.5, ..., 4.5 to whole numbers
round(.5 + -2:4)
[1] -2 0 0 2 2 4 4
Because we have
.5 + -2:4
[1] -1.5 -0.5 0.5 1.5 2.5 3.5 4.5
trunc would truncate the values toward 0, while round
would “round to even” (go to the even digit) when rounding off a 5.
### Load library
### Inspect the data frame
tibble [344 x 8] (S3: tbl_df/tbl/data.frame)
$ species : Factor w/ 3 levels "Adelie","Chinstrap",..: 1 1 1 1 1 1 1 1 1 1 ...
$ island : Factor w/ 3 levels "Biscoe","Dream",..: 3 3 3 3 3 3 3 3 3 3 ...
$ bill_length_mm : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
$ bill_depth_mm : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
$ flipper_length_mm: int [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
$ body_mass_g : int [1:344] 3750 3800 3250 NA 3450 3650 3625 4675 3475 4250 ...
$ sex : Factor w/ 2 levels "female","male": 2 1 1 NA 1 2 1 2 NA NA ...
$ year : int [1:344] 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 ...
species island bill_length_mm bill_depth_mm
Adelie :152 Biscoe :168 Min. :32.10 Min. :13.10
Chinstrap: 68 Dream :124 1st Qu.:39.23 1st Qu.:15.60
Gentoo :124 Torgersen: 52 Median :44.45 Median :17.30
Mean :43.92 Mean :17.15
3rd Qu.:48.50 3rd Qu.:18.70
Max. :59.60 Max. :21.50
NA's :2 NA's :2
flipper_length_mm body_mass_g sex year
Min. :172.0 Min. :2700 female:165 Min. :2007
1st Qu.:190.0 1st Qu.:3550 male :168 1st Qu.:2007
Median :197.0 Median :4050 NA's : 11 Median :2008
Mean :200.9 Mean :4202 Mean :2008
3rd Qu.:213.0 3rd Qu.:4750 3rd Qu.:2009
Max. :231.0 Max. :6300 Max. :2009
NA's :2 NA's :2
### Row positions ordered from the largest body mass to the smallest
I <- order(penguins$body_mass_g, decreasing=TRUE)
### Species of the five penguins with the largest body mass
large.spec <- penguins[head(I, 5), "species"]
print(large.spec)
### Tabulate the result in the previous part
Adelie Chinstrap Gentoo
0 0 5
### Mean body mass, with missing values removed first
mean(penguins[["body_mass_g"]], na.rm = TRUE)
[1] 4201.754
### Median body mass, with missing values removed first
median(penguins[["body_mass_g"]], na.rm = TRUE)
[1] 4050
### Histogram of body mass
hist(
  penguins[["body_mass_g"]],
  main = "Histogram for the body mass",
  xlab = "Body mass",
  ylab = "Count"
)
### Boxplots of body mass, one box per sex
boxplot(
  body_mass_g ~ sex,
  data = penguins,
  main = "Side-by-side boxplot for the body mass",
  xlab = "Sex",
  ylab = "Body mass"
)
### Bar chart of the number of penguins in each species
barplot(
  table(penguins[["species"]]),
  main = "Barchart for the number of penguins with different species",
  xlab = "Species",
  ylab = "Count"
)
### Create a new level for NA values
### The column is converted to character first so that the missing entries can
### be given a label that is not yet one of the factor's levels, and is then
### turned back into a factor.
penguins$sex <- as.character(penguins$sex)
penguins$sex[is.na(penguins$sex)] <- "not applicable"
penguins$sex <- factor(penguins$sex)
### Group indicator for plotting: one integer code per level of sex.
### seq_along() is used instead of 1:length() so that an empty set of levels
### would give an empty vector rather than c(1, 0).
I <- seq_along(levels(penguins$sex))
### Scatterplot
### Colour and plotting symbol both follow the factor's integer codes, which
### are the same codes the legend uses below.
plot(penguins$flipper_length_mm, penguins$body_mass_g,
     col=as.numeric(penguins$sex), pch=as.numeric(penguins$sex),
     xlab="Flipper length", ylab="Body mass",
     main="Scatterplot for body mass versus flipper length")
### Create legend: levels are listed in the same order as the codes in I
legend("topleft", legend=levels(penguins$sex),
       col=I, pch=I)