R commands

  1. ### Create the score vector
    scores <- c(9.3, 8.7, 9.5, 10, 9, 8.9)
    ### Positions of the lowest and the highest score (first occurrence each)
    I <- match(range(scores), scores)
    
    ### Drop both extremes, then average the remaining scores
    (ave <- mean(scores[-I]))
    [1] 9.175
  2. ### Method 1: Rep only
    ### One leap year followed by three common years, cycled over 21 years
    ndays <- rep(c(366, 365, 365, 365), length.out = 21)
    names(ndays) <- as.character(2000:2020)
    ndays
    2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 
     366  365  365  365  366  365  365  365  366  365  365  365  366  365  365  365 
    2016 2017 2018 2019 2020 
     366  365  365  365  366 
    ### Method 2: Rep with the recycling rule
    ### The 4-element pattern c(1, 0, 0, 0) is recycled along the 21 base values
    ### of 365; because 21 is not a multiple of 4, R still computes the result
    ### but emits the length-mismatch warning shown below.
    ndays.recy <- rep(365, 21) + c(1, rep(0, 3))
    Warning in rep(365, 21) + c(1, rep(0, 3)): longer object length is not a
    multiple of shorter object length
    names(ndays.recy) <- 2000:2020
    ndays.recy
    2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 
     366  365  365  365  366  365  365  365  366  365  365  365  366  365  365  365 
    2016 2017 2018 2019 2020 
     366  365  365  365  366 
    ### Check if they give identical outputs
    identical(ndays, ndays.recy)
    [1] TRUE
    1. ### Choose the range of i
      i <- seq_len(100000)
      
      ### Alternating-sign terms (-1)^(i+1) / i
      x <- (-1)^(i + 1) / i
      
      ### Show the leading ten terms
      head(x, 10)
       [1]  1.0000000 -0.5000000  0.3333333 -0.2500000  0.2000000 -0.1666667
       [7]  0.1428571 -0.1250000  0.1111111 -0.1000000
    2. ### Compute the summation
      (result <- sum(x))
      [1] 0.6931422
    3. Increase the length of i and check whether the sum changes noticeably. If the two results are close, the partial sum is a good approximation of the limit. For example, we can increase the maximum of i from 100,000 to 1,000,000.

      ### Extend the index range tenfold
      i.new <- seq_len(1000000)
      
      ### Same alternating terms over the longer range
      x.new <- (-1)^(i.new + 1) / i.new
      
      ### Partial sum over the longer range
      (result.new <- sum(x.new))
      [1] 0.6931467
      ### Difference in result
      abs(result.new - result)
      [1] 4.499975e-06

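      The difference is small, which indicates that our approximation is good. Indeed, the alternating harmonic series converges to log(2) ≈ 0.6931472, and both partial sums agree with it to about five decimal places.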

  3. ### Create height vector
    height <- c(1.8, 1.65)
    names(height) <- c("Chris", "Mary")
    
    ### Weights, listed in the same order as the heights
    weight <- c(70, 49)
    
    ### BMI = weight / height^2; the element names carry over from height
    (bmi <- weight / height^2)
       Chris     Mary 
    21.60494 17.99816 
  4. #### Create the vector
    x <- c(1, 1, 4, 6, 4, 7, 5, 9)
    1. ### Sort the vector
      sort(x)
      [1] 1 1 4 4 5 6 7 9
    2. ### Default rank
      rank(x)
      [1] 1.5 1.5 3.5 6.0 3.5 7.0 5.0 8.0

      Note that the fractional ranks (1.5 and 3.5) indicate ties: by default, rank() gives tied values the average of the ranks they would otherwise occupy.

      ### Rank of minimum
      rank(x, ties.method="min")
      [1] 1 1 3 6 3 7 5 8
    3. ### The number of unique elements in x
      length(unique(x))
      [1] 6
    4. ### Method 1: Use diff function
      sum(diff(x) < 0)
      [1] 2
      ### Method 2: Compare subsequences
      sum(x[-1] < x[-length(x)])
      [1] 2
    5. ### Get the order of "distance"
      dist.to.5 <- abs(x - 5)
      I <- order(dist.to.5)
      
      ### Keep the four entries with the smallest distance to 5
      (z <- x[I[1:4]])
      [1] 5 4 6 4
      ### Average
      mean(z)
      [1] 4.75
    6. ### Numbers that differ from 5 by at most 2
      (w <- x[abs(x - 5) <= 2])
      [1] 4 6 4 7 5
      ### Average
      mean(w)
      [1] 5.2
  5. trunc(.5 + -2:4)
    [1] -1  0  0  1  2  3  4
    round(.5 + -2:4)
    [1] -2  0  0  2  2  4  4

    Because we have

    .5 + -2:4
    [1] -1.5 -0.5  0.5  1.5  2.5  3.5  4.5

    trunc() truncates each value toward 0 (it simply drops the fractional part), while round() “rounds half to even”: a value lying exactly halfway between two integers is rounded to the even one.

Penguins

### Load library
library(palmerpenguins)
  1. ### Inspect the data frame
    penguins
    str(penguins)
    tibble [344 x 8] (S3: tbl_df/tbl/data.frame)
     $ species          : Factor w/ 3 levels "Adelie","Chinstrap",..: 1 1 1 1 1 1 1 1 1 1 ...
     $ island           : Factor w/ 3 levels "Biscoe","Dream",..: 3 3 3 3 3 3 3 3 3 3 ...
     $ bill_length_mm   : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
     $ bill_depth_mm    : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
     $ flipper_length_mm: int [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
     $ body_mass_g      : int [1:344] 3750 3800 3250 NA 3450 3650 3625 4675 3475 4250 ...
     $ sex              : Factor w/ 2 levels "female","male": 2 1 1 NA 1 2 1 2 NA NA ...
     $ year             : int [1:344] 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 ...
    head(penguins)
    summary(penguins)
          species          island    bill_length_mm  bill_depth_mm  
     Adelie   :152   Biscoe   :168   Min.   :32.10   Min.   :13.10  
     Chinstrap: 68   Dream    :124   1st Qu.:39.23   1st Qu.:15.60  
     Gentoo   :124   Torgersen: 52   Median :44.45   Median :17.30  
                                     Mean   :43.92   Mean   :17.15  
                                     3rd Qu.:48.50   3rd Qu.:18.70  
                                     Max.   :59.60   Max.   :21.50  
                                     NA's   :2       NA's   :2      
     flipper_length_mm  body_mass_g       sex           year     
     Min.   :172.0     Min.   :2700   female:165   Min.   :2007  
     1st Qu.:190.0     1st Qu.:3550   male  :168   1st Qu.:2007  
     Median :197.0     Median :4050   NA's  : 11   Median :2008  
     Mean   :200.9     Mean   :4202                Mean   :2008  
     3rd Qu.:213.0     3rd Qu.:4750                3rd Qu.:2009  
     Max.   :231.0     Max.   :6300                Max.   :2009  
     NA's   :2         NA's   :2                                 
  2. ### Get the decreasing indicator
    ### I holds the row indices of penguins ordered from heaviest to lightest
    ### (rows with a missing body mass come last under order()'s default
    ### na.last=TRUE)
    I <- order(penguins$body_mass_g, decreasing=TRUE)
    
    ### The species of the 5 largest penguins
    ### I[1:5] picks the five heaviest rows; because penguins is a tibble,
    ### indexing with [ , "species"] returns a one-column tibble rather than
    ### a bare factor
    (large.spec <- penguins[I[1:5], "species"])
  3. ### Tabulate the result in the previous part
    table(large.spec)
    large.spec
       Adelie Chinstrap    Gentoo 
            0         0         5 
  4. ### Mean without NA values
    mean(penguins$body_mass_g, na.rm=TRUE)
    [1] 4201.754
    ### Median without NA values
    median(penguins$body_mass_g, na.rm=TRUE)
    [1] 4050
    1. ### Histogram
      hist(penguins$body_mass_g, xlab="Body mass", ylab="Count",
        main="Histogram for the body mass")
    2. ### Side-by-side boxplot
      boxplot(body_mass_g ~ sex, data=penguins, xlab="Sex", ylab="Body mass",
        main="Side-by-side boxplot for the body mass")
    3. ### Bar chart
      barplot(table(penguins$species), xlab="Species", ylab="Count",
        main="Barchart for the number of penguins with different species")
    4. ### Create a new level for NA values
      ### Recode missing sex values as an explicit factor level so that these
      ### penguins get their own colour and plotting symbol
      penguins$sex <- as.character(penguins$sex)
      penguins$sex[is.na(penguins$sex)] <- "not applicable"
      penguins$sex <- factor(penguins$sex)
      
      ### Group indicator for plotting: one integer code per factor level.
      ### seq_along() is used instead of 1:length(), which would yield c(1, 0)
      ### for an empty set of levels.
      I <- seq_along(levels(penguins$sex))
      
      ### Integer code of each penguin's sex; it matches the entries of I, so
      ### the legend below lines up with the points
      sex.code <- as.numeric(penguins$sex)
      
      ### Scatterplot
      plot(penguins$flipper_length_mm, penguins$body_mass_g,
        col=sex.code, pch=sex.code,
        xlab="Flipper length", ylab="Body mass",
        main="Scatterplot for body mass versus flipper length")
      
      ### Create legend
      legend("topleft", legend=levels(penguins$sex),
        col=I, pch=I)