Load Packages

library(dplyr)

# Specify package preferences for filter, count, select, and arrange
library(conflicted)
conflict_prefer("filter", "dplyr", quiet = TRUE)
conflict_prefer("count", "dplyr", quiet = TRUE)
conflict_prefer("select", "dplyr", quiet = TRUE)
conflict_prefer("arrange", "dplyr", quiet = TRUE)

Table Summaries

The main dplyr function for creating summaries of data is summarise(). The syntax is similar to mutate(). Summary columns can use any aggregate function from base R or dplyr (see the dplyr cheatsheet for a list of functions that are designed to use with summarise()).


Summarise an entire tibble

Compute the mean of several columns (using the na.rm = TRUE to ignore NA values):


library(palmerpenguins)
penguins |> 
  summarise(bill_len_mean = mean(bill_length_mm, na.rm = TRUE), 
            bill_depth_mean = mean(bill_depth_mm, na.rm = TRUE), 
            body_mass_mean = mean(body_mass_g, na.rm = TRUE))


Summarise groups of rows

Often you want to create summaries for groups of rows (i.e., a different unit of analysis).

If we wanted to compute summary stats for each species, you would first group the rows by species with group_by(), and then use summarise().

penguins |> 
  group_by(species) |> 
  summarise(bill_len_mean = mean(bill_length_mm, na.rm = TRUE), 
            bill_depth_mean = mean(bill_depth_mm, na.rm = TRUE), 
            body_mass_mean = mean(body_mass_g, na.rm = TRUE))


How many rows per island?

For this question, we can use the n() function in summarise():

penguins |> 
  group_by(island) |> 
  summarise(num_records = n())


CHALLENGE #1

Compute the number of observations per year.

## Your answer here


CHALLENGE #2

Compute the number of observations per year and species.

Hint: group_by() can take more than one column

# Your answer here


Join tables

Import a dataset with some (fake) genetic information about Penguins:

library(readr)
genetics_tbl <- read_csv("./data/penguin_genetic_diversity.csv")
Rows: 3 Columns: 5── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): species, photo
dbl (3): haplotype_div, nucleotide_div, tajima_d
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
genetics_tbl


We can join these columns to the Palmer Penguins dataset:

penguins |>
  left_join(genetics_tbl, by = "species") |> 
  head()

Reshape Data

Wide to Long

Start by creating a ‘wide’ data frame:

cases_df <- data.frame(country = c("FR", "DE", "US"),
                       `2011` = c(7000,5800,15000),
                       `2012` = c(6900,6000,14000),
                       `2013` = c(7000,6200,13000),
                       check.names = FALSE)
cases_df


Next, we combine the values of three year columns into one, with another column for the year:

library(tidyr)

cases_tidy_df <- cases_df |> 
  pivot_longer(cols = c("2011", "2012", "2013"), 
               names_to = "year", 
               values_to = "cases")

cases_tidy_df


Long to wide

We begin by importing the January 2050 projected daily minimum and maximum temperature for Sacramento:

sac_temps_tbl <- read_csv("./data/sacramento_daily_temp_jan2050.csv")
Rows: 248 Columns: 6── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr  (4): clim_var, period, gcm, scenario
dbl  (1): temp_f
date (1): dt
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sac_temps_tbl |> arrange(dt)


Convert from a long to wide format:

sac_temps_tbl |> 
  pivot_wider(names_from = clim_var, values_from = temp_f)


Compute Daily Temperature Range

Use the ‘new’ columns to compute the daily temperature range:

sac_temps_tbl |> 
  pivot_wider(names_from = clim_var, values_from = temp_f) |> 
  mutate(diurnal_range_f = tasmax - tasmin) |> 
  head()


End

Remember to save your work to render a HTML copy of the notebook.

LS0tDQp0aXRsZTogIkRhdGEgV3JhbmdsaW5nIDI6IFRhYmxlIFN1bW1hcmllcywgSm9pbnMsICYgUmVzaGFwaW5nIg0Kb3V0cHV0OiANCiAgaHRtbF9ub3RlYm9vazoNCiAgICB0b2M6IHllcw0KICAgIHRvY19kZXB0aDogMg0KICAgIHRvY19mbG9hdDogeWVzDQotLS0NCg0KYGBge2NzcyBlY2hvID0gRkFMU0V9DQpoMSxoMixoMyB7Zm9udC13ZWlnaHQ6Ym9sZDt9DQpoMSB7Zm9udC1zaXplOjI0cHg7fQ0KaDIge2ZvbnQtc2l6ZToyMHB4O30NCmgzIHtmb250LXNpemU6MTZweDt9DQpgYGANClwNCg0KIyBMb2FkIFBhY2thZ2VzIA0KDQpgYGB7ciBjaHVuazAxfQ0KbGlicmFyeShkcGx5cikNCg0KIyBTcGVjaWZ5IHBhY2thZ2UgcHJlZmVyZW5jZXMgZm9yIGZpbHRlciwgY291bnQsIHNlbGVjdCwgYW5kIGFycmFuZ2UNCmxpYnJhcnkoY29uZmxpY3RlZCkNCmNvbmZsaWN0X3ByZWZlcigiZmlsdGVyIiwgImRwbHlyIiwgcXVpZXQgPSBUUlVFKQ0KY29uZmxpY3RfcHJlZmVyKCJjb3VudCIsICJkcGx5ciIsIHF1aWV0ID0gVFJVRSkNCmNvbmZsaWN0X3ByZWZlcigic2VsZWN0IiwgImRwbHlyIiwgcXVpZXQgPSBUUlVFKQ0KY29uZmxpY3RfcHJlZmVyKCJhcnJhbmdlIiwgImRwbHlyIiwgcXVpZXQgPSBUUlVFKQ0KYGBgDQoNCiMgVGFibGUgU3VtbWFyaWVzDQoNClRoZSBtYWluIGRwbHlyIGZ1bmN0aW9uIGZvciBjcmVhdGluZyBzdW1tYXJpZXMgb2YgZGF0YSBpcyBgc3VtbWFyaXNlKClgLiBUaGUgc3ludGF4IGlzIHNpbWlsYXIgdG8gYG11dGF0ZSgpYC4gU3VtbWFyeSBjb2x1bW5zIGNhbiB1c2UgYW55IGFnZ3JlZ2F0ZSBmdW5jdGlvbiBmcm9tIGJhc2UgUiBvciBkcGx5ciAoc2VlIHRoZSBbZHBseXIgY2hlYXRzaGVldF0oaHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL3JzdHVkaW8vY2hlYXRzaGVldHMvbWFpbi9kYXRhLXRyYW5zZm9ybWF0aW9uLnBkZikgZm9yIGEgbGlzdCBvZiBmdW5jdGlvbnMgdGhhdCBhcmUgZGVzaWduZWQgdG8gdXNlIHdpdGggYHN1bW1hcmlzZSgpYCkuDQoNClwNCg0KIyMgU3VtbWFyaXNlIGFuIGVudGlyZSB0aWJibGUNCg0KQ29tcHV0ZSB0aGUgbWVhbiBvZiBzZXZlcmFsIGNvbHVtbnMgKHVzaW5nIHRoZSBgbmEucm0gPSBUUlVFYCB0byBpZ25vcmUgTkEgdmFsdWVzKToNCg0KYGBge3IgY2h1bmswMn0NCg0KbGlicmFyeShwYWxtZXJwZW5ndWlucykNCnBlbmd1aW5zIHw+IA0KICBzdW1tYXJpc2UoYmlsbF9sZW5fbWVhbiA9IG1lYW4oYmlsbF9sZW5ndGhfbW0sIG5hLnJtID0gVFJVRSksIA0KICAgICAgICAgICAgYmlsbF9kZXB0aF9tZWFuID0gbWVhbihiaWxsX2RlcHRoX21tLCBuYS5ybSA9IFRSVUUpLCANCiAgICAgICAgICAgIGJvZHlfbWFzc19tZWFuID0gbWVhbihib2R5X21hc3NfZywgbmEucm0gPSBUUlVFKSkNCmBgYA0KDQpcDQoNCiMjIFN1bW1hcmlzZSBncm91cHMgb2Ygcm93cw0KDQpPZnRlbiB5b3Ugd2FudCB0byBjcmVhdGUgc3VtbWFyaWVzIGZvciBncm91cHMgb2Ygcm93cyAoaS5lLiwgYSBkaWZmZXJlbnQgdW5pdCBvZiBhbmFseXNpcykuIA0KDQpJZiB3ZSB3YW50ZWQgdG8gY29tcHV0ZSBzdW1tYXJ5IHN0YXRzIGZvciBlYWNoIHNwZWNpZXMsIHlvdSB3b3VsZCBmaXJzdCBncm91cCB0aGUgcm93cyBieSBzcGVjaWVzIHdpdGggYGdyb3VwX2J5KClgLCBhbmQgdGhlbiB1c2UgYHN1bW1hcmlzZSgpYC4NCg0KYGBge3IgY2h1bmswM30NCnBlbmd1aW5zIHw+IA0KICBncm91cF9ieShzcGVjaWVzKSB8PiANCiAgc3VtbWFyaXNlKGJpbGxfbGVuX21lYW4gPSBtZWFuKGJpbGxfbGVuZ3RoX21tLCBuYS5ybSA9IFRSVUUpLCANCiAgICAgICAgICAgIGJpbGxfZGVwdGhfbWVhbiA9IG1lYW4oYmlsbF9kZXB0aF9tbSwgbmEucm0gPSBUUlVFKSwgDQogICAgICAgICAgICBib2R5X21hc3NfbWVhbiA9IG1lYW4oYm9keV9tYXNzX2csIG5hLnJtID0gVFJVRSkpDQpgYGANCg0KXA0KDQoqKkhvdyBtYW55IHJvd3MgcGVyIGlzbGFuZD8qKg0KDQpGb3IgdGhpcyBxdWVzdGlvbiwgd2UgY2FuIHVzZSB0aGUgYG4oKWAgZnVuY3Rpb24gaW4gYHN1bW1hcmlzZSgpYDoNCg0KYGBge3IgY2h1bmswNH0NCnBlbmd1aW5zIHw+IA0KICBncm91cF9ieShpc2xhbmQpIHw+IA0KICBzdW1tYXJpc2UobnVtX3JlY29yZHMgPSBuKCkpDQpgYGANCg0KXA0KDQojIENIQUxMRU5HRSAjMQ0KDQpDb21wdXRlIHRoZSBudW1iZXIgb2Ygb2JzZXJ2YXRpb25zIHBlciB5ZWFyLg0KDQpgYGB7ciBjaHVuazA1fQ0KIyMgWW91ciBhbnN3ZXIgaGVyZQ0KDQpgYGANCg0KXA0KDQojIENIQUxMRU5HRSAjMg0KDQpDb21wdXRlIHRoZSBudW1iZXIgb2Ygb2JzZXJ2YXRpb25zIHBlciB5ZWFyIGFuZCBzcGVjaWVzLg0KDQpIaW50OiBgZ3JvdXBfYnkoKWAgY2FuIHRha2UgbW9yZSB0aGFuIG9uZSBjb2x1bW4NCg0KYGBge3IgY2h1bmswNn0NCiMgWW91ciBhbnN3ZXIgaGVyZQ0KDQpgYGANCg0KXA0KDQojIEpvaW4gdGFibGVzDQoNCkltcG9ydCBhIGRhdGFzZXQgd2l0aCBzb21lIChmYWtlKSBnZW5ldGljIGluZm9ybWF0aW9uIGFib3V0IFBlbmd1aW5zOg0KDQpgYGB7ciBjaHVuazA3fQ0KbGlicmFyeShyZWFkcikNCmdlbmV0aWNzX3RibCA8LSByZWFkX2NzdigiLi9kYXRhL3Blbmd1aW5fZ2VuZXRpY19kaXZlcnNpdHkuY3N2IikNCmdlbmV0aWNzX3RibA0KYGBgDQoNClwNCg0KV2UgY2FuIGpvaW4gdGhlc2UgY29sdW1ucyB0byB0aGUgUGFsbWVyIFBlbmd1aW5zIGRhdGFzZXQ6DQoNCmBgYHtyIGNodW5rMDh9DQpwZW5ndWlucyB8Pg0KICBsZWZ0X2pvaW4oZ2VuZXRpY3NfdGJsLCBieSA9ICJzcGVjaWVzIikgfD4gDQogIGhlYWQoKQ0KYGBgDQoNCiMgUmVzaGFwZSBEYXRhDQoNCiMjIFdpZGUgdG8gTG9uZw0KDQpTdGFydCBieSBjcmVhdGluZyBhICd3aWRlJyBkYXRhIGZyYW1lOg0KDQpgYGB7ciBjaHVuazA5fQ0KY2FzZXNfZGYgPC0gZGF0YS5mcmFtZShjb3VudHJ5ID0gYygiRlIiLCAiREUiLCAiVVMiKSwNCiAgICAgICAgICAgICAgICAgICAgICAgYDIwMTFgID0gYyg3MDAwLDU4MDAsMTUwMDApLA0KICAgICAgICAgICAgICAgICAgICAgICBgMjAxMmAgPSBjKDY5MDAsNjAwMCwxNDAwMCksDQogICAgICAgICAgICAgICAgICAgICAgIGAyMDEzYCA9IGMoNzAwMCw2MjAwLDEzMDAwKSwNCiAgICAgICAgICAgICAgICAgICAgICAgY2hlY2submFtZXMgPSBGQUxTRSkNCmNhc2VzX2RmDQpgYGANCg0KXA0KDQpOZXh0LCB3ZSBjb21iaW5lIHRoZSB2YWx1ZXMgb2YgdGhyZWUgeWVhciBjb2x1bW5zIGludG8gb25lLCB3aXRoIGFub3RoZXIgY29sdW1uIGZvciB0aGUgeWVhcjoNCg0KYGBge3IgY2h1bmsxMH0NCmxpYnJhcnkodGlkeXIpDQoNCmNhc2VzX3RpZHlfZGYgPC0gY2FzZXNfZGYgfD4gDQogIHBpdm90X2xvbmdlcihjb2xzID0gYygiMjAxMSIsICIyMDEyIiwgIjIwMTMiKSwgDQogICAgICAgICAgICAgICBuYW1lc190byA9ICJ5ZWFyIiwgDQogICAgICAgICAgICAgICB2YWx1ZXNfdG8gPSAiY2FzZXMiKQ0KDQpjYXNlc190aWR5X2RmDQpgYGANCg0KXA0KDQojIyBMb25nIHRvIHdpZGUNCg0KV2UgYmVnaW4gYnkgaW1wb3J0aW5nIHRoZSBKYW51YXJ5IDIwNTAgcHJvamVjdGVkIGRhaWx5IG1pbmltdW0gYW5kIG1heGltdW0gdGVtcGVyYXR1cmUgZm9yIFNhY3JhbWVudG86DQoNCmBgYHtyIGNodW5rMTF9DQpzYWNfdGVtcHNfdGJsIDwtIHJlYWRfY3N2KCIuL2RhdGEvc2FjcmFtZW50b19kYWlseV90ZW1wX2phbjIwNTAuY3N2IikNCnNhY190ZW1wc190YmwgfD4gYXJyYW5nZShkdCkNCmBgYA0KDQpcDQoNCkNvbnZlcnQgZnJvbSBhIGxvbmcgdG8gd2lkZSBmb3JtYXQ6DQoNCmBgYHtyIGNodW5rMTJ9DQpzYWNfdGVtcHNfdGJsIHw+IA0KICBwaXZvdF93aWRlcihuYW1lc19mcm9tID0gY2xpbV92YXIsIHZhbHVlc19mcm9tID0gdGVtcF9mKQ0KYGBgDQoNClwNCg0KIyMgQ29tcHV0ZSBEYWlseSBUZW1wZXJhdHVyZSBSYW5nZQ0KDQpVc2UgdGhlICduZXcnIGNvbHVtbnMgdG8gY29tcHV0ZSB0aGUgZGFpbHkgdGVtcGVyYXR1cmUgcmFuZ2U6DQoNCmBgYHtyIGNodW5rMTN9DQpzYWNfdGVtcHNfdGJsIHw+IA0KICBwaXZvdF93aWRlcihuYW1lc19mcm9tID0gY2xpbV92YXIsIHZhbHVlc19mcm9tID0gdGVtcF9mKSB8PiANCiAgbXV0YXRlKGRpdXJuYWxfcmFuZ2VfZiA9IHRhc21heCAtIHRhc21pbikgfD4gDQogIGhlYWQoKQ0KYGBgDQoNClwNCg0KIyBFbmQNCg0KUmVtZW1iZXIgdG8gc2F2ZSB5b3VyIHdvcmsgdG8gcmVuZGVyIGEgSFRNTCBjb3B5IG9mIHRoZSBub3RlYm9vay4NCg==