Setup

First we load some data from the San Francisco Open Data Portal. Specifically, we’ll import a selection of 311 calls from San Francisco (more info).

## Load packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## Define a URL to a CSV
sf311_url <- "https://data.sfgov.org/resource/vw6y-z8j6.csv?$select=service_request_id,requested_datetime,agency_responsible,service_name,service_subtype,address,neighborhoods_sffind_boundaries,source&$limit=50000&$where=requested_datetime between '2023-10-11T00:00:00' and '2023-10-11T23:59:59'"

## Next, read in the data. 
##  - URLencode() converts spaces in the URL to numeric codes 
##  - read.csv() imports the CSV file from SF Open Data
##  - rename() changes the name of one of the columns
##  - ymd_hms() converts a character column into a date-time column
sf311_df <- read.csv(URLencode(sf311_url)) |> 
  rename(neighborhood = neighborhoods_sffind_boundaries) |> 
  mutate(requested_datetime = ymd_hms(requested_datetime))


If you’d like, un-comment the following line to view the data frame in a View pane:

# View(sf311_df)


Q01. How many rows and how many columns does this data frame have?

dim(sf311_df)
## [1] 1971    8


Q02. Write an expression that returns the column names of this data frame.

Hint: Look for a function that returns the names of a data frame

names(sf311_df)
## [1] "service_request_id" "requested_datetime" "agency_responsible"
## [4] "service_name"       "service_subtype"    "address"           
## [7] "neighborhood"       "source"


Q03. Write an expression that returns the data frame sorted by the responsible agency.

sf311_df |> arrange(agency_responsible) |> head()
##   service_request_id  requested_datetime        agency_responsible
## 1           17408819 2023-10-11 10:22:00 311 Service Request Queue
## 2           17408897 2023-10-11 10:38:00 311 Service Request Queue
## 3           17409782 2023-10-11 13:04:25 311 Service Request Queue
## 4           17409795 2023-10-11 13:07:00 311 Service Request Queue
## 5           17409892 2023-10-11 13:22:00 311 Service Request Queue
## 6           17410438 2023-10-11 15:06:18 311 Service Request Queue
##                                 service_name   service_subtype
## 1                       311 External Request noise_dog_barking
## 2                       311 External Request noise_dog_barking
## 3 General Request - 311CUSTOMERSERVICECENTER             other
## 4                       311 External Request noise_dog_barking
## 5                       311 External Request noise_dog_barking
## 6 General Request - 311CUSTOMERSERVICECENTER         complaint
##                                       address neighborhood source
## 1      243 VIDAL DR, SAN FRANCISCO, CA, 94132   Parkmerced  Phone
## 2 643 DIVISADERO ST, SAN FRANCISCO, CA, 94117    Panhandle  Phone
## 3      Not associated with a specific address                 Web
## 4   530 STOCKTON ST, SAN FRANCISCO, CA, 94108    Chinatown  Phone
## 5     351 FAXON AVE, SAN FRANCISCO, CA, 94112    Ingleside  Phone
## 6      Not associated with a specific address                 Web


Q04. Write an expression that just returns three of columns (of your choice).

sf311_df |> select(service_name, service_subtype, source) |> head()
##                   service_name              service_subtype         source
## 1 Street and Sidewalk Cleaning City_garbage_can_overflowing Mobile/Open311
## 2                 Noise Report garbage_recycling_collection Mobile/Open311
## 3 Street and Sidewalk Cleaning City_garbage_can_overflowing Mobile/Open311
## 4                 Noise Report        other_excessive_noise          Phone
## 5 Street and Sidewalk Cleaning City_garbage_can_overflowing Mobile/Open311
## 6 Street and Sidewalk Cleaning City_garbage_can_overflowing Mobile/Open311


Q05. Write an expression that returns the range of the requested date & time.

sf311_df |> pull(requested_datetime) |> range()
## [1] "2023-10-11 00:24:00 UTC" "2023-10-11 23:59:47 UTC"


Q06. Write an expression that just returns 311 calls that were submitted by phone.

sf311_df |> filter(source == 'Phone') |> head()
##   service_request_id  requested_datetime              agency_responsible
## 1           17407515 2023-10-11 00:52:00              Noise Report Queue
## 2           17407648 2023-10-11 05:38:00 SFMTA - Parking Enforcement - G
## 3           17407661 2023-10-11 05:52:00                   RPD GGP Queue
## 4           17407671 2023-10-11 06:03:00                DPT Signal Queue
## 5           17407681 2023-10-11 06:17:00            Tenderloin CBD Queue
## 6           17407697 2023-10-11 06:31:00                   PUC Sewer Ops
##                   service_name               service_subtype
## 1                 Noise Report         other_excessive_noise
## 2        General Request - MTA           request_for_service
## 3        Rec and Park Requests Park - Structural_Maintenance
## 4             Damaged Property        Damaged Traffic_Signal
## 5 Street and Sidewalk Cleaning              General Cleaning
## 6                 Sewer Issues                Sewage_back_up
##                                                         address
## 1                      158 ALPINE TER, SAN FRANCISCO, CA, 94117
## 2          626 MISSION BAY BLVD NORTH, SAN FRANCISCO, CA, 94158
## 3 GGP Stow Lake and Strawberry Hill, , SAN FRANCISCO, CA, 94122
## 4                     1836 GEARY BLVD, SAN FRANCISCO, CA, 94115
## 5                  424 LEAVENWORTH ST, SAN FRANCISCO, CA, 94102
## 6                    132 BRODERICK ST, SAN FRANCISCO, CA, 94117
##            neighborhood source
## 1           Buena Vista  Phone
## 2           Mission Bay  Phone
## 3      Golden Gate Park  Phone
## 4 Lower Pacific Heights  Phone
## 5            Tenderloin  Phone
## 6           Buena Vista  Phone


Q07. How many 311 calls were submitted by phone?

sf311_df |> filter(source == 'Phone') |> nrow()
## [1] 310


Q08. Write an expression the returns complaints about barking dogs made by phone.

Hint: Look at the service_subtype column.

sf311_df |> filter(source == 'Phone', service_subtype == 'noise_dog_barking')
##   service_request_id  requested_datetime        agency_responsible
## 1           17408819 2023-10-11 10:22:00 311 Service Request Queue
## 2           17408897 2023-10-11 10:38:00 311 Service Request Queue
## 3           17409795 2023-10-11 13:07:00 311 Service Request Queue
## 4           17409892 2023-10-11 13:22:00 311 Service Request Queue
## 5           17410510 2023-10-11 15:20:00 311 Service Request Queue
##           service_name   service_subtype
## 1 311 External Request noise_dog_barking
## 2 311 External Request noise_dog_barking
## 3 311 External Request noise_dog_barking
## 4 311 External Request noise_dog_barking
## 5 311 External Request noise_dog_barking
##                                       address neighborhood source
## 1      243 VIDAL DR, SAN FRANCISCO, CA, 94132   Parkmerced  Phone
## 2 643 DIVISADERO ST, SAN FRANCISCO, CA, 94117    Panhandle  Phone
## 3   530 STOCKTON ST, SAN FRANCISCO, CA, 94108    Chinatown  Phone
## 4     351 FAXON AVE, SAN FRANCISCO, CA, 94112    Ingleside  Phone
## 5  1341 VALENCIA ST, SAN FRANCISCO, CA, 94110      Mission  Phone


Q09. Write an expression that returns the number of calls per neighborhood, sorted alphabetically by neighborhood.

sf311_df |> group_by(neighborhood) |> summarise(num_calls = n()) |> arrange(neighborhood)
## # A tibble: 105 × 2
##    neighborhood               num_calls
##    <chr>                          <int>
##  1 ""                                15
##  2 "Alamo Square"                    10
##  3 "Anza Vista"                       7
##  4 "Apparel City"                     4
##  5 "Aquatic Park / Ft. Mason"         5
##  6 "Ashbury Heights"                  4
##  7 "Balboa Terrace"                   1
##  8 "Bayview"                         22
##  9 "Bernal Heights"                  68
## 10 "Bret Harte"                       8
## # ℹ 95 more rows


Q10. Write an expression that returns the number of calls per neighborhood, sorted by the number of calls.

sf311_df |> group_by(neighborhood) |> summarise(num_calls = n()) |> arrange(desc(num_calls)) 
## # A tibble: 105 × 2
##    neighborhood    num_calls
##    <chr>               <int>
##  1 Mission               236
##  2 South of Market       130
##  3 Inner Sunset           83
##  4 Outer Sunset           81
##  5 Bernal Heights         68
##  6 Haight Ashbury         62
##  7 Tenderloin             57
##  8 Lower Nob Hill         55
##  9 Outer Richmond         44
## 10 Inner Richmond         43
## # ℹ 95 more rows


Q11. What is the most common service requested?

sf311_df |> group_by(service_name) |> summarize(num_recs = n()) |> arrange(desc(num_recs))
## # A tibble: 34 × 2
##    service_name                   num_recs
##    <chr>                             <int>
##  1 Street and Sidewalk Cleaning       1152
##  2 Graffiti                            309
##  3 General Request - PUBLIC WORKS       69
##  4 Illegal Postings                     48
##  5 Abandoned Vehicle                    44
##  6 General Request - MTA                38
##  7 Litter Receptacles                   32
##  8 Sidewalk or Curb                     31
##  9 Sewer Issues                         30
## 10 Tree Maintenance                     28
## # ℹ 24 more rows

Or:

sf311_df |> group_by(service_name) |> summarize(num_recs = n()) |> slice_max(num_recs, n = 1)
## # A tibble: 1 × 2
##   service_name                 num_recs
##   <chr>                           <int>
## 1 Street and Sidewalk Cleaning     1152


Q12. What is the least common service requested?

sf311_df |> group_by(service_name) |> summarize(num_recs = n()) |> slice_min(num_recs, n = 1)
## # A tibble: 4 × 2
##   service_name                            num_recs
##   <chr>                                      <int>
## 1 General Request - ANIMAL CARE CONTROL          1
## 2 General Request - HUMAN SERVICES AGENCY        1
## 3 General Request - RISK MANAGEMENT              1
## 4 General Request - RPD                          1