First we load some data from the San Francisco Open Data Portal. Specifically, we’ll import a selection of 311 calls from San Francisco (more info).
## Load packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## Define a URL to a CSV
sf311_url <- "https://data.sfgov.org/resource/vw6y-z8j6.csv?$select=service_request_id,requested_datetime,agency_responsible,service_name,service_subtype,address,neighborhoods_sffind_boundaries,source&$limit=50000&$where=requested_datetime between '2023-10-11T00:00:00' and '2023-10-11T23:59:59'"
## Next, read in the data.
## - URLencode() converts spaces in the URL to numeric codes
## - read.csv() imports the CSV file from SF Open Data
## - rename() changes the name of one of the columns
## - ymd_hms() converts a character column into a date-time column
sf311_df <- read.csv(URLencode(sf311_url)) |>
rename(neighborhood = neighborhoods_sffind_boundaries) |>
mutate(requested_datetime = ymd_hms(requested_datetime))
If you’d like, un-comment the following line to view the data frame in a View pane:
# View(sf311_df)
dim(sf311_df)
## [1] 1971 8
Hint: Look for a function that returns the names of a data frame
names(sf311_df)
## [1] "service_request_id" "requested_datetime" "agency_responsible"
## [4] "service_name" "service_subtype" "address"
## [7] "neighborhood" "source"
sf311_df |> arrange(agency_responsible) |> head()
## service_request_id requested_datetime agency_responsible
## 1 17408819 2023-10-11 10:22:00 311 Service Request Queue
## 2 17408897 2023-10-11 10:38:00 311 Service Request Queue
## 3 17409782 2023-10-11 13:04:25 311 Service Request Queue
## 4 17409795 2023-10-11 13:07:00 311 Service Request Queue
## 5 17409892 2023-10-11 13:22:00 311 Service Request Queue
## 6 17410438 2023-10-11 15:06:18 311 Service Request Queue
## service_name service_subtype
## 1 311 External Request noise_dog_barking
## 2 311 External Request noise_dog_barking
## 3 General Request - 311CUSTOMERSERVICECENTER other
## 4 311 External Request noise_dog_barking
## 5 311 External Request noise_dog_barking
## 6 General Request - 311CUSTOMERSERVICECENTER complaint
## address neighborhood source
## 1 243 VIDAL DR, SAN FRANCISCO, CA, 94132 Parkmerced Phone
## 2 643 DIVISADERO ST, SAN FRANCISCO, CA, 94117 Panhandle Phone
## 3 Not associated with a specific address Web
## 4 530 STOCKTON ST, SAN FRANCISCO, CA, 94108 Chinatown Phone
## 5 351 FAXON AVE, SAN FRANCISCO, CA, 94112 Ingleside Phone
## 6 Not associated with a specific address Web
sf311_df |> select(service_name, service_subtype, source) |> head()
## service_name service_subtype source
## 1 Street and Sidewalk Cleaning City_garbage_can_overflowing Mobile/Open311
## 2 Noise Report garbage_recycling_collection Mobile/Open311
## 3 Street and Sidewalk Cleaning City_garbage_can_overflowing Mobile/Open311
## 4 Noise Report other_excessive_noise Phone
## 5 Street and Sidewalk Cleaning City_garbage_can_overflowing Mobile/Open311
## 6 Street and Sidewalk Cleaning City_garbage_can_overflowing Mobile/Open311
sf311_df |> pull(requested_datetime) |> range()
## [1] "2023-10-11 00:24:00 UTC" "2023-10-11 23:59:47 UTC"
sf311_df |> filter(source == 'Phone') |> head()
## service_request_id requested_datetime agency_responsible
## 1 17407515 2023-10-11 00:52:00 Noise Report Queue
## 2 17407648 2023-10-11 05:38:00 SFMTA - Parking Enforcement - G
## 3 17407661 2023-10-11 05:52:00 RPD GGP Queue
## 4 17407671 2023-10-11 06:03:00 DPT Signal Queue
## 5 17407681 2023-10-11 06:17:00 Tenderloin CBD Queue
## 6 17407697 2023-10-11 06:31:00 PUC Sewer Ops
## service_name service_subtype
## 1 Noise Report other_excessive_noise
## 2 General Request - MTA request_for_service
## 3 Rec and Park Requests Park - Structural_Maintenance
## 4 Damaged Property Damaged Traffic_Signal
## 5 Street and Sidewalk Cleaning General Cleaning
## 6 Sewer Issues Sewage_back_up
## address
## 1 158 ALPINE TER, SAN FRANCISCO, CA, 94117
## 2 626 MISSION BAY BLVD NORTH, SAN FRANCISCO, CA, 94158
## 3 GGP Stow Lake and Strawberry Hill, , SAN FRANCISCO, CA, 94122
## 4 1836 GEARY BLVD, SAN FRANCISCO, CA, 94115
## 5 424 LEAVENWORTH ST, SAN FRANCISCO, CA, 94102
## 6 132 BRODERICK ST, SAN FRANCISCO, CA, 94117
## neighborhood source
## 1 Buena Vista Phone
## 2 Mission Bay Phone
## 3 Golden Gate Park Phone
## 4 Lower Pacific Heights Phone
## 5 Tenderloin Phone
## 6 Buena Vista Phone
sf311_df |> filter(source == 'Phone') |> nrow()
## [1] 310
Hint: Look at the service_subtype column.
sf311_df |> filter(source == 'Phone', service_subtype == 'noise_dog_barking')
## service_request_id requested_datetime agency_responsible
## 1 17408819 2023-10-11 10:22:00 311 Service Request Queue
## 2 17408897 2023-10-11 10:38:00 311 Service Request Queue
## 3 17409795 2023-10-11 13:07:00 311 Service Request Queue
## 4 17409892 2023-10-11 13:22:00 311 Service Request Queue
## 5 17410510 2023-10-11 15:20:00 311 Service Request Queue
## service_name service_subtype
## 1 311 External Request noise_dog_barking
## 2 311 External Request noise_dog_barking
## 3 311 External Request noise_dog_barking
## 4 311 External Request noise_dog_barking
## 5 311 External Request noise_dog_barking
## address neighborhood source
## 1 243 VIDAL DR, SAN FRANCISCO, CA, 94132 Parkmerced Phone
## 2 643 DIVISADERO ST, SAN FRANCISCO, CA, 94117 Panhandle Phone
## 3 530 STOCKTON ST, SAN FRANCISCO, CA, 94108 Chinatown Phone
## 4 351 FAXON AVE, SAN FRANCISCO, CA, 94112 Ingleside Phone
## 5 1341 VALENCIA ST, SAN FRANCISCO, CA, 94110 Mission Phone
sf311_df |> group_by(neighborhood) |> summarise(num_calls = n()) |> arrange(neighborhood)
## # A tibble: 105 × 2
## neighborhood num_calls
## <chr> <int>
## 1 "" 15
## 2 "Alamo Square" 10
## 3 "Anza Vista" 7
## 4 "Apparel City" 4
## 5 "Aquatic Park / Ft. Mason" 5
## 6 "Ashbury Heights" 4
## 7 "Balboa Terrace" 1
## 8 "Bayview" 22
## 9 "Bernal Heights" 68
## 10 "Bret Harte" 8
## # ℹ 95 more rows
sf311_df |> group_by(neighborhood) |> summarise(num_calls = n()) |> arrange(desc(num_calls))
## # A tibble: 105 × 2
## neighborhood num_calls
## <chr> <int>
## 1 Mission 236
## 2 South of Market 130
## 3 Inner Sunset 83
## 4 Outer Sunset 81
## 5 Bernal Heights 68
## 6 Haight Ashbury 62
## 7 Tenderloin 57
## 8 Lower Nob Hill 55
## 9 Outer Richmond 44
## 10 Inner Richmond 43
## # ℹ 95 more rows
sf311_df |> group_by(service_name) |> summarize(num_recs = n()) |> arrange(desc(num_recs))
## # A tibble: 34 × 2
## service_name num_recs
## <chr> <int>
## 1 Street and Sidewalk Cleaning 1152
## 2 Graffiti 309
## 3 General Request - PUBLIC WORKS 69
## 4 Illegal Postings 48
## 5 Abandoned Vehicle 44
## 6 General Request - MTA 38
## 7 Litter Receptacles 32
## 8 Sidewalk or Curb 31
## 9 Sewer Issues 30
## 10 Tree Maintenance 28
## # ℹ 24 more rows
Or:
sf311_df |> group_by(service_name) |> summarize(num_recs = n()) |> slice_max(num_recs, n = 1)
## # A tibble: 1 × 2
## service_name num_recs
## <chr> <int>
## 1 Street and Sidewalk Cleaning 1152
sf311_df |> group_by(service_name) |> summarize(num_recs = n()) |> slice_min(num_recs, n = 1)
## # A tibble: 4 × 2
## service_name num_recs
## <chr> <int>
## 1 General Request - ANIMAL CARE CONTROL 1
## 2 General Request - HUMAN SERVICES AGENCY 1
## 3 General Request - RISK MANAGEMENT 1
## 4 General Request - RPD 1