Homework 2: Download and Plot Weather Station Data using the Synoptic API - SOLUTIONS

1 Overview

In this exercise, you will import and plot a) temperature and b) precipitation data for a weather station of your choice using the Synoptic API. The outputs will look something like:




2 Preparation

2.1 1. Create a Synoptic Account

Create an account on Synoptic Data (instructions). Either an open access or a commercial trial account will work. Create a private key and public token. You will use your public token when calling the API.

2.2 2. Select a Weather Station

Open the Synoptic Data Viewer: https://viewer.synopticdata.com/

Zoom and pan to find the station of your choice. Try to find a weather station that records both temperature and precipitation (this includes stations in the CIMIS network, RAWS, Global METAR, and others).

Click on a station to view the station ID and data availability.

2.3 3. Read the docs

Read the documentation for the Synoptic time series end point. Take note of:

  • the base URL
  • parameters (required and optional)

Tip: You can test your request with the Synoptic Query Builder: https://demos.synopticdata.com/query-builder/


3 Part 1. Download hourly temperature data, compute daily min and max, and plot

3.1 Create the API Request

Create an API request object for the Synoptic Time Series End Point, getting hourly temperature and precipitation data for the station of your choice from Oct 1, 2023 thru yesterday (i.e., the current water year).

library(httr2)
library(lubridate) |> suppressPackageStartupMessages()

synoptic_ts_baseurl <- "https://api.synopticdata.com/v2/stations/timeseries"

my_token <- readLines("~/My Tokens/my_synoptic_token_hw02.txt", n = 1)

station_id_chr <- "HSEC1"

## Create the start time (midnight Oct 1, 2023 > converted to UTC > converted to a character 
## string formatted as YYYYmmddHHMM)

start_utc_chr <- make_datetime(year = 2023, month = 10, day = 1, hour = 0, min = 0, sec = 0, 
                               tz = "America/Los_Angeles") |> 
  with_tz("UTC") |> 
  format("%Y%m%d%H%M")

start_utc_chr
[1] "202310010700"
## Create the end time (11:59pm yesterday >> converted to UTC >> converted to a character 
## string formatted as YYYYmmddHHMM)

end_utc_chr <- (as_datetime(today() - days(1), tz = "America/Los_Angeles") + 
                  hours(23) + minutes(59)) |>  
  with_tz("UTC") |> 
  format("%Y%m%d%H%M")

end_utc_chr
[1] "202405210659"
weather_vars <- "air_temp"

## Create the request object:
holister_tas_req <- request(synoptic_ts_baseurl) |> 
  req_headers("Accept" = "application/json") |> 
  req_url_query(token = my_token,
                start = start_utc_chr,
                end = end_utc_chr,
                stid = station_id_chr,
                vars = weather_vars,
                units = "english",
                obtimezone = "local",
                .multi = "comma")

holister_tas_req
<httr2_request>
GET
https://api.synopticdata.com/v2/stations/timeseries?token=8a8a02a7adb14d74af68b54e494e30ec&start=202310010700&end=202405210659&stid=HSEC1&vars=air_temp&units=english&obtimezone=local
Headers:
• Accept: 'application/json'
Body: empty


3.2 Perform the request

## Do a dry run
# holister_tas_req |> req_dry_run()  

## Perform the request
holister_tas_resp <- holister_tas_req |> req_perform()


View the response:

holister_tas_resp
<httr2_response>
GET
https://api.synopticdata.com/v2/stations/timeseries?token=8a8a02a7adb14d74af68b54e494e30ec&start=202310010700&end=202405210659&stid=HSEC1&vars=air_temp&units=english&obtimezone=local
Status: 200 OK
Content-Type: application/json
Body: In memory (179848 bytes)


3.3 Convert the response into a tibble

Extract the body of the response as a list object:

holister_tas_lst <- holister_tas_resp |> resp_body_json()

## Inspect the structure
## holister_tas_lst |> View()


Pull out the dates:

holister_tas_dt <- holister_tas_lst$STATION[[1]]$OBSERVATIONS$date_time |> 
  unlist() |> 
  ymd_hms(tz = "America/Los_Angeles")
Date in ISO8601 format; converting timezone from UTC to "America/Los_Angeles".
length(holister_tas_dt)
[1] 5591
head(holister_tas_dt)
[1] "2023-10-01 00:13:00 PDT" "2023-10-01 01:13:00 PDT"
[3] "2023-10-01 02:13:00 PDT" "2023-10-01 03:13:00 PDT"
[5] "2023-10-01 04:13:00 PDT" "2023-10-01 05:13:00 PDT"
tail(holister_tas_dt)
[1] "2024-05-20 18:13:00 PDT" "2024-05-20 19:13:00 PDT"
[3] "2024-05-20 20:13:00 PDT" "2024-05-20 21:13:00 PDT"
[5] "2024-05-20 22:13:00 PDT" "2024-05-20 23:13:00 PDT"


Pull out the temperature values:

TIP: You can convert NULL values in a list to NAs with purrr::modify_if(). Example:

lst <- list(a = 1, b = 2, c = NULL, d = 4)
str(lst)
List of 4
 $ a: num 1
 $ b: num 2
 $ c: NULL
 $ d: num 4
library(purrr)
lst_nonull <- modify_if(lst, is_null, ~NA)
str(lst_nonull)
List of 4
 $ a: num 1
 $ b: num 2
 $ c: logi NA
 $ d: num 4


holister_tas_temp_int <- holister_tas_lst$STATION[[1]]$OBSERVATIONS$air_temp_set_1 |> 
  modify_if(is_null, ~NA) |> 
  unlist()

length(holister_tas_temp_int)
[1] 5591
head(holister_tas_temp_int)
[1] 57 57 57 57 57 57
tail(holister_tas_temp_int)
[1] 64 62 55 52 51 50


Combine the dates and temperature values into a tibble:

library(dplyr, warn.conflicts = FALSE)

holister_tas_hly_tbl <- tibble(
  stat_id = "HSEC1",
  dt = holister_tas_dt,
  tas = holister_tas_temp_int
)

glimpse(holister_tas_hly_tbl)
Rows: 5,591
Columns: 3
$ stat_id <chr> "HSEC1", "HSEC1", "HSEC1", "HSEC1", "HSEC1", "HSEC1", "HSEC1",…
$ dt      <dttm> 2023-10-01 00:13:00, 2023-10-01 01:13:00, 2023-10-01 02:13:00…
$ tas     <dbl> 57, 57, 57, 57, 57, 57, 56, 56, 57, 59, 61, 63, 68, 72, 74, 71…


3.4 Aggregate by day

As can be seen above, the daily we got is hourly. So we need to compute the minimum and maximum temperature by day.

holister_tas_dly_tbl <- holister_tas_hly_tbl |> 
  group_by(date = date(dt)) |> 
  summarise(tasmin = min(tas), tasmax = max(tas), .groups = "drop") 

glimpse(holister_tas_dly_tbl)
Rows: 233
Columns: 3
$ date   <date> 2023-10-01, 2023-10-02, 2023-10-03, 2023-10-04, 2023-10-05, 20…
$ tasmin <dbl> 55, 49, 49, 54, 57, 59, 60, 56, 55, 61, 54, 50, 50, 50, 50, 51,…
$ tasmax <dbl> 74, 77, 84, 92, 96, 99, 100, 83, 71, 72, 73, 80, 82, 81, 85, 83…


3.5 Plot the minimum and maximum daily temperature

library(ggplot2)
ggplot(holister_tas_dly_tbl, mapping = aes(x = date)) +
  geom_line(mapping = aes(y = tasmin), col = "blue") +
  geom_line(mapping = aes(y = tasmax), col = "red") + 
  ylab("temp (F)") +
  xlab("") +
  labs(title = "Daily Minimum and Maximum Temperature for Hollister, CA", 
       subtitle = "Oct 2023 - May 2024")


4 Part II (Bonus): Download and plot daily prepcipitation

To download the precipitation values, we can re-use most of the contents of our temperature request object, but change the vars parameter to precip, and add precip = 1 to enable derived precipitation (see the Time Series endpoint docs for details).

4.1 Create the request object

holister_pr_req <- request(synoptic_ts_baseurl) |> 
  req_headers("Accept" = "application/json") |> 
  req_url_query(token = my_token,
                start = start_utc_chr,
                end = end_utc_chr,
                stid = station_id_chr,
                vars = "precip",
                precip = 1,                   ## recommended in  the API Docs
                units = "english",
                obtimezone = "local",
                .multi = "comma")

holister_pr_req
<httr2_request>
GET
https://api.synopticdata.com/v2/stations/timeseries?token=8a8a02a7adb14d74af68b54e494e30ec&start=202310010700&end=202405210659&stid=HSEC1&vars=precip&precip=1&units=english&obtimezone=local
Headers:
• Accept: 'application/json'
Body: empty


4.2 Perform the request

holister_pr_resp <- holister_pr_req |> req_perform()
holister_pr_resp
<httr2_response>
GET
https://api.synopticdata.com/v2/stations/timeseries?token=8a8a02a7adb14d74af68b54e494e30ec&start=202310010700&end=202405210659&stid=HSEC1&vars=precip&precip=1&units=english&obtimezone=local
Status: 200 OK
Content-Type: application/json
Body: In memory (203689 bytes)


4.3 Convert the body to a tibble

First we convert the body to a list:

holister_pr_lst <- holister_pr_resp |> resp_body_json()
# holister_pr_lst |> View()


Pull out the dates:

holister_pr_dt <- holister_pr_lst$STATION[[1]]$OBSERVATIONS$date_time |> 
  unlist() |> 
  ymd_hms(tz = "America/Los_Angeles")
Date in ISO8601 format; converting timezone from UTC to "America/Los_Angeles".
length(holister_pr_dt)
[1] 5591


Pull out the precip values:

holister_pr_int_dbl <- holister_pr_lst$STATION[[1]]$OBSERVATIONS$precip_intervals_set_1d |> 
  modify_if(is_null, ~NA) |> 
  unlist()

length(holister_pr_int_dbl)
[1] 5591


Combine the date and hourly precipitation vectors in a tibble:

holister_pr_hly_tbl <- tibble(
  stat_id = "HSEC1",
  dt = holister_pr_dt,
  pr = holister_pr_int_dbl
)

glimpse(holister_pr_hly_tbl)
Rows: 5,591
Columns: 3
$ stat_id <chr> "HSEC1", "HSEC1", "HSEC1", "HSEC1", "HSEC1", "HSEC1", "HSEC1",…
$ dt      <dttm> 2023-10-01 00:13:00, 2023-10-01 01:13:00, 2023-10-01 02:13:00…
$ pr      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…


4.4 Compute the daily totals and plot

First, we compute the daily totals:

holister_pr_dly_tbl <- holister_pr_hly_tbl |> 
  group_by(date = date(dt)) |> 
  summarise(daily_precip = sum(pr), .groups = "drop")

head(holister_pr_dly_tbl)


Plot them as a column chart:

ggplot(holister_pr_dly_tbl, mapping = aes(x = date, y = daily_precip)) +
  geom_col() + 
  ylab("precip (in)") +
  xlab("") +
  labs(title = "Daily Precipitation Totals for Hollister, CA", 
       subtitle = "Oct 2023 - May 2024")
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_col()`).


4.5 Plot the accumulated precipitation

library(tidyr)

holister_pr_dly_tbl |> 
  replace_na(list(daily_precip = 0)) |> 
  mutate(acc_pr = cumsum(daily_precip)) |> 
  ggplot(holister_pr_dly_tbl, mapping = aes(x = date, y = acc_pr)) +
  geom_line() + 
  ylab("precip (in)") +
  xlab("") +
  labs(title = "Accumulated Precipitation for Hollister, CA", 
       subtitle = "Oct 2023 - May 2024")


4.6 What was the total accumulation this water year?

holister_pr_dly_tbl$daily_precip |> sum(na.rm = TRUE)
[1] 14.07