Chapter 3 Time-Series Plots
In this chapter we’ll look at how to make some time series plots with Cal-Adapt data.
3.1 Load Packages
As always, begin by loading a bunch of packages into memory and specifying preferences for conflicting function names.
library(caladaptr)
library(units)
library(ggplot2)
library(dplyr)
library(tidyr)
library(lubridate)
library(sf)
Set conflicted preferences:
library(conflicted)
conflict_prefer("filter", "dplyr", quiet = TRUE)
conflict_prefer("count", "dplyr", quiet = TRUE)
conflict_prefer("select", "dplyr", quiet = TRUE)
3.2 Time Series Plots
In this section our goal is to create time series plot of projected maximum annual temperature for a single point, using the four recommended GCMs for California under RCP 4.5.
3.2.1 Fetch Some Data
First we create an API request object that will ask the Cal-Adapt API to send back data for:
- maximum daily temperature, averaged by year (on the server)
- a point location (outside Salinas, CA)
- the first four CGMs (HadGEM2-ES, CNRM-CM5, CanESM2, MIROC5 - the four ‘priority’ models recommended for California under the 4th Climate Change Assessment)
- two emissions scenarios (rcp45 and rcp85)
- the time period 2006-2099
<- ca_loc_pt(coords = c(-121.549945, 36.643642)) %>%
salns_cap ca_gcm(gcms[1:4]) %>%
ca_scenario(c("rcp45", "rcp85")) %>%
ca_period("year") %>%
ca_years(start = 2006, end = 2099) %>%
ca_cvar(c("tasmin", "tasmax"))
Check the API request for issues:
%>% ca_preflight() salns_cap
## General issues
## - none found
## Issues for querying values
## - none found
## Issues for downloading rasters
## - none found
Fetch data:
<- salns_cap %>% ca_getvals_tbl(quiet = TRUE) salns_tbl
glimpse(salns_tbl)
## Rows: 1,280
## Columns: 8
## $ id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ cvar <fct> tasmin, tasmin, tasmin, tasmin, tasmin, tasmin, tasmin, tasmi~
## $ period <fct> year, year, year, year, year, year, year, year, year, year, y~
## $ gcm <fct> HadGEM2-ES, HadGEM2-ES, HadGEM2-ES, HadGEM2-ES, HadGEM2-ES, H~
## $ scenario <fct> rcp45, rcp45, rcp45, rcp45, rcp45, rcp45, rcp45, rcp45, rcp45~
## $ spag <fct> none, none, none, none, none, none, none, none, none, none, n~
## $ dt <chr> "2020-12-31", "2021-12-31", "2022-12-31", "2023-12-31", "2024~
## $ val [K] 281.5107 [K], 281.1210 [K], 282.2193 [K], 281.9851 [K], 281.815~
The val
column contains the temperature values with the units recorded (i.e., a units object from the units
package). But Kelvin is not very intuitive. Let’s add a column with the temperatures in Fahrenheit. Because we already have the temperature values as a units object, we can convert them with units::set_units()
.
<- salns_tbl %>%
salns_degf_tbl mutate(temp_f = units::set_units(val, degF))
glimpse(salns_degf_tbl)
## Rows: 1,280
## Columns: 9
## $ id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ cvar <fct> tasmin, tasmin, tasmin, tasmin, tasmin, tasmin, tasmin, tasmi~
## $ period <fct> year, year, year, year, year, year, year, year, year, year, y~
## $ gcm <fct> HadGEM2-ES, HadGEM2-ES, HadGEM2-ES, HadGEM2-ES, HadGEM2-ES, H~
## $ scenario <fct> rcp45, rcp45, rcp45, rcp45, rcp45, rcp45, rcp45, rcp45, rcp45~
## $ spag <fct> none, none, none, none, none, none, none, none, none, none, n~
## $ dt <chr> "2020-12-31", "2021-12-31", "2022-12-31", "2023-12-31", "2024~
## $ val [K] 281.5107 [K], 281.1210 [K], 282.2193 [K], 281.9851 [K], 281.815~
## $ temp_f [degF] 47.04928 [degF], 46.34775 [degF], 48.32468 [degF], 47.90319 ~
3.2.2 Plot 1. Single Climate Variable & Single RCP
Our table has a lot of data combined: two climate variables, four GCMs, and two emissions scenarios. Each combo of those variables constitutes a time series, so we have 16 time series in total.
We’ll start by plotting just a single time series (one climate variable, one GCM, one scenario). Because our table is in ‘long’ format, we can easily filter it with dplyr::filter()
:
<- salns_degf_tbl %>%
single_series_tbl filter(cvar == "tasmax", gcm == "HadGEM2-ES", scenario == "rcp45") %>%
select(dt, cvar, gcm, scenario, period, temp_f)
glimpse(single_series_tbl)
## Rows: 80
## Columns: 6
## $ dt <chr> "2020-12-31", "2021-12-31", "2022-12-31", "2023-12-31", "2024~
## $ cvar <fct> tasmax, tasmax, tasmax, tasmax, tasmax, tasmax, tasmax, tasma~
## $ gcm <fct> HadGEM2-ES, HadGEM2-ES, HadGEM2-ES, HadGEM2-ES, HadGEM2-ES, H~
## $ scenario <fct> rcp45, rcp45, rcp45, rcp45, rcp45, rcp45, rcp45, rcp45, rcp45~
## $ period <fct> year, year, year, year, year, year, year, year, year, year, y~
## $ temp_f [degF] 67.86865 [degF], 69.67442 [degF], 69.38894 [degF], 68.93449 ~
Plot with ggplot:
ggplot(data = single_series_tbl, aes(x = as.Date(dt), y = as.numeric(temp_f))) +
geom_line() +
labs(title = "Predicted Maximum Daily Temperature Averaged by Year",
caption = "Location: Salinas, CA. GCM: HadGEM2-ES; Emissions scenario: RCP4.5",
x = "year", y = "temp (F)")
If you want to plot multiple GCMs, you can differentiate them with colors. ggplot makes this easy, and will even create a legend for you. But although we can easily add lots of lines to a plot, it makes to sense to limit each plot to a single climate variables and a single emissions scenario.
<- salns_degf_tbl %>%
mult_gcm_tbl filter(cvar == "tasmax", scenario == "rcp45") %>%
select(dt, gcm, temp_f)
ggplot(data = mult_gcm_tbl, aes(x = as.Date(dt), y = as.numeric(temp_f))) +
geom_line(aes(color=gcm)) +
labs(title = "Predicted Maximum Daily Temperature Averaged by Year",
caption = "Location: Salinas, CA. Emissions scenario: RCP4.5",
x = "year", y = "temp (F)")
3.3 Multiple Emissions Scenarios
To plot multiple emissions scenarios, you probably want to use facets:
<- salns_degf_tbl %>%
mult_scen_tbl filter(cvar == "tasmax") %>%
select(dt, gcm, scenario, temp_f)
ggplot(data = mult_scen_tbl, aes(x = as.Date(dt), y = as.numeric(temp_f))) +
geom_line(aes(color=gcm)) +
facet_grid(scenario ~ .) +
labs(title = "Predicted Maximum Daily Temperature Averaged by Year",
caption = "Location: Salinas, CA",
x = "year", y = "temp (F)")
3.4 Overlay a Trend Line
ggplot can add a trend line for you (by adding geom_smooth
), but you have to decide whether you want a trend line for each individual GCM, or perhaps all GCMs in the same emissions scenario. Below we overlay a trend line for each emissions scenario.
ggplot(data = mult_scen_tbl, aes(x = as.Date(dt), y = as.numeric(temp_f))) +
geom_line(aes(color=gcm)) +
geom_smooth(formula = y ~ x, method=lm, se=FALSE, col='red', size=1) +
facet_grid(scenario ~ .) +
labs(title = "Predicted Maximum Daily Temperature Averaged by Year",
caption = "Location: Salinas, CA",
x = "year", y = "temp (F)")
3.5 Historic and Future Data Together
Many studies on the impact of climate change look at the past as a reference for future climate conditions. Here we’ll create a time series plot that goes all the way back to 1950.
The first question you have to ask is what data should I use for the historic period?. Your first reaction might be to use observed historic data (i.e., from weather stations). You can certainly do that, as the Cal-Adapt API has both the Livneh and gridMet datasets (both of which are observed data interpolated from weather stations).
However we don’t have observed data for the future. This presents a problem because comparing observed past data with modeled future data is like comparing apples and oranges. They represent different things.
A better option is to compare the modeled future climate with a modeled historic climate. This may seem counter-intuitive, but the climate models have been trained from historic data, and have been shown to do a very good job at capturing the climate envelope of the past.
First we need to get the historic modeled data, for the same climate variable, GCMs, and location. caladaptR has no problem combining multiple datasets in the same API call, however in this case we need to make a separate API call for the historic data because the dates are different.
<- ca_loc_pt(coords = c(-121.549945, 36.643642)) %>%
salns_hist_cap ca_gcm(gcms[1:4]) %>%
ca_scenario("historical") %>%
ca_period("year") %>%
ca_years(start = 1950, end = 2005) %>%
ca_cvar(c("tasmin", "tasmax"))
%>% ca_preflight() salns_hist_cap
## General issues
## - none found
## Issues for querying values
## - none found
## Issues for downloading rasters
## - none found
Next we get the values. While we’re at it, we can add the column for degrees Fahrenheit:
<- salns_hist_cap %>%
salns_hist_tbl ca_getvals_tbl(quiet = TRUE) %>%
mutate(temp_f = units::set_units(val, degF))
head(salns_hist_tbl)
## # A tibble: 6 x 9
## id cvar period gcm scenario spag dt val temp_f
## <int> <fct> <fct> <fct> <fct> <fct> <chr> [K] [degF]
## 1 1 tasmin year HadGEM2-ES historical none 1950-12-31 280. 45.0
## 2 1 tasmin year HadGEM2-ES historical none 1951-12-31 280. 44.9
## 3 1 tasmin year HadGEM2-ES historical none 1952-12-31 282. 47.6
## 4 1 tasmin year HadGEM2-ES historical none 1953-12-31 282. 48.1
## 5 1 tasmin year HadGEM2-ES historical none 1954-12-31 281. 46.9
## 6 1 tasmin year HadGEM2-ES historical none 1955-12-31 281. 46.0
Our tibbles for the past and future time periods have identical columns, so we can stack them. While we’re at it, we can convert the dt column from character to Date:
<- salns_degf_tbl %>%
salns_all_tbl bind_rows(salns_hist_tbl) %>%
mutate(dt = as.Date(dt))
head(salns_all_tbl)
## # A tibble: 6 x 9
## id cvar period gcm scenario spag dt val temp_f
## <int> <fct> <fct> <fct> <fct> <fct> <date> [K] [degF]
## 1 1 tasmin year HadGEM2-ES rcp45 none 2020-12-31 282. 47.0
## 2 1 tasmin year HadGEM2-ES rcp45 none 2021-12-31 281. 46.3
## 3 1 tasmin year HadGEM2-ES rcp45 none 2022-12-31 282. 48.3
## 4 1 tasmin year HadGEM2-ES rcp45 none 2023-12-31 282. 47.9
## 5 1 tasmin year HadGEM2-ES rcp45 none 2024-12-31 282. 47.6
## 6 1 tasmin year HadGEM2-ES rcp45 none 2025-12-31 282. 47.5
summary(salns_all_tbl %>% select(cvar, gcm, scenario, dt, temp_f))
## cvar gcm scenario dt
## tasmax:864 CanESM2 :432 rcp45 :640 Min. :1950-12-31
## tasmin:864 CNRM-CM5 :432 rcp85 :640 1st Qu.:2004-09-30
## HadGEM2-ES:432 historical:448 Median :2046-07-01
## MIROC5 :432 Mean :2039-03-29
## 3rd Qu.:2073-04-01
## Max. :2099-12-31
## temp_f
## Min. :43.99
## 1st Qu.:48.73
## Median :60.94
## Mean :59.49
## 3rd Qu.:69.99
## Max. :78.35
To plot the time series from 1950 thru 2099, we have to pick one of the future scenarios.
<- salns_all_tbl %>%
salns_histrcp45_tbl filter(cvar == "tasmax", scenario %in% c("historical", "rcp45")) %>%
select(dt, gcm, temp_f)
salns_histrcp45_tbl
## # A tibble: 544 x 3
## dt gcm temp_f
## <date> <fct> [degF]
## 1 2020-12-31 HadGEM2-ES 67.9
## 2 2021-12-31 HadGEM2-ES 69.7
## 3 2022-12-31 HadGEM2-ES 69.4
## 4 2023-12-31 HadGEM2-ES 68.9
## 5 2024-12-31 HadGEM2-ES 68.7
## 6 2025-12-31 HadGEM2-ES 69.6
## 7 2026-12-31 HadGEM2-ES 68.9
## 8 2027-12-31 HadGEM2-ES 70.1
## 9 2028-12-31 HadGEM2-ES 67.3
## 10 2029-12-31 HadGEM2-ES 67.6
## # ... with 534 more rows
ggplot(data = salns_histrcp45_tbl, aes(x = as.Date(dt), y = as.numeric(temp_f))) +
geom_line(aes(color=gcm)) +
labs(title = "Predicted Maximum Daily Temperature Averaged by Year",
caption = "Location: Salinas, CA. Emissions scenarios: Historical (1950-2005), RCP4.5 (2006-2099)",
x = "year", y = "temp (F)")
3.6 Overlay a 32-model Ensemble
In the examples above, we plotted the 4 ‘priority’ GCMs out of the 10 recommended GCMs for California. But there are 22 other GCMs out there. To compare how the four (or ten) GCMs in a plot compare to the range of variability of all 32 GCMs, we can overlay the 32-ensemble model.
The 32-ensemble model is available for many but not all of the climate variables, temporal aggregations, or emission scenarios. You can see which ensemble raster series are available by searching the data catalog for ‘ens32’:
<- ca_catalog_search("ens32", quiet = TRUE)
ensem_rasterseries_df nrow(ensem_rasterseries_df)
## [1] 207
1:6, c("slug", "name")] ensem_rasterseries_df[
## slug
## 1 cdd_year_ens32avg_historical
## 2 cdd_year_ens32avg_rcp45
## 3 cdd_year_ens32avg_rcp85
## 4 cddm_30y_ens32max_historical
## 5 cddm_30y_ens32max_rcp45
## 6 cddm_30y_ens32max_rcp85
## name
## 1 Average number of cooling degree days per year from 32 models historical
## 2 Average number of cooling degree days per year from 32 models rcp45
## 3 Average number of cooling degree days per year from 32 models rcp85
## 4 30 year annual average maximum length of dry spell from 32 model maximum historical
## 5 30 year annual average maximum length of dry spell from 32 model maximum rcp45
## 6 30 year annual average maximum length of dry spell from 32 model maximum rcp85
There are 207 raster series that are derived from the 32-ensemble models. caladaptr doesn’t have convenience functions that you can string together to specify the 32-ensemble raster series, so you need to find the slug(s) by searching the data catalog (see above). To overlay the 32-ensemble of daily maximum averaged by year, we need to first download the following slugs:
<- ca_loc_pt(coords = c(-121.549945, 36.643642)) %>%
salns_32ens_cap ca_slug(c("tasmax_year_ens32min_rcp45", "tasmax_year_ens32max_rcp45",
"tasmin_year_ens32min_rcp45", "tasmin_year_ens32max_rcp45")) %>%
ca_years(start = 2006, end = 2099)
%>% ca_preflight() salns_32ens_cap
## General issues
## - none found
## Issues for querying values
## - none found
## Issues for downloading rasters
## - none found
<- salns_32ens_cap %>%
salns_32ens_tbl ca_getvals_tbl(quiet = TRUE)
glimpse(salns_32ens_tbl)
## Rows: 376
## Columns: 5
## $ id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,~
## $ slug <chr> "tasmax_year_ens32min_rcp45", "tasmax_year_ens32min_rcp45", "tasm~
## $ spag <fct> none, none, none, none, none, none, none, none, none, none, none,~
## $ dt <chr> "2006-12-31", "2007-12-31", "2008-12-31", "2009-12-31", "2010-12-~
## $ val [K] 291.6819 [K], 292.1163 [K], 292.0294 [K], 292.1826 [K], 291.7250 [K~
We need to make two changes: convert the date column from character to date, and add a column for degrees F.
<- salns_32ens_tbl %>%
salns_32ens_tbl mutate(dt = as.Date(dt),
temp_f = units::set_units(val, degF))
head(salns_32ens_tbl)
## # A tibble: 6 x 6
## id slug spag dt val temp_f
## <int> <chr> <fct> <date> [K] [degF]
## 1 1 tasmax_year_ens32min_rcp45 none 2006-12-31 292. 65.4
## 2 1 tasmax_year_ens32min_rcp45 none 2007-12-31 292. 66.1
## 3 1 tasmax_year_ens32min_rcp45 none 2008-12-31 292. 66.0
## 4 1 tasmax_year_ens32min_rcp45 none 2009-12-31 292. 66.3
## 5 1 tasmax_year_ens32min_rcp45 none 2010-12-31 292. 65.4
## 6 1 tasmax_year_ens32min_rcp45 none 2011-12-31 292. 66.3
Now we can plot the 32-ensemble min and max of the daily maximum temperature averaged by year as individual lines:
<- salns_32ens_tbl %>%
ens_minmax_tbl filter(slug %in% c("tasmax_year_ens32min_rcp45", "tasmax_year_ens32max_rcp45")) %>%
select(dt, slug, temp_f)
ggplot(data = ens_minmax_tbl,
aes(x = dt, y = as.numeric(temp_f))) +
geom_line(aes(color=slug)) +
labs(x = "year", y = "temp (F)")
To have the area between the lines show up as a shaded area, we’re going to use geom_ribbon. But geom_ribbon requires that the min and max series to be in separate columns (i.e., wide format instead of long format). Hence before we can plot it we need to reshape the data:
<- ens_minmax_tbl %>%
ens_minmax_wide_tbl ::pivot_wider(names_from = slug,
tidyrvalues_from = temp_f,
id_cols = dt)
head(ens_minmax_wide_tbl)
## # A tibble: 6 x 3
## dt tasmax_year_ens32min_rcp45 tasmax_year_ens32max_rcp45
## <date> [degF] [degF]
## 1 2006-12-31 65.4 70.2
## 2 2007-12-31 66.1 70.2
## 3 2008-12-31 66.0 69.7
## 4 2009-12-31 66.3 70.8
## 5 2010-12-31 65.4 69.8
## 6 2011-12-31 66.3 71.0
Now we can plot the area between the lines:
ggplot(data = ens_minmax_wide_tbl, aes(x = dt)) +
geom_ribbon(aes(ymin = as.numeric(tasmax_year_ens32min_rcp45), ymax = as.numeric(tasmax_year_ens32max_rcp45)), fill = "gray70")
## This works also (don't convert the units)
ggplot(data = ens_minmax_wide_tbl, aes(x = dt)) +
geom_ribbon(aes(ymin = tasmax_year_ens32min_rcp45, ymax = tasmax_year_ens32max_rcp45), fill = "gray80")
Now we can combine our two plots. We’ll start with the geom_ribbon and then lay the data series on top.
ggplot(data = ens_minmax_wide_tbl, aes(x = dt)) +
geom_ribbon(aes(ymin = as.numeric(tasmax_year_ens32min_rcp45),
ymax = as.numeric(tasmax_year_ens32max_rcp45)), fill = "gray80") +
geom_line(data = mult_gcm_tbl,
mapping = aes(x = as.Date(dt), y = as.numeric(temp_f), color = gcm)) +
labs(title = "Predicted Maximum Daily Temperature Averaged by Year",
caption = "Location: Salinas, CA. Emissions scenario: RCP4.5. Shaded background = 32-GCM ensemble",
x = "year", y = "temp (F)")