Load Packages

library(dplyr)
library(tidyr)

# Specify package preferences for filter, count, select, and arrange
library(conflicted)
conflict_prefer("filter", "dplyr", quiet = TRUE)
conflict_prefer("count", "dplyr", quiet = TRUE)
conflict_prefer("select", "dplyr", quiet = TRUE)
conflict_prefer("arrange", "dplyr", quiet = TRUE)


Import Weather Data

First, we import some weather data from the CIMIS network that has been saved to a Google Sheet that can be viewed by anyone with the link.

library(googlesheets4)
arvinedison_url <- "https://docs.google.com/spreadsheets/d/13FR4Mji24KqZmLqK_BiLKew8nN4MbCdh1KpzT54hH4M/"

## Tell R not to authorize with Google (not needed for this public Google Sheet)
gs4_deauth()

arvinedison_tbl <- read_sheet(arvinedison_url, sheet = "Sheet1")
v Reading from Arvin-Edison CIMIS Data Sep-2023.
v Range ''Sheet1''.
## View(arvinedison_tbl)
head(arvinedison_tbl)

Pro Tip:

You can download data from the CIMIS network directly into R using the cimir package (Michael Koohafkan).

library(cimir)
set_key("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")
arvinedison_tbl <- cimis_data(targets = 125, start.date = "2023-09-16", end.date = "2023-10-15",
                               items = "day-air-tmp-max,day-air-tmp-min,day-eto,day-precip")


CHALLENGE #1

Would you consider the CIMIS data to be long or wide?

It is tidy?


Create a date column

library(lubridate)
arvedsn_dt_tbl <- arvinedison_tbl |> 
  mutate(Dt = make_date(year = Year, month = Month, day = Day)) |> 
  relocate(Dt, .after = Day)

## View(arvedsn_dt_tbl)
head(arvedsn_dt_tbl)


Create a wide version of the table

Guide to pivoting: https://tidyr.tidyverse.org/articles/pivot.html

library(tidyr)
arvedsn_wide_tbl <- arvedsn_dt_tbl |> 
  pivot_wider(names_from = Item, values_from = Value, id_cols = c(Station, Dt))

## View(arvedsn_wide_tbl)
head(arvedsn_wide_tbl)


Deal with Missing Values

The evapotranspiration column has some missing values. In this section, we’ll look at a couple of different methods of filling them in. (For a real project, we’d want to consult someone who knows about evapotranspiration to decide which method is most appropriate for these data!)

First, we create a data frame with just Eto:

arvedsn_eto_tbl <- arvedsn_dt_tbl |> 
  filter(Item == "DayEto") |> 
  select(Station, Dt, Item, Value, Unit)

head(arvedsn_eto_tbl)


Are there missing values?

## View(arvedsn_eto_tbl)

summary(arvedsn_eto_tbl)
    Station          Dt                 Item               Value            Unit          
 Min.   :125   Min.   :2023-09-16   Length:30          Min.   :0.1000   Length:30         
 1st Qu.:125   1st Qu.:2023-09-23   Class :character   1st Qu.:0.1500   Class :character  
 Median :125   Median :2023-09-30   Mode  :character   Median :0.1800   Mode  :character  
 Mean   :125   Mean   :2023-09-30                      Mean   :0.1708                     
 3rd Qu.:125   3rd Qu.:2023-10-07                      3rd Qu.:0.1900                     
 Max.   :125   Max.   :2023-10-15                      Max.   :0.2100                     
                                                       NA's   :5                          


Fix #1. Throw away incomplete rows

tidyr::drop_na() will throw away rows that have NA values in any of the specified columns (default is check all columns):

arvedsn_eto_fix1_tbl <- arvedsn_eto_tbl |> 
  tidyr::drop_na()

# View(arvedsn_eto_fix1_tbl)
nrow(arvedsn_eto_tbl); nrow(arvedsn_eto_fix1_tbl)
[1] 30
[1] 25


Fix #2. Replace with the mean or median

Step 1. Decide: mean or median?

arvedsn_eto_tbl |> pull(Value) |> hist(breaks = 20)


Step 2. Compute the median

value_median <- arvedsn_eto_tbl |> pull(Value) |> median(na.rm = TRUE)
value_median
[1] 0.18


Step 3. Substitute the median whenever Value is NA.

arvedsn_eto_fix2_tbl <- arvedsn_eto_tbl |> 
  mutate(Value_Fix1 = if_else(is.na(Value), value_median, Value))

## You can also use tidyr::replace_na()
## arvedsn_eto_fix2_tbl <- arvedsn_eto_tbl |> 
##   replace_na(list(Value = value_median))

# View(arvedsn_eto_fix2_tbl) 


Fix #3. Interpolate missing values

To interpolate missing values, we’ll create a vector with the replacement values (using the zoo package), and then tack it on to the data frame.

First, create the replacement values:

library(zoo)
vals_linear_interpolation <- na.approx(arvedsn_eto_tbl$Value)
vals_linear_interpolation
 [1] 0.2100 0.1900 0.2000 0.1800 0.1900 0.1800 0.1900 0.1900 0.1900 0.1800 0.1825 0.1850 0.1875 0.1900 0.1300 0.1200 0.1600 0.1700
[19] 0.1800 0.1900 0.1800 0.1900 0.1800 0.1700 0.1600 0.1000 0.1500 0.1500 0.1500 0.1500
# You can also do spline interpolation
# vals_spline_interpolation <- na.spline(arvedsn_eto_tbl$Value)
# vals_spline_interpolation


Next, add the vector with imputed values as a new column with bind_cols:

arvedsn_eto_fix3_tbl <- arvedsn_eto_tbl |> 
  bind_cols(Value_Fix2 = vals_linear_interpolation)

# View(arvedsn_eto_fix3_tbl)
head(arvedsn_eto_fix3_tbl)


This is just scratching the surface of dealing with missing values!

See also: imputeTS


CHALLENGE #2

There are also some missing values in the Maximum Daily Air Temperature (DayAirTmpMax).

Decide what would be a reasonable method to deal with these, and implement it.


Resampling data

Bin the Eto values into low, medium, and high:

arvedsn_eto_fix3_tbl <- arvedsn_eto_fix2_tbl |> 
  mutate(eto_level = case_when(Value_Fix2 <= 0.14 ~ "low",
                               Value_Fix2 <= 0.19 ~ "medium",
                               TRUE ~ "high"))
Error in `mutate()`:
i In argument: `eto_level = case_when(...)`.
Caused by error in `case_when()`:
! Failed to evaluate the left-hand side of formula 1.
Caused by error:
! object 'Value_Fix2' not found
Backtrace:
  1. dplyr::mutate(...)
  8. dplyr::case_when(...)
  9. dplyr:::case_formula_evaluate(...)
 11. rlang::eval_tidy(pair$lhs, env = default_env)


End

Remember to save your work to render a HTML file.

LS0tDQp0aXRsZTogIkRhdGEgV3JhbmdsaW5nIDM6IFRpbWUgU2VyaWVzIERhdGEgYW5kIE1pc3NpbmcgVmFsdWVzIg0Kb3V0cHV0OiANCiAgaHRtbF9ub3RlYm9vazoNCiAgICB0b2M6IHllcw0KICAgIHRvY19kZXB0aDogMg0KICAgIHRvY19mbG9hdDogeWVzDQotLS0NCg0KYGBge2NzcyBlY2hvID0gRkFMU0V9DQpoMSxoMixoMyB7Zm9udC13ZWlnaHQ6Ym9sZDt9DQpoMSB7Zm9udC1zaXplOjI0cHg7fQ0KaDIge2ZvbnQtc2l6ZToyMHB4O30NCmgzIHtmb250LXNpemU6MTZweDt9DQpgYGANClwNCg0KIyBMb2FkIFBhY2thZ2VzIA0KDQpgYGB7ciBjaHVuazAxfQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkodGlkeXIpDQoNCiMgU3BlY2lmeSBwYWNrYWdlIHByZWZlcmVuY2VzIGZvciBmaWx0ZXIsIGNvdW50LCBzZWxlY3QsIGFuZCBhcnJhbmdlDQpsaWJyYXJ5KGNvbmZsaWN0ZWQpDQpjb25mbGljdF9wcmVmZXIoImZpbHRlciIsICJkcGx5ciIsIHF1aWV0ID0gVFJVRSkNCmNvbmZsaWN0X3ByZWZlcigiY291bnQiLCAiZHBseXIiLCBxdWlldCA9IFRSVUUpDQpjb25mbGljdF9wcmVmZXIoInNlbGVjdCIsICJkcGx5ciIsIHF1aWV0ID0gVFJVRSkNCmNvbmZsaWN0X3ByZWZlcigiYXJyYW5nZSIsICJkcGx5ciIsIHF1aWV0ID0gVFJVRSkNCmBgYA0KDQpcDQoNCiMgSW1wb3J0IFdlYXRoZXIgRGF0YQ0KDQpGaXJzdCwgd2UgaW1wb3J0IHNvbWUgd2VhdGhlciBkYXRhIGZyb20gdGhlIENJTUlTIG5ldHdvcmsgdGhhdCBoYXMgYmVlbiBzYXZlZCB0byBhIFtHb29nbGUgU2hlZXRdKGh0dHBzOi8vZG9jcy5nb29nbGUuY29tL3NwcmVhZHNoZWV0cy9kLzEzRlI0TWppMjRLcVptTHFLX0JpTEtldzhuTjRNYkNkaDFLcHpUNTRoSDRNLykgdGhhdCBjYW4gYmUgdmlld2VkIGJ5IGFueW9uZSB3aXRoIHRoZSBsaW5rLg0KDQpgYGB7ciBjaHVuazAyfQ0KbGlicmFyeShnb29nbGVzaGVldHM0KQ0KYXJ2aW5lZGlzb25fdXJsIDwtICJodHRwczovL2RvY3MuZ29vZ2xlLmNvbS9zcHJlYWRzaGVldHMvZC8xM0ZSNE1qaTI0S3FabUxxS19CaUxLZXc4bk40TWJDZGgxS3B6VDU0aEg0TS8iDQoNCiMjIFRlbGwgUiBub3QgdG8gYXV0aG9yaXplIHdpdGggR29vZ2xlIChub3QgbmVlZGVkIGZvciB0aGlzIHB1YmxpYyBHb29nbGUgU2hlZXQpDQpnczRfZGVhdXRoKCkNCg0KYXJ2aW5lZGlzb25fdGJsIDwtIHJlYWRfc2hlZXQoYXJ2aW5lZGlzb25fdXJsLCBzaGVldCA9ICJTaGVldDEiKQ0KDQojIyBWaWV3KGFydmluZWRpc29uX3RibCkNCmhlYWQoYXJ2aW5lZGlzb25fdGJsKQ0KYGBgDQoNCioqUHJvIFRpcDoqKg0KDQpZb3UgY2FuIGRvd25sb2FkIGRhdGEgZnJvbSB0aGUgQ0lNSVMgbmV0d29yayBkaXJlY3RseSBpbnRvIFIgdXNpbmcgdGhlIFtgY2ltaXJgXShodHRwczovL2NyYW4uci1wcm9qZWN0Lm9yZy9wYWNrYWdlPWNpbWlyKSBwYWNrYWdlIChNaWNoYWVsIEtvb2hhZmthbikuDQoNCmBgYA0KbGlicmFyeShjaW1pcikNCnNldF9rZXkoInh4eHh4eHh4LXh4eHgteHh4eC14eHh4LXh4eHh4eHh4eHh4eCIpDQphcnZpbmVkaXNvbl90YmwgPC0gY2ltaXNfZGF0YSh0YXJnZXRzID0gMTI1LCBzdGFydC5kYXRlID0gIjIwMjMtMDktMTYiLCBlbmQuZGF0ZSA9ICIyMDIzLTEwLTE1IiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBpdGVtcyA9ICJkYXktYWlyLXRtcC1tYXgsZGF5LWFpci10bXAtbWluLGRheS1ldG8sZGF5LXByZWNpcCIpDQpgYGANCg0KXA0KDQojIENIQUxMRU5HRSAjMQ0KDQpXb3VsZCB5b3UgY29uc2lkZXIgdGhlIENJTUlTIGRhdGEgdG8gYmUgbG9uZyBvciB3aWRlPyANCg0KSXQgaXMgdGlkeT8NCg0KXA0KDQojIENyZWF0ZSBhIGRhdGUgY29sdW1uDQoNCmBgYHtyIGNodW5rMDN9DQpsaWJyYXJ5KGx1YnJpZGF0ZSkNCmFydmVkc25fZHRfdGJsIDwtIGFydmluZWRpc29uX3RibCB8PiANCiAgbXV0YXRlKER0ID0gbWFrZV9kYXRlKHllYXIgPSBZZWFyLCBtb250aCA9IE1vbnRoLCBkYXkgPSBEYXkpKSB8PiANCiAgcmVsb2NhdGUoRHQsIC5hZnRlciA9IERheSkNCg0KIyMgVmlldyhhcnZlZHNuX2R0X3RibCkNCmhlYWQoYXJ2ZWRzbl9kdF90YmwpDQpgYGANCg0KXA0KDQojIENyZWF0ZSBhIHdpZGUgdmVyc2lvbiBvZiB0aGUgdGFibGUNCg0KR3VpZGUgdG8gcGl2b3Rpbmc6IDxodHRwczovL3RpZHlyLnRpZHl2ZXJzZS5vcmcvYXJ0aWNsZXMvcGl2b3QuaHRtbD4NCg0KYGBge3IgY2h1bmswNH0NCmxpYnJhcnkodGlkeXIpDQphcnZlZHNuX3dpZGVfdGJsIDwtIGFydmVkc25fZHRfdGJsIHw+IA0KICBwaXZvdF93aWRlcihuYW1lc19mcm9tID0gSXRlbSwgdmFsdWVzX2Zyb20gPSBWYWx1ZSwgaWRfY29scyA9IGMoU3RhdGlvbiwgRHQpKQ0KDQojIyBWaWV3KGFydmVkc25fd2lkZV90YmwpDQpoZWFkKGFydmVkc25fd2lkZV90YmwpDQpgYGANCg0KXA0KDQojIERlYWwgd2l0aCBNaXNzaW5nIFZhbHVlcw0KDQpUaGUgZXZhcG90cmFuc3BpcmF0aW9uIGNvbHVtbiBoYXMgc29tZSBtaXNzaW5nIHZhbHVlcy4gSW4gdGhpcyBzZWN0aW9uLCB3ZSdsbCBsb29rIGF0IGEgY291cGxlIG9mIGRpZmZlcmVudCBtZXRob2RzIG9mIGZpbGxpbmcgdGhlbSBpbi4gKEZvciBhIHJlYWwgcHJvamVjdCwgd2UnZCB3YW50IHRvIGNvbnN1bHQgc29tZW9uZSB3aG8ga25vd3MgYWJvdXQgZXZhcG90cmFuc3BpcmF0aW9uIHRvIGRlY2lkZSB3aGljaCBtZXRob2QgaXMgbW9zdCBhcHByb3ByaWF0ZSBmb3IgdGhlc2UgZGF0YSEpDQoNCkZpcnN0LCB3ZSBjcmVhdGUgYSBkYXRhIGZyYW1lIHdpdGgganVzdCBFdG86DQoNCmBgYHtyIGNodW5rMDV9DQphcnZlZHNuX2V0b190YmwgPC0gYXJ2ZWRzbl9kdF90YmwgfD4gDQogIGZpbHRlcihJdGVtID09ICJEYXlFdG8iKSB8PiANCiAgc2VsZWN0KFN0YXRpb24sIER0LCBJdGVtLCBWYWx1ZSwgVW5pdCkNCg0KaGVhZChhcnZlZHNuX2V0b190YmwpDQpgYGANCg0KXA0KDQpBcmUgdGhlcmUgbWlzc2luZyB2YWx1ZXM/DQoNCmBgYHtyIGNodW5rMDZ9DQojIyBWaWV3KGFydmVkc25fZXRvX3RibCkNCg0Kc3VtbWFyeShhcnZlZHNuX2V0b190YmwpDQpgYGANCg0KXA0KDQojIEZpeCAjMS4gVGhyb3cgYXdheSBpbmNvbXBsZXRlIHJvd3MNCg0KYHRpZHlyOjpkcm9wX25hKClgIHdpbGwgdGhyb3cgYXdheSByb3dzIHRoYXQgaGF2ZSBgTkFgIHZhbHVlcyBpbiAqYW55KiBvZiB0aGUgc3BlY2lmaWVkIGNvbHVtbnMgKGRlZmF1bHQgaXMgY2hlY2sgYWxsIGNvbHVtbnMpOg0KDQpgYGB7ciBjaHVuazA3fQ0KYXJ2ZWRzbl9ldG9fZml4MV90YmwgPC0gYXJ2ZWRzbl9ldG9fdGJsIHw+IA0KICB0aWR5cjo6ZHJvcF9uYSgpDQoNCiMgVmlldyhhcnZlZHNuX2V0b19maXgxX3RibCkNCm5yb3coYXJ2ZWRzbl9ldG9fdGJsKTsgbnJvdyhhcnZlZHNuX2V0b19maXgxX3RibCkNCmBgYA0KDQpcDQoNCiMgRml4ICMyLiBSZXBsYWNlIHdpdGggdGhlIG1lYW4gb3IgbWVkaWFuDQoNClN0ZXAgMS4gRGVjaWRlOiBtZWFuIG9yIG1lZGlhbj8NCg0KYGBge3IgY2h1bmswOH0NCmFydmVkc25fZXRvX3RibCB8PiBwdWxsKFZhbHVlKSB8PiBoaXN0KGJyZWFrcyA9IDIwKQ0KYGBgDQoNClwNCg0KU3RlcCAyLiBDb21wdXRlIHRoZSBtZWRpYW4NCg0KYGBge3IgY2h1bmswOX0NCnZhbHVlX21lZGlhbiA8LSBhcnZlZHNuX2V0b190YmwgfD4gcHVsbChWYWx1ZSkgfD4gbWVkaWFuKG5hLnJtID0gVFJVRSkNCnZhbHVlX21lZGlhbg0KYGBgDQoNClwNCg0KU3RlcCAzLiBTdWJzdGl0dXRlIHRoZSBtZWRpYW4gd2hlbmV2ZXIgYFZhbHVlYCBpcyBOQS4NCg0KYGBge3IgY2h1bmsxMH0NCmFydmVkc25fZXRvX2ZpeDJfdGJsIDwtIGFydmVkc25fZXRvX3RibCB8PiANCiAgbXV0YXRlKFZhbHVlX0ZpeDEgPSBpZl9lbHNlKGlzLm5hKFZhbHVlKSwgdmFsdWVfbWVkaWFuLCBWYWx1ZSkpDQoNCiMjIFlvdSBjYW4gYWxzbyB1c2UgdGlkeXI6OnJlcGxhY2VfbmEoKQ0KIyMgYXJ2ZWRzbl9ldG9fZml4Ml90YmwgPC0gYXJ2ZWRzbl9ldG9fdGJsIHw+IA0KIyMgICByZXBsYWNlX25hKGxpc3QoVmFsdWUgPSB2YWx1ZV9tZWRpYW4pKQ0KDQojIFZpZXcoYXJ2ZWRzbl9ldG9fZml4Ml90YmwpIA0KYGBgDQoNClwNCg0KIyBGaXggIzMuIEludGVycG9sYXRlIG1pc3NpbmcgdmFsdWVzDQoNClRvIGludGVycG9sYXRlIG1pc3NpbmcgdmFsdWVzLCB3ZSdsbCBjcmVhdGUgYSB2ZWN0b3Igd2l0aCB0aGUgcmVwbGFjZW1lbnQgdmFsdWVzICh1c2luZyB0aGUgYHpvb2AgcGFja2FnZSksIGFuZCB0aGVuIHRhY2sgaXQgb24gdG8gdGhlIGRhdGEgZnJhbWUuDQoNCkZpcnN0LCBjcmVhdGUgdGhlIHJlcGxhY2VtZW50IHZhbHVlczoNCg0KYGBge3IgY2h1bmsxMX0NCmxpYnJhcnkoem9vKQ0KdmFsc19saW5lYXJfaW50ZXJwb2xhdGlvbiA8LSBuYS5hcHByb3goYXJ2ZWRzbl9ldG9fdGJsJFZhbHVlKQ0KdmFsc19saW5lYXJfaW50ZXJwb2xhdGlvbg0KDQojIFlvdSBjYW4gYWxzbyBkbyBzcGxpbmUgaW50ZXJwb2xhdGlvbg0KIyB2YWxzX3NwbGluZV9pbnRlcnBvbGF0aW9uIDwtIG5hLnNwbGluZShhcnZlZHNuX2V0b190YmwkVmFsdWUpDQojIHZhbHNfc3BsaW5lX2ludGVycG9sYXRpb24NCmBgYA0KXA0KDQpOZXh0LCBhZGQgdGhlIHZlY3RvciB3aXRoIGltcHV0ZWQgdmFsdWVzIGFzIGEgbmV3IGNvbHVtbiB3aXRoIGBiaW5kX2NvbHNgOg0KDQpgYGB7ciBjaHVuazEyfQ0KYXJ2ZWRzbl9ldG9fZml4M190YmwgPC0gYXJ2ZWRzbl9ldG9fdGJsIHw+IA0KICBiaW5kX2NvbHMoVmFsdWVfRml4MiA9IHZhbHNfbGluZWFyX2ludGVycG9sYXRpb24pDQoNCiMgVmlldyhhcnZlZHNuX2V0b19maXgzX3RibCkNCmhlYWQoYXJ2ZWRzbl9ldG9fZml4M190YmwpDQpgYGANCg0KXA0KDQpUaGlzIGlzIGp1c3Qgc2NyYXRjaGluZyB0aGUgc3VyZmFjZSBvZiBkZWFsaW5nIHdpdGggbWlzc2luZyB2YWx1ZXMhIA0KDQpTZWUgYWxzbzogW2ltcHV0ZVRTXShodHRwczovL3N0ZWZmZW5tb3JpdHouZ2l0aHViLmlvL2ltcHV0ZVRTLykgDQoNClwNCg0KIyBDSEFMTEVOR0UgIzINCg0KVGhlcmUgYXJlIGFsc28gc29tZSBtaXNzaW5nIHZhbHVlcyBpbiB0aGUgTWF4aW11bSBEYWlseSBBaXIgVGVtcGVyYXR1cmUgKERheUFpclRtcE1heCkuIA0KDQpEZWNpZGUgd2hhdCB3b3VsZCBiZSBhIHJlYXNvbmFibGUgbWV0aG9kIHRvIGRlYWwgd2l0aCB0aGVzZSwgYW5kIGltcGxlbWVudCBpdC4NCg0KXA0KDQojIFJlc2FtcGxpbmcgZGF0YQ0KDQpCaW4gdGhlIEV0byB2YWx1ZXMgaW50byBsb3csIG1lZGl1bSwgYW5kIGhpZ2g6DQoNCmBgYHtyIGNodW5rMTN9DQphcnZlZHNuX2V0b19maXgzX3RibCA8LSBhcnZlZHNuX2V0b19maXgyX3RibCB8PiANCiAgbXV0YXRlKGV0b19sZXZlbCA9IGNhc2Vfd2hlbihWYWx1ZV9GaXgyIDw9IDAuMTQgfiAibG93IiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBWYWx1ZV9GaXgyIDw9IDAuMTkgfiAibWVkaXVtIiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBUUlVFIH4gImhpZ2giKSkNCg0KIyBWaWV3KGFydmVkc25fZXRvX2ZpeDNfdGJsKQ0KaGVhZChhcnZlZHNuX2V0b19maXgzX3RibCkNCmBgYA0KDQpcDQoNCiMgRW5kDQoNClJlbWVtYmVyIHRvIHNhdmUgeW91ciB3b3JrIHRvIHJlbmRlciBhIEhUTUwgZmlsZS4NCg0K