MacroSheds data retrieval and flux methods

Retrieving discharge, precipitation, and chemistry data to compute solute fluxes and annual loads

Weston Slaughter, Mike Vlah, Spencer Rhea


Install and load the macrosheds package

#if needed: install.packages('remotes')
remotes::install_github('MacroSHEDS/macrosheds')
library(macrosheds)
library(dplyr) #for %>%, filter(), select(), and friends used throughout

help(package = 'macrosheds')
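Once attached, you can also skim the package's exported functions with base R:

ls('package:macrosheds')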

Identify target sites and variables

Let’s say we’re looking for watersheds with pre-1970 data records that are still in operation.

?ms_load_sites

ms_sites <- ms_load_sites()
colnames(ms_sites) # sites are grouped into domains, which are grouped into networks
##  [1] "network"          "network_fullname" "domain"           "domain_fullname" 
##  [5] "site_code"        "site_fullname"    "stream_name"      "site_type"       
##  [9] "ws_status"        "latitude"         "longitude"        "epsg_code"       
## [13] "ws_area_ha"       "n_observations"   "n_variables"      "first_record"    
## [17] "last_record"      "timezone_olson"
ms_sites %>%
    filter(site_type == 'stream_gauge',
           first_record < as.Date('1970-01-01'),
           last_record > as.Date('2020-01-01')) %>%
    select(domain, site_code, first_record, last_record) %>%
    print(n = 18)
## # A tibble: 39 × 4
##    domain    site_code first_record last_record
##    <chr>     <chr>     <date>       <date>     
##  1 hbef      w1        1956-01-01   2023-12-31 
##  2 hbef      w2        1956-01-01   2023-12-31 
##  3 hbef      w3        1956-01-01   2023-12-31 
##  4 hbef      w4        1956-01-01   2023-12-31 
##  5 hbef      w5        1956-01-01   2023-12-31 
##  6 hbef      w6        1956-01-01   2023-12-31 
##  7 hbef      w7        1956-01-01   2023-12-31 
##  8 hbef      w8        1956-01-01   2023-12-31 
##  9 hbef      w9        1956-01-01   2023-12-31 
## 10 hjandrews GSWS01    1951-11-21   2020-09-30 
## 11 hjandrews GSWS02    1951-11-21   2020-09-30 
## 12 hjandrews GSWS03    1951-11-21   2020-09-30 
## 13 hjandrews GSWS06    1951-11-21   2020-09-30 
## 14 hjandrews GSWS07    1951-11-21   2020-09-30 
## 15 hjandrews GSWS08    1951-11-21   2020-09-30 
## 16 hjandrews GSWS09    1951-11-21   2020-09-30 
## 17 hjandrews GSWS10    1951-11-21   2020-09-30 
## 18 hjandrews GSMACK    1951-11-21   2020-09-30 
## # ℹ 21 more rows
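
To see how those long-running gauges are distributed across domains, tally the same filter by domain:

ms_sites %>%
    filter(site_type == 'stream_gauge',
           first_record < as.Date('1970-01-01'),
           last_record > as.Date('2020-01-01')) %>%
    count(domain)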

And maybe we’re interested in nitrate.

ms_load_variables(var_set = 'timeseries') %>%
    filter(grepl('nitrate', variable_name, ignore.case = TRUE)) %>%
    distinct(variable_code, variable_name)
## # A tibble: 6 × 2
##   variable_code variable_name                        
##   <chr>         <chr>                                
## 1 d15N_NO3      delta 15 nitrogen of nitrate         
## 2 d18O_NO3      delta 18 oxygen of nitrate           
## 3 NO3           nitrate                              
## 4 NO3_NO2       nitrate and nitrite                  
## 5 NO3_N         nitrate-nitrogen                     
## 6 NO3_NO2_N     nitrate-nitrogen and nitrite-nitrogen
ms_load_variables(var_set = 'timeseries_by_site') %>%
    filter(variable_code == 'NO3_N', #MacroSheds doesn't store NO3 data, but you can convert from NO3_N with ms_conversions() (sketched below)
           observations > 1000) %>%
    distinct(domain) %>%
    pull()
##  [1] "bear"           "boulder"        "catalina_jemez" "shale_hills"   
##  [5] "east_river"     "baltimore"      "bonanza"        "hjandrews"     
##  [9] "hbef"           "konza"          "luquillo"       "mcmurdo"       
## [13] "niwot"          "neon"           "fernow"         "krew"          
## [17] "santee"         "suef"           "loch_vale"      "panola"        
## [21] "sleepers"

Retrieve and load time-series data

HBEF and H. J. Andrews look like good candidates. Next, retrieve discharge and nitrate data for one site from each domain. You can have as many macrosheds_root directories as you like, but they should only be used for MacroSheds data.

my_domains <- c('hbef', 'hjandrews')
my_sites <- c('w1', 'GSWS10')
my_ms_dir <- './example/data'
options(timeout = 300) #default 60 seconds might not be enough if your connection is slow

ms_download_core_data(
    macrosheds_root = my_ms_dir,
    domains = my_domains #use "all" to retrieve all networks, domains, and sites.
)

q <- ms_load_product(
    my_ms_dir,
    prodname = 'discharge',
    site_codes = my_sites
)

no3 <- ms_load_product(
    my_ms_dir,
    prodname = 'stream_chemistry',
    filter_vars = 'NO3_N',
    site_codes = my_sites
)
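
As noted above, MacroSheds serves nitrate as NO3_N rather than NO3. If you need the molecular (NO3) basis, ms_conversions() can convert between forms. A minimal sketch; the convert_molecules argument name is an assumption here, so confirm against ?ms_conversions:

#convert nitrate-N to nitrate (argument name is an assumption; see
#?ms_conversions for the exact interface in your installed version)
no3_molecular <- ms_conversions(no3, convert_molecules = 'NO3_N')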

Calculate flux and load

Next, compute daily nitrate-N fluxes and annual nitrate-N loads (i.e. cumulative fluxes) for stream water. Note that you can also retrieve these quantities precomputed via ms_load_product, as sketched below.

no3_flux <- ms_calc_flux(no3, q)
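
If you'd rather not compute flux yourself, the same quantities can be loaded precomputed. A sketch; the prodname string is an assumption, so check ?ms_load_product for the accepted product names:

#precomputed, area-scaled daily stream flux (prodname is an assumption)
no3_flux_pre <- ms_load_product(
    my_ms_dir,
    prodname = 'stream_flux_inst_scaled',
    filter_vars = 'NO3_N',
    site_codes = my_sites
)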

#you can convert flux from kg/ha/d to kg/d (and back with ms_scale_flux_by_area)
ms_undo_scale_flux_by_area(no3_flux)
## # A tibble: 20,023 × 7
##    date       site_code grab_sample var     val ms_status ms_interp
##    <date>     <chr>           <dbl> <chr> <dbl>     <dbl>     <dbl>
##  1 1970-04-28 w1                  1 NO3_N 1.14          0         0
##  2 1970-04-29 w1                  1 NO3_N 0.845         0         1
##  3 1970-04-30 w1                  1 NO3_N 0.629         0         1
##  4 1970-05-01 w1                  1 NO3_N 0.512         0         1
##  5 1970-05-02 w1                  1 NO3_N 0.427         0         1
##  6 1970-05-03 w1                  1 NO3_N 0.497         0         1
##  7 1970-05-04 w1                  1 NO3_N 0.417         0         1
##  8 1970-05-05 w1                  1 NO3_N 0.328         0         1
##  9 1970-05-06 w1                  1 NO3_N 0.345         0         1
## 10 1970-05-07 w1                  1 NO3_N 0.295         0         1
## # ℹ 20,013 more rows
no3_load_out <- ms_calc_load(filter(no3, ms_interp == 0),
                             filter(q, ms_interp == 0))
## Warning in ms_calc_load(filter(no3, ms_interp == 0), filter(q, ms_interp == :
## Negative/infinite load values detected.
no3_load <- filter(no3_load_out$load, ms_recommended)

plot(no3_load$water_year[no3_load$site_code == 'w1'],
     no3_load$load[no3_load$site_code == 'w1'], type = 'l', xlab = '',
     ylab = 'NO3-N load (kg/ha/yr)', lwd = 2)
lines(no3_load$water_year[no3_load$site_code == 'GSWS10'],
     no3_load$load[no3_load$site_code == 'GSWS10'], type = 'l', col = 'blue', lwd = 2)
legend('topright', legend = c('w1', 'GSWS10'), col = c('black', 'blue'), lwd = 2, bty = 'n')

As you can see from the warning, not all load-estimation methods are appropriate for every use case. Consult the ms_recommended column and the docs (?ms_calc_load) for guidance on choosing among them.
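
For a quick view of which method was recommended where, tabulate the load output. This assumes a method column alongside ms_recommended; check names(no3_load_out$load) to confirm on your version:

#recommended load method by site (the 'method' column name is an assumption)
no3_load_out$load %>%
    filter(ms_recommended) %>%
    count(site_code, method)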

Calculate precipitation flux

Note that you can use ms_calc_flux to compute daily precipitation solute fluxes as well. Many domains provide their own gauged precipitation data, but you will often get better coverage and across-site comparability by using a gridded product like PRISM. Here's how to compute daily SO4-S influxes using PRISM precipitation.

ms_load_variables(var_set = 'ws_attr') %>%
    filter(data_class == 'climate',
           grepl('precip', variable_name))
## # A tibble: 3 × 5
##   variable_code variable_name                       unit  data_class data_source
##   <chr>         <chr>                               <chr> <chr>      <chr>      
## 1 precip_mean   multi-year mean of annual precip_m… mm    climate    PRISM      
## 2 precip_median daily watershed-median precipitati… mm    climate    PRISM      
## 3 precip_sd     daily standard deviation of waters… mm    climate    PRISM
ms_download_ws_attr(
    macrosheds_root = my_ms_dir,
    dataset = 'time series' #watershed attribute (spatially-explicit) time series. This file is over 1.5 GiB
)

precip <- ms_load_product(
    my_ms_dir,
    prodname = 'ws_attr_timeseries:climate',
    filter_vars = 'precip_median',
    site_codes = my_sites,
    warn = FALSE
)

so4 <- ms_load_product(
    my_ms_dir,
    prodname = 'stream_chemistry',
    filter_vars = 'SO4_S',
    site_codes = my_sites
) %>%
    select(-ms_status, -ms_interp)

so4_precip_flux <- ms_calc_flux(so4, precip)
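
A quick sanity check on the result, assuming the same column layout as the stream flux output above:

#mean daily SO4-S deposition by site, in kg/ha/d
so4_precip_flux %>%
    group_by(site_code) %>%
    summarize(mean_flux = mean(val, na.rm = TRUE))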

Cite your sources

attrib <- ms_generate_attribution(bind_rows(so4, no3, precip, q))
cat(head(attrib$bibliography, n = 2), '...\n')
## @article{vlah_etal_macrosheds_2023,
##  author = {Vlah, Michael J. and Rhea, Spencer and Bernhardt, Emily S. and Slaughter, Weston and Gubbins, Nick and DelVecchia, Amanda G. and Thellman, Audrey and Ross, Matthew R. V.},
##  title = {MacroSheds: A synthesis of long-term biogeochemical, hydroclimatic, and geospatial data from small watershed ecosystem studies},
##  journal = {Limnology and Oceanography Letters},
##  volume = {8},
##  number = {3},
##  pages = {419-452},
##  doi = {https://doi.org/10.1002/lol2.10325},
##  url = {https://aslopubs.onlinelibrary.wiley.com/doi/full/10.1002/lol2.10325},
##  year = {2023},
## }
##  @misc{usda forest service_hubbard_2024,
##  title = {{Hubbard} {Brook} {Experimental} {Forest}: {Instantaneous} {Streamflow} by {Watershed}, 1956 – present},
##  publisher = {Environmental Data Initiative},
##  author = {{USDA Forest Service, Northern Research Station}},
##  year = {2024},
##  doi = {https://doi.org/10.6073/pasta/62e4a77cb23b70fd9164520f129a25ac}
## }
##  ...
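
To keep the full bibliography for your reference manager, write it to a .bib file (the filename is arbitrary):

#attrib$bibliography is a character vector of BibTeX entries
writeLines(attrib$bibliography, 'macrosheds_citations.bib')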