Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rebuild data pipeline for Water Year 2022 #77

Merged
merged 8 commits into from
Nov 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions 0_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ targets:
##-- Fetch configs --##

viz_start_date:
command: as.Date(I('2021-01-01'))
command: as.Date(I('2021-10-01'))
viz_end_date:
command: as.Date(I('2021-12-31'))
command: as.Date(I('2022-09-30'))
# Use bounding box in case we want to do this regionally someday
# Start with CONUS only
# I don't think this is being used any more ...
Expand Down
6 changes: 4 additions & 2 deletions 0_historic/src/fetch_nwis_historic.R
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,11 @@ adjust_for_daylight_savings <- function(posix_dates, tz_desired) {
# `from` and `to` values are the same, don't
# change anything about the dates.
tz_conversion_xwalk <- tibble(
# NWIS dates appear to always be reported in standard time,
# so we shouldn't subtract an hour when going from DT to ST.
from = c('DT', 'ST', 'ST', 'DT'),
to = c('ST', 'DT', 'ST', 'DT'),
conversion_sec = c(-3600, 3600, 0, 0)
conversion_sec = c(0, 3600, 0, 0)
)

# There could be more than one timezone if the date range spans across
Expand All @@ -195,7 +197,7 @@ adjust_for_daylight_savings <- function(posix_dates, tz_desired) {
to = stringr::str_sub(tz_desired, -2, -1)
) %>%
# Join in conversion xwalk
left_join(tz_conversion_xwalk) %>%
left_join(tz_conversion_xwalk, by = c("from", "to")) %>%
# Alter the date values to match the desired timezone.
mutate(out_dates = in_dates + conversion_sec) %>%
# Pull out just the dates to return
Expand Down
5 changes: 4 additions & 1 deletion 1_fetch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ targets:

##-- Historic GW sites and data --##

# This data comes from the Dev VPC with `us-west-2`

last_updated:
command: c(I('2021-10-20'))
command: c(I('2022-09-19 10:00 AM'))

# All sites (including those with years < min_years)
1_fetch/out/historic_gw_site_info_unfiltered.rds:
Expand Down Expand Up @@ -98,6 +100,7 @@ targets:
depends:
- viz_start_date
- viz_end_date
- '1_fetch/src/fetch_nwis.R'

# Combine all data
1_fetch/out/gw_data.csv:
Expand Down
8 changes: 5 additions & 3 deletions 1_fetch/src/fetch_nwis.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ convert_uv_to_dv <- function(target_name, gw_uv_data_fn, site_tz_xwalk) {

### Reduce each instantaneous value to a single average for each date
group_by(site_no, Date) %>%
summarize(GWL = mean(GWL_inst, na.rm = TRUE)) %>%
summarize(GWL = mean(GWL_inst, na.rm = TRUE), .groups = "keep") %>%
write_feather(target_name)
}

Expand Down Expand Up @@ -107,9 +107,11 @@ adjust_for_daylight_savings <- function(posix_dates, tz_desired) {
# `from` and `to` values are the same, don't
# change anything about the dates.
tz_conversion_xwalk <- tibble(
# NWIS dates appear to always be reported in standard time,
# so we shouldn't subtract an hour when going from DT to ST.
from = c('DT', 'ST', 'ST', 'DT'),
to = c('ST', 'DT', 'ST', 'DT'),
conversion_sec = c(-3600, 3600, 0, 0)
conversion_sec = c(0, 3600, 0, 0)
)

# There could be more than one timezone if the date range spans across
Expand All @@ -126,7 +128,7 @@ adjust_for_daylight_savings <- function(posix_dates, tz_desired) {
to = stringr::str_sub(tz_desired, -2, -1)
) %>%
# Join in conversion xwalk
left_join(tz_conversion_xwalk) %>%
left_join(tz_conversion_xwalk, by = c("from", "to")) %>%
# Alter the date values to match the desired timezone.
mutate(out_dates = in_dates + conversion_sec) %>%
# Pull out just the dates to return
Expand Down
14 changes: 12 additions & 2 deletions 2_process.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,23 @@ targets:
# current implementation, this means any site that used pcode == '72019'
depth_below_sites:
command: c(gw_sites_dv, gw_sites_uv)


# Filter out any dates outside of the range we want to visualize. This
# shouldn't be needed forever, but due to Issue #78, it is for now.
# https://github.com/USGS-VIZLAB/gw-conditions/issues/78
2_process/out/gw_daily_viz_range.csv:
command: subset_to_date_range(
target_name,
daily_data_fn = "1_fetch/out/gw_data.csv",
start_date = viz_start_date,
end_date = viz_end_date)

# Calculate quantiles of each daily value
2_process/out/gw_daily_quantiles.csv:
command: compare_to_historic(
target_name,
historic_quantile_fn = "1_fetch/out/historic_gw_quantiles.csv",
current_data_fn = "1_fetch/out/gw_data.csv",
current_data_fn = "2_process/out/gw_daily_viz_range.csv",
inverse_sites = depth_below_sites)

# Summarize GWL data quantiles as very high, high, normal, low, or very low
Expand Down
11 changes: 10 additions & 1 deletion 2_process/src/prep_data_for_visualizing.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,13 @@ generate_months <- function(file_out, data_in){
# label years on first month they appear
mutate(year_label = ifelse(day_seq == min(day_seq), year, NA)) %>%
write_csv(file_out)
}
}

# Restrict the daily data to the requested time range so that the values
# being displayed, and the labels created from them, stay within it.
#
# file_out: path of the CSV file the subset is written to
# daily_data_fn: path of the CSV file of daily data; must have a `Date` column
# start_date, end_date: bounds of the range to keep (both ends are kept)
subset_to_date_range <- function(file_out, daily_data_fn, start_date, end_date) {
  # NOTE(review): column types are guessed by read_csv() here; confirm that
  # identifier columns come through the read/write round trip unchanged.
  daily_data <- read_csv(daily_data_fn)

  # Comma-separated conditions are AND-ed together, so a row is kept only
  # when its Date is on or after start_date and on or before end_date.
  in_range <- filter(daily_data, Date >= start_date, Date <= end_date)

  write_csv(in_range, file_out)
}
1 change: 1 addition & 0 deletions 3_visualize.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ targets:
- visualizations/data/gw-conditions-peaks-timeseries.csv.ind
- visualizations/data/gw-conditions-daily-proportions.csv.ind
- src/assets/gw-conditions-peaks-map.svg # Currently a temporary fix!
- 3_visualize/out/gw-conditions-peaks-timeseries-s3copy.ind

3_visualize/out/gw-conditions-peaks-map.svg:
command: build_peaks_svg(
Expand Down
2 changes: 1 addition & 1 deletion 3_visualize/out/gw-conditions-peaks-timeseries-s3copy.ind
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: 5712d491ea306dfb1a4e861ed54e6cf2
hash: 5231ce48f14301823508dafc1c6654fb

2 changes: 1 addition & 1 deletion gw-conditions/historic_gw_data_filtered.csv.ind
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: 2975ba5bc35acc29f9a51d463594190c
hash: d07a353da92f433b2b539f7b1ec529a0

2 changes: 1 addition & 1 deletion gw-conditions/historic_gw_data_unfiltered.csv.ind
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: eda380bbd7f3f384c3b1719035bd80f9
hash: 2051329e1454531db078afc23a6a5e5c

2 changes: 1 addition & 1 deletion gw-conditions/historic_gw_quantiles.csv.ind
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: bc49e934336aa5a31262e97675b3e8d5
hash: 2c8c9383802a6c93623209dcb63ce94a

2 changes: 1 addition & 1 deletion gw-conditions/historic_gw_site_info_filtered.rds.ind
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: 2ee7ab3937551ebd1cfc8f5e45d2e54e
hash: ec8ad75c9cb378306f6c45c549050156

2 changes: 1 addition & 1 deletion gw-conditions/historic_gw_site_info_unfiltered.rds.ind
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: 674be56e47bc7020e9cbbb62f161a54f
hash: b722e0402d1469484bcc8743dfc2213f

18 changes: 1 addition & 17 deletions src/assets/gw-conditions-peaks-map.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: dacdd629a8902f55b7465b47f213f3b7
hash: 2bb7bc7d0e8936938a3a5b0216e40b52

2 changes: 1 addition & 1 deletion visualizations/data/gw-conditions-peaks-timeseries.csv.ind
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: 5712d491ea306dfb1a4e861ed54e6cf2
hash: 5231ce48f14301823508dafc1c6654fb

2 changes: 1 addition & 1 deletion visualizations/data/gw-conditions-site-coords.csv.ind
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: d7fccac37ae07d325d0a5fa9f0c51b99
hash: db0f5040f8220b104c9fc2c5df72438c

2 changes: 1 addition & 1 deletion visualizations/data/gw-conditions-time-labels.csv.ind
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: 3b8c1167d2279d8f16da3ed15ad3f422
hash: d171203f4553505d599287a95a120338