2_process.R
source("2_process/src/munge_inst_timeseries.R")
source("2_process/src/create_site_list.R")
source("2_process/src/summarize_site_list.R")
source("2_process/src/match_sites_reaches.R")
source("2_process/src/calc_daily_light.R")
source("2_process/src/metab_utils.R")
source("2_process/src/process_nhdv2_attributes.R")
source("1_fetch/src/write_data.R")
# Explicitly attach sf package to handle geometry data when mapping over `p1_reaches_sf`
library(sf)
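# Note: the dplyr verbs (filter, mutate, left_join, etc.) and {targets} functions used
# below are presumably attached elsewhere, e.g. via tar_option_set(packages = ...) in _targets.R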
p2_targets_list <- list(
  # Filter DO data to only include approved records
  tar_target(
    p2_inst_data_filtered,
    filter(p1_inst_data, !grepl("P", Value_Inst_cd))
  ),
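  # For the daily DO data, set provisional ("P") values to NA and keep only rows
  # with at least one non-missing daily value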
  tar_target(
    p2_daily_data_filtered,
    p1_daily_data %>%
      mutate(Value = if_else(grepl("P", Value_cd), NA_real_, Value),
             Value_Max = if_else(grepl("P", Value_Max_cd), NA_real_, Value_Max),
             Value_Min = if_else(grepl("P", Value_Min_cd), NA_real_, Value_Min)) %>%
      filter(!is.na(Value) | !is.na(Value_Max) | !is.na(Value_Min))
  ),
  # Aggregate instantaneous DO data to daily min/mean/max values
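  # (min_daily_coverage = 0.5 presumably requires instantaneous records covering at
  # least half of a day before daily statistics are computed; times are reported in
  # America/New_York)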
  tar_target(
    p2_inst_data_daily,
    aggregate_data_to_daily(inst_data = p2_inst_data_filtered,
                            daily_data = p2_daily_data_filtered,
                            min_daily_coverage = 0.5,
                            output_tz = "America/New_York")
  ),
  # Combine 1) daily DO data and 2) instantaneous DO data that has been aggregated to daily
  tar_target(
    p2_daily_combined,
    bind_rows(p2_daily_data_filtered, p2_inst_data_daily)
  ),
  # Create a list of unique site locations containing DO data
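  # (wqp_data is set to NULL here, so presumably only NWIS sites are included in the
  # site list at this stage)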
  tar_target(
    p2_site_list,
    create_site_list(wqp_data = NULL,
                     nwis_sites = p1_nwis_sites,
                     nwis_daily_data = p2_daily_data_filtered,
                     nwis_inst_data = p2_inst_data_filtered,
                     hucs = drb_huc8s,
                     crs_out = "NAD83")
  ),
  # Create and save log file containing data availability summary
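  # (format = "file" makes {targets} track this target by the returned file path,
  # hashing the file's contents to detect changes)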
  tar_target(
    p2_sitelist_summary_csv,
    summarize_site_list(p2_site_list, p2_daily_data_filtered, p2_inst_data_filtered,
                        fileout = "2_process/log/sitelist_summary.csv"),
    format = "file"
  ),
  # Match NHDPlusv2 flowlines to observation site ids and return the subset of sites
  # within the distance specified by search_radius (in meters)
  tar_target(
    p2_sites_w_segs,
    {
      # Flowlines with no catchments do not have any associated climate driver data,
      # so omit any flowlines where AREASQKM == 0 before matching sites to reaches.
      nhd_reaches_w_cats <- p1_nhd_reaches_sf %>%
        filter(AREASQKM > 0)
      sites_w_segs <- get_site_nhd_flowlines(nhd_lines = nhd_reaches_w_cats,
                                             sites = p2_site_list,
                                             sites_crs = 4269,
                                             max_matches = 1,
                                             search_radius = 500)
      # Update site-to-reach matches based on p1_ref_gages_manual
      sites_w_segs_QC <- sites_w_segs %>%
        left_join(y = p1_ref_gages_manual[, c("id", "COMID_QC")],
                  by = c("site_id" = "id")) %>%
        mutate(COMID_updated = ifelse(site_id %in% p1_ref_gages_manual$id,
                                      COMID_QC, COMID)) %>%
        filter(!is.na(COMID_updated)) %>%
        select(-c(COMID, bird_dist_to_comid_m, COMID_QC)) %>%
        rename(COMID = COMID_updated)
    }
  ),
  # Add the segment ids (COMID) as a new column to the daily combined data and rename
  # the date/DO columns
  tar_target(
    p2_daily_with_seg_ids,
    {
      seg_and_site_ids <- p2_sites_w_segs %>%
        select(site_id, COMID)
      left_join(p2_daily_combined, seg_and_site_ids,
                by = c("site_no" = "site_id")) %>%
        rename(site_id = site_no,
               date = Date,
               do_mean = Value,
               do_min = Value_Min,
               do_max = Value_Max)
    }
  ),
  # Save the daily combined data with segment ids to a csv file
  tar_target(
    p2_daily_with_seg_ids_csv,
    write_to_csv(p2_daily_with_seg_ids, "2_process/out/daily_do_data.csv"),
    format = "file"
  ),
  # Make a list of "well-observed" sites; for now, only NWIS sites are considered.
  # Only sites with at least `min_obs_days` days of observations are kept,
  # where `min_obs_days` is defined in _targets.R
  tar_target(
    p2_well_observed_sites,
    p2_sites_w_segs %>%
      filter(count_days_nwis >= min_obs_days) %>%
      pull(site_id)
  ),
  # Filter p1_nhd_reaches_sf to segments with "well-observed" sites
  tar_target(
    p2_well_observed_reaches,
    {
      well_obs_reach_ids <- p2_sites_w_segs %>%
        filter(site_id %in% p2_well_observed_sites) %>%
        pull(COMID)
      p1_nhd_reaches_sf %>% filter(COMID %in% well_obs_reach_ids)
    }
  ),
  # Estimate daily (normalized) max-light
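  # This target uses {targets} dynamic branching (pattern = map(...)), so light is
  # computed separately for each well-observed reach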
  tar_target(
    p2_daily_max_light,
    {
      calc_seg_light_ratio(p2_well_observed_reaches,
                           start_date = earliest_date,
                           end_date = latest_date)
    },
    pattern = map(p2_well_observed_reaches)
  ),
  # Filter daily metabolism estimates based on model diagnostics
  tar_target(
    p2_metab_filtered,
    filter_metab_sites(p1_metab, p1_metab_diagnostics,
                       sites = p2_daily_with_seg_ids$site_id,
                       model_conf_vals = c("H"),
                       cutoff_ER_K_corr = 0.4)
  ),
  # Read in the individual catchment attribute tables, replace any missing values
  # (-9999 or -9998) with NA, and combine into a list.
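  # pattern = map(...) processes each downloaded attribute csv separately, and
  # iteration = "list" returns the per-file results as a list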
  tar_target(
    p2_cat_attr_list,
    process_attr_tables(p1_sb_attributes_downloaded_csvs,
                        cols = c("CAT", "TOT"),
                        nlcd_reclass_table = p1_nlcd_reclassification_table),
    pattern = map(p1_sb_attributes_downloaded_csvs),
    iteration = "list"
  ),
  # Subset NHDPlusv2 value-added attributes (VAA) table and replace any missing
  # values (-9999 or -9998) with NA.
  tar_target(
    p2_nhdv2_vaa_attr,
    process_nhdv2_vaa(nhd_lines = p1_nhd_reaches_sf,
                      vaa_cols = c("SLOPE", "TOTDASQKM"))
  ),
  # Loop through the catchment attribute list and join the individual data frames by
  # COMID. Combine catchment attributes with NHDPlusv2 value-added attributes (VAA)
  # and subset the data frame to the COMIDs included in the site list.
  tar_target(
    p2_seg_attr_data,
    combine_nhdv2_attr(nhd_vaa = p2_nhdv2_vaa_attr,
                       cat_attr_list = p2_cat_attr_list)
  ),
  # Subset the DRB meteorological data to only include the NHDPlusv2 catchments (COMID)
  # that correspond with observation locations.
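  # subset_nc_to_comids() is defined in the Python script sourced below and is made
  # available to the R session via reticulate::source_python()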
  tar_target(
    p2_met_data_at_obs_sites,
    {
      reticulate::source_python("2_process/src/subset_nc_to_comid.py")
      subset_nc_to_comids(p1_drb_nhd_gridmet, p2_well_observed_reaches$COMID) %>%
        as_tibble() %>%
        relocate(c(COMID, time), .before = "tmmx")
    }
  )
)
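
# Usage note (a sketch, assuming the standard {targets} workflow): this target list is
# presumably combined with the other phase target lists in _targets.R. After running
# targets::tar_make(), individual products can be inspected with, e.g.,
# targets::tar_read(p2_daily_with_seg_ids) or targets::tar_read(p2_site_list).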