This repository has been archived by the owner on Jun 30, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
2_process.R
656 lines (598 loc) · 28.2 KB
/
2_process.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
# Load the helper functions used by the phase-2 (process) targets defined below.
# Each sourced file provides the like-named munging/processing functions.
source("2_process/src/munge_inst_timeseries.R")
source("2_process/src/filter_wqp_data.R")
source("2_process/src/create_site_list.R")
source("2_process/src/match_sites_reaches.R")
source("2_process/src/raster_to_catchment_polygons.R")
source("2_process/src/combine_NLCD_PRMS.R")
source("2_process/src/munge_natural_baseflow.R")
source('2_process/src/reclassify_land_cover.R')
source("2_process/src/process_nhdv2_attr.R")
source("2_process/src/recursive_fun.R")
source("2_process/src/aggregate_observations.R")
source('2_process/src/area_diff_fix.R')
source('2_process/src/clean_lulc_data_for_merge.R')
source('2_process/src/add_dynamic_attr.R')
source('2_process/src/write_data.R')
source("2_process/src/subset_tidal_reaches.R")
# Phase-2 (process) target list: munges WQP/NWIS specific-conductance (SC) data,
# matches observation sites to PRMS stream segments, derives land-cover
# (NLCD + FORESCE) and road-salt catchment attributes, processes static NHDv2
# attributes, builds dynamic (lagged) attributes, and writes model-ready
# outputs (CSV / zarr). Targets are evaluated by the `targets` package, which
# infers dependencies by static analysis of these expressions.
p2_targets_list <- list(
  # Filter harmonized WQP data for salinity data
  tar_target(
    p2_wqp_salinity_data,
    filter_wqp_salinity_data(p1_wqp_data, major_ion_names, wqp_vars_select, omit_wqp_events,
                             earliest_date, latest_date, exclude_tidal = TRUE)
  ),
  # Subset discrete SC data from harmonized WQP
  # this is a 'local' target so making p3_wqp_ind_csv works
  # see https://github.com/USGS-R/drb-inland-salinity-ml/issues/153
  tar_target(
    p2_wqp_SC_data,
    subset_wqp_SC_data(p2_wqp_salinity_data, omit_duplicates = TRUE)
  ),
  # Subset duplicated discrete SC observations from the harmonized WQP dataset;
  # this target provides a record of the duplicate records that were omitted in
  # p2_wqp_SC_data, but is not currently used elsewhere in the pipeline.
  tar_target(
    p2_wqp_SC_dups,
    subset_wqp_SC_dups(p2_wqp_salinity_data, p2_wqp_SC_data)
  ),
  # Aggregate instantaneous SC data to hourly averages
  # pattern = map(...) branches this target over each element of p1_inst_data.
  tar_target(
    p2_inst_data_hourly,
    aggregate_data_to_hourly(p1_inst_data,output_tz = "UTC"),
    pattern = map(p1_inst_data)
  ),
  # Aggregate instantaneous SC data to daily min/mean/max
  tar_target(
    p2_inst_data_daily,
    aggregate_data_to_daily(p1_inst_data,p1_daily_data, min_daily_coverage=0.5,
                            output_tz="America/New_York") %>%
      #add column to identify the data source as continuous
      mutate(data_type = 'u')
  ),
  # Combine 1) daily SC data and 2) instantaneous SC data that has been aggregated to daily
  tar_target(
    p2_daily_combined,
    {
      #handling p1_daily_data data_type column here because of our
      #current tar_cue = never for p1_daily_data.
      #add column to identify the data source as continuous
      daily_only <- p1_daily_data
      daily_only$data_type <- 'u'
      bind_rows(daily_only, p2_inst_data_daily)
    }
  ),
  # Create a list of unique site id's with SC data
  tar_target(
    p2_site_list,
    create_site_list(p2_wqp_SC_data,p1_nwis_sites,p1_daily_data,p1_inst_data,
                     hucs=drb_huc8s,crs_out="NAD83")
  ),
  # Match PRMS stream segments to observation site ids and return subset of sites within
  # the distance specified by search_radius (in meters)
  tar_target(
    p2_sites_w_segs,
    get_site_flowlines(p1_reaches_sf, p2_site_list, sites_crs = 4269, max_matches = 1,
                       search_radius = bird_dist_cutoff_m, retain_sites = retain_nwis_sites)
  ),
  # Pair PRMS segments with intersecting NHDPlusV2 reaches and contributing NHDPlusV2 catchments
  # (crosswalk table read directly from a published URL; all 4 columns read as character)
  tar_target(
    p2_prms_nhdv2_xwalk,
    read_csv(GFv1_NHDv2_xwalk_url, col_types = 'cccc')
  ),
  # Melt PRMS-NHDv2 xwalk table to return all COMIDs that drain to each PRMS segment
  tar_target(
    p2_drb_comids_all_tribs,
    p2_prms_nhdv2_xwalk %>%
      select(PRMS_segid, comid_cat) %>%
      tidyr::separate_rows(comid_cat,sep=";") %>%
      rename(comid = comid_cat)
  ),
  # Reshape PRMS-NHDv2 xwalk table to return all COMIDS that intersect/overlap the PRMS segments
  tar_target(
    p2_drb_comids_seg,
    p2_prms_nhdv2_xwalk %>%
      select(PRMS_segid, comid_seg) |>
      tidyr::separate_rows(comid_seg,sep=";") |>
      rename(comid = comid_seg)
  ),
  # Subset PRMS-NHDv2 xwalk table to return the COMID located at the downstream end of each PRMS segment
  tar_target(
    p2_drb_comids_down,
    p2_prms_nhdv2_xwalk %>%
      select(PRMS_segid,comid_down) %>%
      rename(comid = comid_down)
  ),
  # Subset PRMS segments that may be tidally-influenced
  # Based on the elevation of tidally-influenced NWIS gages in the DRB, 4 meters
  # is used to indicate the likely head-of-tide.
  # See https://github.com/USGS-R/drb-inland-salinity-ml/issues/241
  tar_target(
    p2_tidal_reaches,
    subset_tidal_reaches(p1_reaches_sf, p2_drb_comids_seg, tidal_elev_m = 4)
  ),
  # Write tidal reach ids to a tracked text file (one PRMS_segid per row);
  # format = 'file' makes targets track the file path's contents.
  tar_target(
    p2_tidal_reaches_txt,
    {
      fileout = '2_process/out/p2_tidal_reaches.txt'
      reach_df = as.data.frame(p2_tidal_reaches)
      colnames(reach_df) = 'PRMS_segid'
      write_csv(reach_df, fileout)
      fileout
    },
    format = 'file',
    repository = 'local'
  ),
  # Write the complement (non-tidal reach ids) to a tracked text file,
  # without a header row (col_names = FALSE).
  tar_target(
    p2_nontidal_reaches_txt,
    {
      fileout = '2_process/out/p2_nontidal_reaches.txt'
      nontidal = p1_reaches_sf$subsegid[!(p1_reaches_sf$subsegid %in% p2_tidal_reaches)]
      reach_df = as.data.frame(nontidal)
      write_csv(reach_df, fileout, col_names = FALSE)
      fileout
    },
    format = 'file',
    repository = 'local'
  ),
  #----
  # NLCD Land Cover Proportions
  ## Filter LC data to the AOI : DRB and join with COMIDs area info and PRMS ids
  # returns a df with unique comids for aoi + area of comid and NLCD LC percentage attributes
  tar_target(
    p2_NLCD_LC_w_catchment_area,
    AOI_LC_w_area(area_att = p1_nhdv2reaches_sf %>%
                    st_drop_geometry() %>%
                    select(COMID, AREASQKM,TOTDASQKM, LENGTHKM),
                  NLCD_LC_df = p1_NLCD_LC_data,
                  aoi_comids_df = p2_drb_comids_all_tribs)
  ),
  ## Estimate LC proportion in PRMS catchment - CAT, TOT, and ACC
  # returns df with proportion LC in PRMS catchment in our AOI
  tar_target(
    p2_PRMS_NLCD_lc_proportions_cat,
    proportion_lc_by_prms(p2_NLCD_LC_w_catchment_area,
                          area_col = 'AREASQKM',
                          length_col = 'LENGTHKM',
                          catchment_att = 'CAT',
                          remove_NODATA_cols = TRUE)
  ),
  tar_target(
    p2_PRMS_NLCD_lc_proportions_tot,
    proportion_lc_by_prms(p2_NLCD_LC_w_catchment_area %>%
                            # filtering to only the comid_downs of each PRMS - nrow = ~459
                            filter(comid %in% p2_drb_comids_down$comid),
                          area_col = 'TOTDASQKM',
                          length_col = 'LENGTHKM',
                          catchment_att = 'TOT',
                          remove_NODATA_cols = TRUE)
  ),
  tar_target(
    p2_PRMS_NLCD_lc_proportions_acc,
    proportion_lc_by_prms(p2_NLCD_LC_w_catchment_area %>%
                            # filtering to only the comid_downs of each PRMS - nrow = ~459
                            filter(comid %in% p2_drb_comids_down$comid),
                          area_col = 'TOTDASQKM',
                          length_col = 'LENGTHKM',
                          catchment_att = 'ACC',
                          remove_NODATA_cols = TRUE)
  ),
  ## Standardize the land cover class names for NLCD to following standardized classes table - ''1_fetch/in/Reclassified_Land_Cover_IS.csv'
  # For NLCD, we use '1_fetch/in/Legend_NLCD_Land_Cover.csv' as vlookup file for the FORESCE targets
  # For Cat
  tar_target(
    p2_PRMS_NLCD_lc_proportions_reclass_cat,
    reclassify_LC_for_NLCD(NLCD_lc_proportions_df = p2_PRMS_NLCD_lc_proportions_cat,
                           years_suffix = NLCD_year_suffix,
                           reclassify_table = p1_NLCD_reclass_table)
  ),
  # For Tot
  tar_target(
    p2_PRMS_NLCD_lc_proportions_reclass_tot,
    reclassify_LC_for_NLCD(p2_PRMS_NLCD_lc_proportions_tot,
                           NLCD_year_suffix,
                           reclassify_table = p1_NLCD_reclass_table)
  ),
  # For Acc
  tar_target(
    p2_PRMS_NLCD_lc_proportions_reclass_acc,
    reclassify_LC_for_NLCD(p2_PRMS_NLCD_lc_proportions_acc,
                           NLCD_year_suffix,
                           reclassify_table = p1_NLCD_reclass_table)
  ),
  #----
  # PRMS gpkg Area Fix: Fix PRMS polygon area difference between catchments edited sf and nhd comid aggregation
  ## Find list of PRMS_segid requiring new catchment polygon - using 5 km as the threshold
  tar_target(
    p2_PRMS_segid_special_handling_list,
    catchment_area_check(PRMS_shapefile = p1_catchments_edited_sf,
                         nhd_catchment_areas = p2_NLCD_LC_w_catchment_area,
                         area_difference_threshold_km2 = 5)
  ),
  ## New PRMS catchment shapefile from comids - used for the PRMS_segid requiring fixed catchments for lc raster extraction :
  tar_target(
    p2_nhd_catchments_dissolved_sf,
    dissolve_nhd_catchments_to_PRMS_segid(selected_PRMS_list = p2_PRMS_segid_special_handling_list,
                                          PRMS_comid_df = p2_drb_comids_all_tribs,
                                          prms_reaches_sf = p1_reaches_sf)
  ),
  ## Filtered PRMS catchments gpkg for the PRMS segid not require dnew nhd-based catchment
  tar_target(
    p2_filtered_catchments_edited_sf,
    p1_catchments_edited_sf %>% filter(!PRMS_segid %in% p2_PRMS_segid_special_handling_list)
  ),
  # ----
  # FORESCE:Extract historical LC data raster values within catchments polygon + prop calc - general function raster_to_catchment_polygons
  ## This is a subset now. nrow = 702 (vs. 867 before)
  # Returns a list of data frames, one per FORESCE decade raster.
  tar_target(
    p2_FORESCE_LC_per_catchment,
    {lapply(p1_FORESCE_backcasted_LC, function(x) raster_to_catchment_polygons(
      polygon_sf = p2_filtered_catchments_edited_sf,
      raster = x, categorical_raster = TRUE,
      raster_summary_fun = NULL,
      new_cols_prefix = 'lcClass',
      fill = 0))
    }
  ),
  ## Proportion calculation for PRMS_segid that use fixed nhd-based catchment polygons
  tar_target(
    p2_FORESCE_LC_per_catchment_nhd_dissolve,
    {lapply(p1_FORESCE_backcasted_LC, function(x) raster_to_catchment_polygons(
      ## using dissolved catchment polygons here
      polygon_sf = p2_nhd_catchments_dissolved_sf,
      raster = x, categorical_raster = TRUE,
      raster_summary_fun = NULL,
      new_cols_prefix = 'lcClass',
      fill = 0) %>%
        #Add column of prms_subseg_seg to match p2_FORESCE_LC_per_catchment
        left_join(.,
                  p1_catchments_edited_sf %>%
                    select(PRMS_segid, prms_subseg_seg) %>%
                    sf::st_drop_geometry(), 'PRMS_segid')
    )
    }
  ),
  ## Standardize the land cover class names for NLCD to following standardized classes table - ''1_fetch/in/Reclassified_Land_Cover_IS.csv'
  # For FORESCE '1_fetch/in/Legend_FORESCE_Land_Cover.csv' as vlookup file for the FORESCE targets
  # reclassify FORESCE followed by aggregate to hru_segment scale across all lc classes so that it's ready for x walk - output remains list of dfs for the 5 decade years covered by FORESCE
  ## reclassifying on original subsetted FORESCE LC proportions datasets
  # map2 pairs each decade's data frame (.x) with its year label (.y).
  tar_target(
    p2_FORESCE_LC_per_catchment_PRMS_reclass_cat,
    {purrr::map2(.x = p2_FORESCE_LC_per_catchment,
                 .y = FORESCE_years,
                 .f = ~{reclassify_land_cover(land_cover_df = .x,
                                              reclassify_table = p1_FORESCE_reclass_table,
                                              reclassify_table_lc_col = 'FORESCE_value',
                                              reclassify_table_reclass_col = 'Reclassify_match',
                                              pivot_longer_contains = 'lcClass',
                                              proportion_col_prefix = 'prop_lcClass',
                                              hru_area_colname = hru_area_km2,
                                              remove_NA_cols = TRUE) %>%
                   # See documentation in function
                   ## group by with both hru and prms because we need prms_subseg_seg to run the recursive function for upstream catchments
                   aggregate_proportions_hrus(group_by_segment_colname = vars(PRMS_segid,prms_subseg_seg),
                                              proportion_col_prefix = 'prop_lcClass',
                                              hru_area_colname = hru_area_km2,
                                              new_area_colname = total_PRMS_area) %>%
                   ## Adding Year column
                   mutate(Year = .y) %>%
                   ## Reorder the land cover columns
                   select(order(colnames(.)))}
    )
    }
  ),
  ## Reclassifying on fixed FORESCE LC proportions datasets + cleaning/munging for rbind
  tar_target(
    p2_FORESCE_LC_per_catchment_nhd_dissolve_reclass_cat,
    {purrr::map2(.x = p2_FORESCE_LC_per_catchment_nhd_dissolve,
                 .y = FORESCE_years,
                 .f = ~{reclassify_land_cover(land_cover_df = .x,
                                              reclassify_table = p1_FORESCE_reclass_table,
                                              reclassify_table_lc_col = 'FORESCE_value',
                                              reclassify_table_reclass_col = 'Reclassify_match',
                                              pivot_longer_contains = 'lcClass',
                                              proportion_col_prefix = 'prop_lcClass',
                                              hru_area_colname = total_PRMS_area,
                                              remove_NA_cols = TRUE) %>%
                   ## rearranging cols
                   select(PRMS_segid, everything(), -ID) %>%
                   ## Adding Year column
                   mutate(Year = .y) %>%
                   ## Reorder the land cover columns
                   select(order(colnames(.)))}
    )
    }
  ),
  ## merge correct and fixed p2_FORESCE_LC_per_catchments_reclass_cat
  # rbind per decade: element-wise union of the standard and fixed-catchment rows.
  tar_target(
    p2_FORESCE_LC_per_catchment_reclass_cat,
    {map2(.x = p2_FORESCE_LC_per_catchment_PRMS_reclass_cat,
          .y = p2_FORESCE_LC_per_catchment_nhd_dissolve_reclass_cat,
          .f = ~rbind(.x, .y))}
  ),
  ## Produce subset of p1_prms_reach_attr for p2_FORESCE_LC_per_catchment_reclass_tot target via recursively calculating proportions of LC class across all upstream segments for a given segment
  tar_target(
    p2_prms_attribute_df,
    p1_prms_reach_attr %>% select(subseg_id,subseg_seg,from_segs,to_seg) %>%
      # renaming so that we can distinguish from p2_FORESCE_LC_per_catchment_reclass_cat$PRMS_segid col
      rename(., PRMS_segid_main = subseg_id) %>%
      # Update `from_segs` col by splitting the individual segs in a list (can then loop through the list)
      mutate(from_segs = stringr::str_split(string = from_segs, pattern = ';', simplify = F)) %>%
      rowwise() %>%
      # Collect all upstream segs per individual seg_id using recursive_fun() (row wise application)
      mutate(all_from_segs = list(recursive_fun(x = subseg_seg, df = ., col1 = 'subseg_seg', col2 = 'from_segs'))) %>%
      # un-nest to have new rows for each upstream catchment
      unnest(all_from_segs, keep_empty = TRUE)
  ),
  # Produce p2_FORESCE_LC_per_catchment_reclass_tot
  # Area-weighted upstream (TOT) proportions per PRMS segment and Year.
  tar_target(
    p2_FORESCE_LC_per_catchment_reclass_tot,
    {lapply(p2_FORESCE_LC_per_catchment_reclass_cat, function(x)
      p2_prms_attribute_df %>%
        # join prop calculations
        right_join(x, by = c('all_from_segs' = 'prms_subseg_seg')) %>%
        # group by PRMS id
        group_by(PRMS_segid_main, Year) %>%
        summarise(
          # calc. total area
          total_upstream_PRMS_area = sum(total_PRMS_area),
          # get proportions for the new total area
          across(starts_with('prop'), ~(sum((.x*total_PRMS_area)/total_upstream_PRMS_area))),
          .groups = 'drop_last') %>%
        drop_na() %>%
        rename(., PRMS_segid = PRMS_segid_main) %>%
        ungroup() %>%
        # Add prms_subseg_seg
        right_join(x %>% select(PRMS_segid, prms_subseg_seg),
                   by = c('PRMS_segid' = 'PRMS_segid')) %>%
        # Reorder the columns
        select(order(colnames(.))) %>%
        # Reorder the rows
        arrange(PRMS_segid)
    )}
  ),
  # Combine NLCD and FORESCE
  ## Cat
  tar_target(
    p2_all_lulc_data_cat,
    {rbind(
      clean_lulc_data_for_merge(p2_FORESCE_LC_per_catchment_reclass_cat,
                                columns_to_remove = 'prms_subseg_seg', prms_area_col = 'total_PRMS_area', prms_area_unit = 'km2', prop_prefix = 'CAT'),
      clean_lulc_data_for_merge(p2_PRMS_NLCD_lc_proportions_reclass_cat,
                                columns_to_remove = NULL, prms_area_col = 'AREASQKM_PRMS', prms_area_unit = 'km2', prop_prefix = 'CAT'))
    }
  ),
  ## Tot
  ## note - rbind only works when same number of cols
  tar_target(
    p2_all_lulc_data_tot,
    {rbind(
      clean_lulc_data_for_merge(p2_FORESCE_LC_per_catchment_reclass_tot,
                                columns_to_remove = 'prms_subseg_seg', prms_area_col = 'total_upstream_PRMS_area', prms_area_unit = 'km2', prop_prefix = 'TOT'),
      clean_lulc_data_for_merge(p2_PRMS_NLCD_lc_proportions_reclass_tot,
                                columns_to_remove = NULL, prms_area_col = 'AREASQKM_PRMS', prms_area_unit = 'km2', prop_prefix = 'TOT'))
    }
  ),
  # Extract Road Salt raster values to catchments polygons in the DRB - general function raster_to_catchment_polygons + Aggregate to hru_segment scale across each annual road salt df in list of p2_rdsalt_per_catchment - can then xwalk
  tar_target(
    p2_rdsalt_per_catchment,
    {lapply(p1_rdsalt, function(x) raster_to_catchment_polygons(polygon_sf = p1_catchments_edited_sf,
                                                                raster = x,
                                                                categorical_raster = FALSE,
                                                                raster_summary_fun = sum,
                                                                new_cols_prefix = 'rd_slt', na.rm = T) %>%
        group_by(PRMS_segid) %>%
        summarise(across(starts_with('rd_sltX'), sum)))
    }
  ),
  # Combine rd salt targets - from list of dfs to single df with added columns
  # that summarize salt accumulation across all years.
  tar_target(
    p2_rdsalt_per_catchment_allyrs,
    # Reduce can iterate through elements in a list 1 after another
    Reduce(function(...) merge(..., by = 'PRMS_segid'),
           p2_rdsalt_per_catchment) %>%
      # Calculate total salt accumulation across all years
      mutate(rd_salt_all_years = rowSums(across(starts_with('rd_sltX')), na.rm = TRUE)) %>%
      # Calculate prop of catchment rd salt acc across entire basin
      mutate(rd_salt_all_years_prop_drb = round((rd_salt_all_years/sum(rd_salt_all_years)),8)) %>%
      # Remove annual rd_saltXYr cols
      select(-starts_with('rd_sltX')) %>%
      arrange(desc(rd_salt_all_years_prop_drb))
  ),
  # Filter discrete samples from sites thought to be influenced by tidal extent
  tar_target(
    p2_wqp_SC_filtered,
    subset_wqp_nontidal(p2_wqp_SC_data,p2_sites_w_segs,mainstem_reaches_tidal) %>%
      #add data source type as discrete estimate
      mutate(data_type = 'd') %>%
      #remove bad sample (explained here https://github.com/USGS-R/drb-inland-salinity-ml/issues/190)
      #only remove if equal to the exact value
      filter(!(ActivityStartDate == '1999-03-30' &
                 MonitoringLocationIdentifier == 'USGS-01421618' &
                 resultVal2 == 30200))
  ),
  # Filter SC site list
  tar_target(
    p2_site_list_nontidal_csv,
    create_site_list_nontidal(p2_wqp_SC_filtered,p1_nwis_sites,p1_daily_data,p1_inst_data,
                              hucs=drb_huc8s,crs_out="NAD83",p2_sites_w_segs,
                              "2_process/out/DRB_SC_sitelist_nontidal.csv"),
    format = "file"
  ),
  # Return SC observations aggregated to the PRMS segment
  tar_target(
    p2_SC_observations,
    aggregate_observations(p2_wqp_SC_filtered, p2_daily_combined, p2_sites_w_segs,
                           aggr_method = "reach", prefer_nwis_sites = FALSE)
  ),
  # Return natural baseflow estimates for each PRMS segment
  tar_target(
    p2_natural_baseflow,
    munge_natural_baseflow(baseflow_pred_files = p1_natural_baseflow_csv,
                           segs_w_comids = p2_prms_nhdv2_xwalk %>%
                             select(PRMS_segid,comid_down) %>%
                             rename('COMID' = 'comid_down'),
                           start_year = as.character(lubridate::year(earliest_date)),
                           end_year = as.character(lubridate::year(latest_date)),
                           fill_all_years = TRUE) %>%
      #Fill in NA reach from neighbors
      refine_dynamic_from_neighbors(attr_i = 'mean_natl_baseflow_cfs',
                                    prms_reach_attr = p2_prms_attribute_df,
                                    drainage_area = p2_nhdv2_attr_refined %>%
                                      select(PRMS_segid, TOT_BASIN_AREA))
  ),
  # Process NHDv2 attributes referenced to cumulative upstream area;
  # returns object target of class "list". List elements for CAT_PPT
  # and ACC_PPT (if TOT is selected below) will only contain the
  # PRMS_segid column and so will functionally be omitted when
  # creating the `p2_nhdv2_attr` target below
  tar_target(
    p2_nhdv2_attr_upstream,
    process_cumulative_nhdv2_attr(p1_vars_of_interest_downloaded_csvs,
                                  segs_w_comids = p2_drb_comids_down,
                                  cols = c("TOT")),
    pattern = map(p1_vars_of_interest_downloaded_csvs),
    iteration = "list"
  ),
  # Process NHDv2 attributes scaled to the catchment that directly drains to each PRMS segment;
  # returns object target of class "list" that is nested and contains the aggregated data as well
  # as a separate NA diagnostics data table for each NHDv2 attribute
  tar_target(
    p2_nhdv2_attr_catchment,
    process_catchment_nhdv2_attr(p1_vars_of_interest_downloaded_csvs,
                                 vars_table = p1_vars_of_interest,
                                 segs_w_comids = p2_drb_comids_all_tribs,
                                 nhd_lines = p1_nhdv2reaches_sf),
    pattern = map(p1_vars_of_interest_downloaded_csvs),
    iteration = "list"
  ),
  # Create combined NHDv2 attribute data frame that includes both the cumulative upstream and catchment-scale values
  tar_target(
    p2_nhdv2_attr,
    create_nhdv2_attr_table(p2_nhdv2_attr_upstream, p2_nhdv2_attr_catchment) %>%
      #add CAT road salt to static attributes
      left_join(p2_rdsalt_per_catchment_allyrs %>%
                  select(PRMS_segid, rd_salt_all_years_prop_drb) %>%
                  rename(CAT_rdsalt_prop = rd_salt_all_years_prop_drb), by = 'PRMS_segid')
  ),
  #Refine the attributes that are used for modeling
  tar_target(
    p2_nhdv2_attr_refined,
    refine_features(nhdv2_attr = p2_nhdv2_attr,
                    prms_nhdv2_xwalk = p2_prms_nhdv2_xwalk,
                    nhdv2_reaches = p1_nhdv2reaches_sf,
                    prms_attribute_df = p2_prms_attribute_df,
                    #PHYSIO_AREA says which proportion of catchments are
                    #covered by physiographic regions
                    #RUN7100 seems like it is by HUC02 instead of reach.
                    #RFACT is perfectly correlated with RF7100
                    drop_columns = c("PHYSIO_AREA", "RUN7100", "RFACT"))
  ),
  #Lags to compute for dynamic attributes
  tar_target(
    p2_dyn_attr_lags,
    #' @param attribute short attribute name used to select columns containing
    #' that name, or 'Land', 'Met', 'Baseflow' as keywords to process those columns.
    #' @param lags numeric vector stating how many lag_units to lag.
    #' A column will be added for each element in lags for each attrs column.
    #' A lag of 0 can be used to extract the value on the Date in the dyn_df tbl.
    #' @param lag_unit character vector containing the unit to use for each lag in lags.
    #' Accepts any of the lubridate options (e.g., days, months, years). If all
    #' units are the same, can provide a one element vector with that unit.
    # I(list(...)) keeps list-columns intact inside data.frame().
    # Note each lags vector pairs element-wise with its lag_unit vector
    # (e.g. Met: 7 day lags then 1 and 5 year lags).
    data.frame(attribute = c('HDENS', 'MAJOR', 'NDAMS', 'NORM_STORAGE', 'NID_STORAGE',
                             'Land',
                             'Met',
                             'Baseflow'),
               lags = I(list(c(10,20), c(10,20), c(10,20), c(10,20), c(10,20),
                             c(5,10,15,20),
                             c(1,3,7,15,30,90,180,1,5),
                             c(1,3,6,1,5))),
               lag_unit = I(list('years', 'years', 'years', 'years', 'years',
                                 'years',
                                 c(rep('days', 7), 'years', 'years'),
                                 c(rep('months', 3), 'years', 'years'))),
               lag_fxns = I(list(c('exact','mean'), c('exact','mean'),
                                 c('exact','mean'), c('exact','mean'),
                                 c('exact','mean'),
                                 c('exact','mean'),
                                 c('mean'),
                                 c('mean')))
    )
  ),
  #Dataframe of dynamic attributes
  tar_target(
    p2_dyn_attr,
    add_dyn_attrs_to_reaches(attrs = p2_nhdv2_attr_refined,
                             dyn_cols = c('HDENS', 'MAJOR', 'NDAMS', 'NORM',
                                          'NID'),
                             start_date = earliest_date,
                             end_date = latest_date,
                             baseflow = p2_natural_baseflow,
                             CAT_Land = p2_all_lulc_data_cat,
                             Upstream_Land = p2_all_lulc_data_tot,
                             gridMET = p1_gridmet,
                             attr_prefix = c('CAT', 'TOT'),
                             Upstream_Land_prefix = 'TOT',
                             lag_table = p2_dyn_attr_lags)
  ),
  #Remove static attributes that were made into dynamic attributes
  tar_target(
    p2_nhdv2_attr_refined_rm_dyn,
    select(p2_nhdv2_attr_refined, -contains('HDENS'), -contains('MAJOR'),
           -contains('NDAMS'), -contains('NORM'), -contains('NID'), -hru_segment)
  ),
  #Join static attributes to dynamic dataframe
  tar_target(
    p2_all_attr,
    left_join(p2_dyn_attr, p2_nhdv2_attr_refined_rm_dyn, by = c('seg' = 'PRMS_segid')) %>%
      rename(PRMS_segid = seg)
  ),
  #Join attributes to SC observations and retain only the days with SC observations
  tar_target(
    p2_all_attr_SC_obs,
    left_join(p2_SC_observations, p2_all_attr, by = c('subsegid' = 'PRMS_segid', 'Date')) %>%
      rename(PRMS_segid = subsegid) %>%
      filter(Date >= earliest_date)
  ),
  #Attribute table with barren, urban and forest land cover,
  # and physiographic regions for use in analysis functions
  # NOTE(review): the renames label TOT_LC3_0/TOT_LC4_0 as low/mid urban and
  # TOT_LC6_0 as forest - confirm against the reclassification legend tables.
  tar_target(
    p2_TOT_lc_physio_attrs,
    select(p2_all_attr_SC_obs, PRMS_segid, Date, TOT_LC3_0, TOT_LC4_0, TOT_LC6_0) %>%
      rename(lowurban = TOT_LC3_0,
             midurban = TOT_LC4_0,
             forest = TOT_LC6_0) %>%
      left_join(y = p1_reaches_ecoreg_sf %>%
                  select(subsegid, "AP", "BR", "CP", "PD", "VR") %>%
                  st_drop_geometry(),
                by = c('PRMS_segid' = 'subsegid'))
  ),
  #Save a table indicating number of observations for each reach by data type
  tar_target(
    p2_reach_data_type,
    summarize(group_by(p2_all_attr_SC_obs, PRMS_segid, data_type),
              data_type = unique(data_type),
              n_obs = n())
  ),
  #Save attributes and observations as zarr file for use in river-dl
  # NOTE(review): p4_screened_attrs is defined in a later phase target list;
  # targets resolves dependencies by name across lists, but confirm the p4
  # list is included in the same _targets.R manifest.
  tar_target(
    p2_all_attr_zarr,
    #Save only the attributes after correlation screening to cut down total saved
    write_df_to_zarr(p2_all_attr %>%
                       select(PRMS_segid, Date,
                              all_of(p4_screened_attrs)),
                     index_cols = c("Date", "PRMS_segid"),
                     "2_process/out/drb_attrs_PRMS.zarr"),
    format = "file",
    repository = "local"
  ),
  tar_target(
    p2_SC_obs_zarr,
    write_df_to_zarr(p2_all_attr_SC_obs %>%
                       select(PRMS_segid, Date, mean_value),
                     index_cols = c("Date", "PRMS_segid"),
                     "2_process/out/drb_SC_obs_PRMS.zarr"),
    format = "file",
    repository = "local"
  )
)