diff --git a/2_process.R b/2_process.R index 0ae562c..c4156aa 100644 --- a/2_process.R +++ b/2_process.R @@ -57,9 +57,12 @@ p2_targets_list <- list( hucs=drb_huc8s,crs_out="NAD83") ), + # Match PRMS stream segments to observation site ids and return subset of sites within + # the distance specified by search_radius (in meters) tar_target( p2_sites_w_segs, - get_site_flowlines(p1_reaches_sf, p2_site_list, sites_crs = 4269, max_matches = 1, search_radius = 0.1) + get_site_flowlines(p1_reaches_sf, p2_site_list, sites_crs = 4269, max_matches = 1, + search_radius = bird_dist_cutoff_m, retain_sites = retain_nwis_sites) ), # Pair PRMS segments with intersecting NHDPlusV2 reaches and contributing NHDPlusV2 catchments diff --git a/2_process/src/match_sites_reaches.R b/2_process/src/match_sites_reaches.R index 79444e5..81a4d39 100644 --- a/2_process/src/match_sites_reaches.R +++ b/2_process/src/match_sites_reaches.R @@ -2,23 +2,26 @@ #' from https://code.usgs.gov/wwatkins/national-site-reach/-/blob/master/R/match_sites_reaches.R #' modified by: Jeff Sadler #' Match each site with a reach (seg_id/COMID) -get_site_flowlines <- function(reach_sf, sites, sites_crs, max_matches = 1, search_radius = 0.1) { +get_site_flowlines <- function(reach_sf, sites, sites_crs, max_matches = 1, search_radius = 500, retain_sites = NULL) { #' #' @description Function to match reaches (flowlines) to point sites #' - #' @param reach_sf sf object of reach polylines with column "subsegid" and in WGS84 - #' @param sites dataframe with columns "lat" "lon" - #' @param sites_crs the crs os the sites table (i.e., 4269 for NAD83) + #' @param reach_sf sf object of reach polylines with column "subsegid" + #' @param sites dataframe with columns "site_id", "lat", and "lon" + #' @param sites_crs the crs of the sites table (i.e., 4269 for NAD83) #' @param max_matches the maximum number of segments that a point can match to - #' @param search_radius the maximum radius in same units as sf object - #' within which a segment will match (segments outside of the radius will not match) + #' @param search_radius the maximum radius in meters to match a point to a segment; + #' segments outside of search_radius will not match + #' @param retain_sites character vector indicating particular sites to retain, even if + #' distance between the site and the nearest flowline segment exceeds the search_radius; + #' the site is indicated by the `site_id` column within the sites data frame. #' #' @value A data frame the same columns as the sites input dataframe with additional columns - #' of "segidnat", "subsegid" and "offset" where "offset" is the distance (in degrees) between the point - #' and matching flowline + #' of "segidnat", "subsegid" and "bird_dist_to_subseg_m" where "bird_dist_to_subseg_m" is the + #' distance (in meters) between the point and matching flowline # set up NHDPlus fields used by get_flowline_index - # Note: that we are renaming the subsegid column to COMID because the nhdplusTools + # Note: we are renaming the subsegid column to COMID because the nhdplusTools # function requires an sf object with a "COMID" column. This does not have anything # to do with the actual nhd COMIDs # Note: the `ToMeas` and `FromMeas` are also required columns for the nhdplusTools @@ -27,7 +30,9 @@ get_site_flowlines <- function(reach_sf, sites, sites_crs, max_matches = 1, sear reaches_nhd_fields <- reach_sf %>% rename(COMID = subsegid) %>% mutate(REACHCODE = COMID, ToMeas = 100, FromMeas = 100) %>% - st_as_sf() + # project reaches to Albers Equal Area Conic so that offsets returned by + # get_flowline_index are in meters rather than degrees + st_transform(5070) sites_sf <- sites %>% rowwise() %>% filter(across(c(lon, lat), ~ !is.na(.x))) %>% @@ -35,28 +40,43 @@ get_site_flowlines <- function(reach_sf, sites, sites_crs, max_matches = 1, sear st_as_sf() %>% st_set_crs(sites_crs) %>% st_transform(st_crs(reaches_nhd_fields)) %>% st_geometry() + + # Indicate the site numbers (id's) to retain based on retain_sites + retain_rows <- which(sites$site_id %in% retain_sites) + message('matching flowlines with reaches...') + # Below, precision indicates the resolution of measure precision (in meters) in the output; + # since we are interested in a more accurate estimate of the `offset` distance between a + # point and the matched reach, set precision to 1 m. + # Conduct initial search using a larger radius (search_radius*2) than specified to + # account for any uncertainty in the RANN:nn2 radius search. Then + # filter sites to include those within the specified search_radius. flowline_indices <- nhdplusTools::get_flowline_index(flines = reaches_nhd_fields, points = sites_sf, max_matches = max_matches, - search_radius = search_radius) %>% + search_radius = search_radius*2, + precision = 1) %>% select(COMID, id, offset) %>% - rename(subsegid = COMID) + rename(subsegid = COMID, bird_dist_to_subseg_m = offset) %>% + filter(bird_dist_to_subseg_m <= search_radius | id %in% retain_rows) # nhdplusTools returns an "id" column which is just an index from 1 to # the number of sites. To later join to the site-ids, we need to add # a matching index column. sites <- rowid_to_column(sites, "id") - message("rejoining with other geometries") #rejoin to original reaches df + message("rejoining with other geometries") + sites_w_reach_ids <- sites %>% - left_join(flowline_indices, by = "id") %>% + # only retain sites that got matched to flowlines and are + # within specified search_radius + right_join(flowline_indices, by = "id") %>% select(-id) %>% # add `segidnat` column - left_join(.,reach_sf %>% - st_drop_geometry() %>% - select(subsegid,segidnat), + left_join(reach_sf %>% + select(subsegid, segidnat) %>% + sf::st_drop_geometry(), by = "subsegid") return(sites_w_reach_ids) diff --git a/_targets.R b/_targets.R index 64636fd..cd018d5 100644 --- a/_targets.R +++ b/_targets.R @@ -51,6 +51,12 @@ site_tp_select <- c("ST","ST-CA","SP") # sites 01412350, 01484272, and 01482695 coded as site type "ST" but appear to be tidally-influenced omit_nwis_sites <- c("01412350","01484272","01482695") +# Define search radius to match sites to PRMS segments (in meters) +bird_dist_cutoff_m <- 500 + +# Retain NWIS sites even if nearest PRMS segment is outside of radius set by bird_dist_cutoff_m +retain_nwis_sites <- c("01474703","01477050") + # Define PRMS mainstem segments thought to be tidally-influenced (used for filtering sites used in daily/sub-daily models) mainstem_reaches_tidal <- c("2771_1","2769_1","2768_1","2767_1","2764_1","2762_1","2763_1","2759_1","2757_1","2752_1", "2753_1","2755_1","2772_1","388_1","389_1","385_1","386_1","383_1","382_1","377_1","378_1",