Skip to content
This repository has been archived by the owner on Jun 30, 2023. It is now read-only.

Commit

Permalink
Merge pull request #116 from lekoenig/112-match-segs
Browse files Browse the repository at this point in the history
Edit search radius used for site-to-reach matching
  • Loading branch information
lkoenig-usgs authored Mar 18, 2022
2 parents 30126d4 + f456ae7 commit d0691b7
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 18 deletions.
5 changes: 4 additions & 1 deletion 2_process.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,12 @@ p2_targets_list <- list(
hucs=drb_huc8s,crs_out="NAD83")
),

# Match PRMS stream segments to observation site ids and return subset of sites within
# the distance specified by search_radius (in meters)
tar_target(
p2_sites_w_segs,
get_site_flowlines(p1_reaches_sf, p2_site_list, sites_crs = 4269, max_matches = 1, search_radius = 0.1)
get_site_flowlines(p1_reaches_sf, p2_site_list, sites_crs = 4269, max_matches = 1,
search_radius = bird_dist_cutoff_m, retain_sites = retain_nwis_sites)
),

# Pair PRMS segments with intersecting NHDPlusV2 reaches and contributing NHDPlusV2 catchments
Expand Down
54 changes: 37 additions & 17 deletions 2_process/src/match_sites_reaches.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,26 @@
#' from https://code.usgs.gov/wwatkins/national-site-reach/-/blob/master/R/match_sites_reaches.R
#' modified by: Jeff Sadler
#' Match each site with a reach (seg_id/COMID)
get_site_flowlines <- function(reach_sf, sites, sites_crs, max_matches = 1, search_radius = 0.1) {
get_site_flowlines <- function(reach_sf, sites, sites_crs, max_matches = 1, search_radius = 500, retain_sites = NULL) {
#'
#' @description Function to match reaches (flowlines) to point sites
#'
#' @param reach_sf sf object of reach polylines with column "subsegid" and in WGS84
#' @param sites dataframe with columns "lat" "lon"
#' @param sites_crs the crs os the sites table (i.e., 4269 for NAD83)
#' @param reach_sf sf object of reach polylines with column "subsegid"
#' @param sites dataframe with columns "site_id", "lat", and "lon"
#' @param sites_crs the crs of the sites table (i.e., 4269 for NAD83)
#' @param max_matches the maximum number of segments that a point can match to
#' @param search_radius the maximum radius in same units as sf object
#' within which a segment will match (segments outside of the radius will not match)
#' @param search_radius the maximum radius in meters to match a point to a segment;
#' segments outside of search_radius will not match
#' @param retain_sites character vector indicating particular sites to retain, even if
#' distance between the site and the nearest flowline segment exceeds the search_radius;
#' the site is indicated by the `site_id` column within the sites data frame.
#'
#' @value A data frame the same columns as the sites input dataframe with additional columns
#' of "segidnat", "subsegid" and "offset" where "offset" is the distance (in degrees) between the point
#' and matching flowline
#' of "segidnat", "subsegid" and "bird_dist_to_subseg_m" where "bird_dist_to_subseg_m" is the
#' distance (in meters) between the point and matching flowline

# set up NHDPlus fields used by get_flowline_index
# Note: that we are renaming the subsegid column to COMID because the nhdplusTools
# Note: we are renaming the subsegid column to COMID because the nhdplusTools
# function requires an sf object with a "COMID" column. This does not have anything
# to do with the actual nhd COMIDs
# Note: the `ToMeas` and `FromMeas` are also required columns for the nhdplusTools
Expand All @@ -27,36 +30,53 @@ get_site_flowlines <- function(reach_sf, sites, sites_crs, max_matches = 1, sear
reaches_nhd_fields <- reach_sf %>%
rename(COMID = subsegid) %>%
mutate(REACHCODE = COMID, ToMeas = 100, FromMeas = 100) %>%
st_as_sf()
# project reaches to Albers Equal Area Conic so that offsets returned by
# get_flowline_index are in meters rather than degrees
st_transform(5070)

sites_sf <- sites %>% rowwise() %>%
filter(across(c(lon, lat), ~ !is.na(.x))) %>%
mutate(Shape = list(st_point(c(lon, lat), dim = "XY"))) %>%
st_as_sf() %>% st_set_crs(sites_crs) %>%
st_transform(st_crs(reaches_nhd_fields)) %>%
st_geometry()

# Indicate the site numbers (id's) to retain based on retain_sites
retain_rows <- which(sites$site_id %in% retain_sites)

message('matching flowlines with reaches...')
# Below, precision indicates the resolution of measure precision (in meters) in the output;
# since we are interested in a more accurate estimate of the `offset` distance between a
# point and the matched reach, set precision to 1 m.
# Conduct initial search using a larger radius (search_radius*2) than specified to
# account for any uncertainty in the RANN:nn2 radius search. Then
# filter sites to include those within the specified search_radius.
flowline_indices <- nhdplusTools::get_flowline_index(flines = reaches_nhd_fields,
points = sites_sf,
max_matches = max_matches,
search_radius = search_radius) %>%
search_radius = search_radius*2,
precision = 1) %>%
select(COMID, id, offset) %>%
rename(subsegid = COMID)
rename(subsegid = COMID, bird_dist_to_subseg_m = offset) %>%
filter(bird_dist_to_subseg_m <= search_radius | id %in% retain_rows)

# nhdplusTools returns an "id" column which is just an index from 1 to
# the number of sites. To later join to the site-ids, we need to add
# a matching index column.
sites <- rowid_to_column(sites, "id")

message("rejoining with other geometries")
#rejoin to original reaches df
message("rejoining with other geometries")

sites_w_reach_ids <- sites %>%
left_join(flowline_indices, by = "id") %>%
# only retain sites that got matched to flowlines and are
# within specified search_radius
right_join(flowline_indices, by = "id") %>%
select(-id) %>%
# add `segidnat` column
left_join(.,reach_sf %>%
st_drop_geometry() %>%
select(subsegid,segidnat),
left_join(reach_sf %>%
select(subsegid, segidnat) %>%
sf::st_drop_geometry(),
by = "subsegid")

return(sites_w_reach_ids)
Expand Down
6 changes: 6 additions & 0 deletions _targets.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ site_tp_select <- c("ST","ST-CA","SP")
# sites 01412350, 01484272, and 01482695 coded as site type "ST" but appear to be tidally-influenced
omit_nwis_sites <- c("01412350","01484272","01482695")

# Define search radius to match sites to PRMS segments (in meters)
bird_dist_cutoff_m <- 500

# Retain NWIS sites even if nearest PRMS segment is outside of radius set by bird_dist_cutoff_m
retain_nwis_sites <- c("01474703","01477050")

# Define PRMS mainstem segments thought to be tidally-influenced (used for filtering sites used in daily/sub-daily models)
mainstem_reaches_tidal <- c("2771_1","2769_1","2768_1","2767_1","2764_1","2762_1","2763_1","2759_1","2757_1","2752_1",
"2753_1","2755_1","2772_1","388_1","389_1","385_1","386_1","383_1","382_1","377_1","378_1",
Expand Down

0 comments on commit d0691b7

Please sign in to comment.