Skip to content

Commit

Permalink
Use "valid_through"; less ambiguous than "valid_until"
Browse files Browse the repository at this point in the history
  • Loading branch information
bpbond committed Dec 3, 2023
1 parent 4b94de5 commit b987f89
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 27 deletions.
6 changes: 3 additions & 3 deletions synoptic/L1_normalize.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -130,16 +130,16 @@ f <- function(fn, out_dir, design_table) {
sort = FALSE)
# This is a left join, and normally should not have changed the number of rows
# The exception would be if a sensor has been reassigned; in that case it will have
# >1 entry in the design table, with the "valid_until" column controlling when the
# >1 entry in the design table, with the "valid_through" column controlling when the
# old assignment becomes invalid and the new one takes over. Call valid_entries()
# (in helpers.R) to figure out which mappings should apply.
message("\tChecking for multiple-match design links...")
dat_retain <- valid_entries(objects = dat$loggernet_variable,
times = ymd_hms(dat$TIMESTAMP, tz = "EST"),
valid_until = dat$valid_until)
valid_through = dat$valid_through)
message("\tDropping ", sum(!dat_retain), " out-of-date design links")
dat <- dat[dat_retain,]
dat$valid_until <- NULL
dat$valid_through <- NULL
# At this point, there should be exactly one match for every loggernet variable
if(nrow(dat) > old_rows) {
Expand Down
2 changes: 1 addition & 1 deletion synoptic/data_TEST/design_table.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Site,Logger,Table,loggernet_variable,design_link,valid_until,research_name,note
Site,Logger,Table,loggernet_variable,design_link,valid_through,research_name,note
PTR,Compass_PTR_UP_313,CheckTable,Format,,,,
PTR,Compass_PTR_UP_313,CheckTable,RECORD,,,,
PTR,Compass_PTR_UP_313,CheckTable,BattV,BattV-PTR-UP,,voltage,
Expand Down
8 changes: 4 additions & 4 deletions synoptic/data_TEST/design_table_README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Design table fields:
| Table | Datalogger table name |
| loggernet_variable | Datalogger variable name |
| design_link | Design link (see below) |
| valid_until | Expiry date of design link |
| valid_through | Last valid date (YYYY-MM-DD) of design link; blank if none |
| research_name | Type of measurement |
| note | Note |

Expand All @@ -25,9 +25,9 @@ Design links tend to follow a pattern of {sensor}-{datum}-{site}-{plot}, e.g.
`GW-Salinity-PTR-UP` (groundwater, salinity, Portage River, upland). The format
is currently not enforced or consistent, however.

The `valid_until` column is used when a sensor is reassigned, for example if a tree
The `valid_through` column is used when a sensor is reassigned, for example if a tree
dies and we reassign its sapflux sensor to a new tree, and encodes the last valid
date for a given design link. In this case the loggernet variable has _two_ entries
(rows): the original assignment, with a `valid_until` entry, and the new assignment,
with a blank `valid_until` entry.
(rows): the original assignment, with a YYYY-MM-DD `valid_through` entry, and the new assignment,
with a blank `valid_through` entry.

38 changes: 19 additions & 19 deletions synoptic/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ list_directories <- function(dir_list, outfile = "", prefix = "",
# The design links might not be stable over time; for example, if a tree
# dies, its sensor might get reassigned to a new tree. In this case the
# design_link table will have two entries, one for the old assignment and
# one for the new. We know which one to use by the "valid_until" column,
# one for the new. We know which one to use by the "valid_through" column,
# which gives an end date for a design link--or, most commonly, it will be
# empty (NA) indicating that there is no end date.
#
Expand All @@ -276,24 +276,24 @@ list_directories <- function(dir_list, outfile = "", prefix = "",
# design links.
#
# This function uses the object (i.e. group identifier; typically, Logger+
# Table+Loggernet_variable), timestamp, and valid_until timestamps to identify
# Table+Loggernet_variable), timestamp, and valid_through timestamps to identify
# which rows to keep (correct design_link assignment) and which to drop.
valid_entries <- function(objects, times, valid_until) {
# Nothing to do if there are no valid_until entries
if(all(is.na(valid_until))) return(rep(TRUE, length(objects())))
valid_entries <- function(objects, times, valid_through) {
# Nothing to do if there are no valid_through entries
if(all(is.na(valid_through))) return(rep(TRUE, length(objects())))

# Any NA valid_until entries apply into the far future
valid_until[is.na(valid_until)] <- ymd_hms("2999-12-31 11:59:00")
past_valid_time <- times > valid_until
# Any NA valid_through entries apply into the far future
valid_through[is.na(valid_through)] <- ymd_hms("2999-12-31 11:59:00")
past_valid_time <- times > valid_through

# Create a data frame to aggregate and then merge, below
x <- data.frame(obj = objects, time = times, vu = valid_until)
# Compute the minimum valid_until entry for each object and time that is
# not past the valid_until point; this is the 'controlling' value
x <- data.frame(obj = objects, time = times, vu = valid_through)
# Compute the minimum valid_through entry for each object and time that is
# not past the valid_through point; this is the 'controlling' value
y <- aggregate(vu ~ obj + time, data = x[!past_valid_time,], FUN = min)
names(y)[3] <- "controlling"

# Figure out controlling valid_until for each object/time
# Figure out controlling valid_through for each object/time
z <- merge(x, y, all.x = TRUE)
# An NA controlling entry means there is none
valids <- z$vu == z$controlling
Expand All @@ -307,14 +307,14 @@ test_data <- data.frame(obj = c(1, 1, 1, 2, 2, 2), time = c(1, 2, 3, 1, 2, 3))
# Object 2 changes its design link after time 2
test_dt <- data.frame(obj = c(1,2,2),
dl = c("A", "B", "C"),
valid_until = c(NA, 2, NA))
valid_through = c(NA, 2, NA))
# Merge the 'data' with the 'design link table'
x <- merge(test_data, test_dt)
# Call valid_entries. It figures out that all the object 1 entries should be
# retained, but 1 of 2 entries in each timestep should be dropped for object 2.
# This is because there are two design_table entries for it (see above); the
# first ends at time point 2, and the second is indefinite after that.
valid_entries(x$obj, x$time, x$valid_until)
valid_entries(x$obj, x$time, x$valid_through)

# Test code for valid_entries

Expand All @@ -330,20 +330,20 @@ stopifnot(ret == c(FALSE, TRUE, FALSE, TRUE))
# One object, shifts
ret <- valid_entries(c(1, 1, 1, 1), c(2, 2, 3, 3), c(2, NA, 2, NA))
stopifnot(ret == c(TRUE, FALSE, FALSE, TRUE))
# One objects, shifts twice (valid_untils at 1 and 2)
# One object, shifts twice (valid_throughs at 1 and 2)
ret <- valid_entries(objects = rep(1, 9),
times = c(1, 1, 1, 2, 2, 2, 3, 3, 3),
valid_until = c(1, 2, NA, 1, 2, NA, 1, 2, NA))
valid_through = c(1, 2, NA, 1, 2, NA, 1, 2, NA))
stopifnot(ret == c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE))
# Two objects, only one shifts
ret <- valid_entries(objects = c(1, 1, 1, 2, 2, 2, 2, 2, 2),
times = c(1, 2, 3, 1, 1, 2, 2, 3, 3),
valid_until = c(NA, NA, NA, 2, NA, 2, NA, 2, NA))
valid_through = c(NA, NA, NA, 2, NA, 2, NA, 2, NA))
stopifnot(ret == c(TRUE, TRUE, TRUE, # obj 1
TRUE, FALSE, TRUE, FALSE, FALSE, TRUE)) # obj 2
# There's a valid_until but no new entry
# There's a valid_through but no new entry
ret <- valid_entries(objects = c(1, 1),
times = c(1, 2),
valid_until = c(1, 1))
valid_through = c(1, 1))
stopifnot(ret == c(TRUE, FALSE))

0 comments on commit b987f89

Please sign in to comment.