-
Notifications
You must be signed in to change notification settings - Fork 14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support Single Monthly Values in Scalar and Generic Classes #42
base: i38-generic-var-templates
Are you sure you want to change the base?
Changes from 10 commits
3324eac
4fa5cc8
7d026fc
bb5d3c8
b1688cc
3c229f7
7bdf395
6c3f6ff
7e6e2a3
ec6f655
c28faec
fa08c1a
600d7d5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -127,3 +127,113 @@ climdexGenericScalar.csv <- function( | |
|
||
return(obj) | ||
} | ||
#' @title climdexSingleMonthlyScalar.raw | ||
#' | ||
#' @description | ||
#' Creates a `ClimdexGenericScalar` object from raw scalar climate data with a single value per month constraint. | ||
#' | ||
#' @details | ||
#' This function is a wrapper for creating `ClimdexGenericScalar` objects where temporal resolution is a single value per month. | ||
#' It automatically sets the `max.missing.days` to `+Inf`. To ensure consistency, each data point must correspond | ||
#' to the 1st day of each month. The function will raise an error if there is more than one value per month or if any date is not on the 1st. | ||
#' | ||
#' @param data A numeric vector containing the scalar climate data. | ||
#' @param dates A `PCICt` vector corresponding to the data dates. Each date must correspond to the 1st day of each month. | ||
#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere. | ||
#' @param calendar A string representing the calendar type, e.g., "gregorian". | ||
#' @return A `ClimdexGenericScalar` object containing the processed data. | ||
#' | ||
#' @seealso [climdexGenericScalar.raw()], [climdexSingleMonthlyScalar.csv()] | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' data <- runif(12, 0, 20) | ||
#' dates <- as.PCICt(seq(as.Date("2020-01-01"), by = "month", length.out = 12), cal = "gregorian") | ||
#' scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates) | ||
#' } | ||
#' | ||
#' @export | ||
|
||
climdexSingleMonthlyScalar.raw <- function( | ||
data, | ||
dates, | ||
northern.hemisphere = TRUE, | ||
calendar = "gregorian") { | ||
max.missing.days <- c(annual = +Inf, monthly = +Inf, seasonal = +Inf) | ||
|
||
valid_dates <- dates[!is.na(dates)] | ||
# Check if there is exactly one value per month on the 1st day | ||
unique_months <- unique(format(valid_dates, "%Y-%m")) | ||
day_of_month <- as.integer(format(valid_dates, "%d")) | ||
|
||
# Check that the length of unique months matches the number of dates, ensuring only one value per month | ||
if (length(unique_months) != length(valid_dates)) { | ||
stop("Data must have exactly one value per month.") | ||
} | ||
|
||
# Check that all dates correspond to the 1st day of each month | ||
if (!all(day_of_month == 1)) { | ||
stop("Data must be on the 1st day of each month.") | ||
} | ||
|
||
obj <- climdexGenericScalar.raw( | ||
data = data, | ||
dates = dates, | ||
max.missing.days = max.missing.days, | ||
northern.hemisphere = northern.hemisphere, | ||
calendar = calendar | ||
) | ||
return(obj) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice and clean. Q: Would it be cleaner to simplify to
and not use (This would apply in several places in this codebase.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
} | ||
|
||
#' @title climdexSingleMonthlyScalar.csv | ||
#' | ||
#' @description | ||
#' Reads scalar climate data with a single value per month constraint from a CSV file and creates a `ClimdexGenericScalar` object. | ||
#' | ||
#' @details | ||
#' This function reads scalar climate data and validates that there is a single value per month. It automatically sets the `max.missing.days` | ||
#' to `+Inf` and builds a `ClimdexGenericScalar` object. Each date must correspond to the 1st day of each month. | ||
#' | ||
#' @param file The file path to the CSV containing the scalar climate data. | ||
#' @param data.column The name of the column containing the scalar data in the CSV file. | ||
#' @param date.columns A vector of column names corresponding to the date fields in the CSV file. | ||
#' @param date.format A string representing the format of the date fields. | ||
#' @param na.strings A character vector of strings to interpret as `NA`. | ||
#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere. | ||
#' @param calendar A string representing the calendar type (e.g., "gregorian"). | ||
#' | ||
#' @return A `ClimdexGenericScalar` object containing the processed scalar climate data. | ||
#' | ||
#' @seealso [climdexSingleMonthlyScalar.raw()] | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' csv_file <- "path/to/scalar_data.csv" | ||
#' scalar_obj <- climdexSingleMonthlyScalar.csv( | ||
#' file = csv_file, data.column = "data", | ||
#' date.columns = "date", date.format = "%Y-%m-%d" | ||
#' ) | ||
#' } | ||
#' | ||
#' @export | ||
|
||
climdexSingleMonthlyScalar.csv <- function( | ||
file, | ||
data.column, | ||
date.columns, | ||
date.format, | ||
na.strings = NULL, | ||
northern.hemisphere = TRUE, | ||
calendar = "gregorian") { | ||
GS.csv <- read_csv_data(file, data.columns = data.column, date.columns, date.format, na.strings, calendar) | ||
|
||
obj <- climdexSingleMonthlyScalar.raw( | ||
data = GS.csv$data[[1]], | ||
dates = GS.csv$dates, | ||
northern.hemisphere = northern.hemisphere, | ||
calendar = calendar | ||
) | ||
|
||
return(obj) | ||
QSparks marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,6 +45,9 @@ climdexGenericVector.raw <- function( | |
if (missing(secondary)) { | ||
stop("Secondary data argument is missing.") | ||
} | ||
if (length(secondary) == 0) { | ||
stop("Secondary must not be an empty vector.") | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you instead call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I’ve moved the |
||
# Check that primary, secondary, and dates have the same length | ||
if (length(primary) != length(secondary) || length(primary) != length(dates)) { | ||
stop("Lengths of 'primary', 'secondary', and 'dates' must be equal.") | ||
|
@@ -171,4 +174,132 @@ climdexGenericVector.csv <- function( | |
) | ||
|
||
return(obj) | ||
} | ||
} | ||
#' @title climdexSingleMonthlyVector.raw | ||
#' | ||
#' @description | ||
#' Creates a `ClimdexGenericVector` object from raw vector climate data with a single value per month constraint. | ||
#' | ||
#' @details | ||
#' This function processes vector climate data and validates that there is a single value per month. | ||
#' It automatically sets the `max.missing.days` to `+Inf` and builds a `ClimdexGenericVector` object. | ||
#' To ensure consistency, each data point must correspond to the 1st day of each month. | ||
#' The function will raise an error if there is more than one value per month or if any date is not on the 1st. | ||
#' | ||
#' @param primary A numeric vector representing the primary data (e.g., wind speed). | ||
#' @param secondary A numeric or character vector representing the secondary data (e.g., wind direction). | ||
#' @param dates A `PCICt` vector corresponding to the data dates. Each date must correspond to the 1st day of each month. | ||
#' @param format A string specifying the format of the vector data ("polar", "cartesian", or "cardinal"). | ||
#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere. | ||
#' @param calendar String representing the calendar type, e.g., "gregorian". | ||
#' | ||
#' @return A `ClimdexGenericVector` object containing the processed vector data. | ||
#' #' | ||
#' @seealso [climdexGenericVector.raw()], [climdexSingleMonthlyVector.csv()] | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' primary <- runif(12, 0, 20) | ||
#' secondary <- runif(12, 0, 360) | ||
#' dates <- as.PCICt(seq(as.Date("2020-01-01"), by = "month", length.out = 12), cal = "gregorian") | ||
#' vector_obj <- climdexSingleMonthlyVector.raw(primary, secondary, dates, "polar") | ||
#' } | ||
#' | ||
#' @export | ||
|
||
climdexSingleMonthlyVector.raw <- function( | ||
primary, | ||
secondary, | ||
dates, | ||
format = "polar", | ||
northern.hemisphere = TRUE, | ||
calendar = "gregorian") { | ||
max.missing.days <- c(annual = +Inf, monthly = +Inf, seasonal = +Inf) | ||
|
||
valid_dates <- dates[!is.na(dates)] | ||
# Check if there is exactly one value per month on the 1st day | ||
unique_months <- unique(format(valid_dates, "%Y-%m")) | ||
day_of_month <- as.integer(format(valid_dates, "%d")) | ||
|
||
# Check that the length of unique months matches the number of dates, ensuring only one value per month | ||
if (length(unique_months) != length(valid_dates)) { | ||
stop("Data must have exactly one value per month.") | ||
} | ||
|
||
# Check that all dates correspond to the 1st day of each month | ||
if (!all(day_of_month == 1)) { | ||
stop("Data must be on the 1st day of each month.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The date checking is an exact copy of code in another function. Suggest we DRY that up in a check function called in each place. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moved to a util function |
||
} | ||
|
||
obj <- climdexGenericVector.raw( | ||
primary = primary, | ||
secondary = secondary, | ||
dates = dates, | ||
format = format, | ||
max.missing.days = max.missing.days, | ||
northern.hemisphere = northern.hemisphere, | ||
calendar = calendar | ||
) | ||
|
||
return(obj) | ||
} | ||
|
||
#' @title climdexSingleMonthlyVector.csv | ||
#' | ||
#' @description | ||
#' Reads vector climate data with a single value per month constraint from a CSV file and creates a `ClimdexGenericVector` object. | ||
#' | ||
#' @details | ||
#' This function reads vector climate data and validates that there is a single value per month. | ||
#' It automatically sets the `max.missing.days` to `+Inf` and builds a `ClimdexGenericVector` object. | ||
#' Each date must correspond to the 1st day of each month. | ||
#' | ||
#' @param file The file path to the CSV containing the vector climate data. | ||
#' @param primary.column The name of the column containing the primary data (e.g., magnitude) in the CSV file. | ||
#' @param secondary.column The name of the column containing the secondary data (e.g., direction) in the CSV file. | ||
#' @param date.columns A vector of column names corresponding to the date fields in the CSV file. | ||
#' @param date.format A string representing the format of the date fields. | ||
#' @param format A string specifying the format of the vector data. Must be one of `"polar"`, `"cartesian"`, or `"cardinal"`. | ||
#' @param na.strings A character vector of strings to interpret as `NA`. | ||
#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere. | ||
#' @param calendar A string representing the calendar type (e.g., "gregorian"). | ||
#' | ||
#' @return A `ClimdexGenericVector` object containing the processed vector climate data. | ||
#' | ||
#' @seealso [climdexSingleMonthlyVector.raw()] | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' csv_file <- "path/to/vector_data.csv" | ||
#' vector_obj <- climdexSingleMonthlyVector.csv( | ||
#' file = csv_file, primary.column = "primary", | ||
#' secondary.column = "secondary", date.columns = "date", | ||
#' date.format = "%Y-%m-%d", format = "polar" | ||
#' ) | ||
#' } | ||
#' | ||
#' @export | ||
|
||
climdexSingleMonthlyVector.csv <- function( | ||
file, | ||
primary.column, | ||
secondary.column, | ||
date.columns, | ||
date.format, | ||
format = "polar", | ||
na.strings = NULL, | ||
northern.hemisphere = TRUE, | ||
calendar = "gregorian") { | ||
GV.csv <- read_csv_data(file, data.columns = c(primary.column, secondary.column), date.columns, date.format, na.strings, calendar) | ||
|
||
obj <- climdexSingleMonthlyVector.raw( | ||
primary = GV.csv$data[[1]], | ||
secondary = GV.csv$data[[2]], | ||
dates = GV.csv$dates, | ||
format = format, | ||
northern.hemisphere = northern.hemisphere, | ||
calendar = calendar | ||
) | ||
|
||
return(obj) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,13 +32,33 @@ library(circular) | |
#' # Assuming `scalar_obj` is a valid climdexGenericScalar object: | ||
#' \dontrun{compute.gen.stat(scalar_obj, "max", scalar_obj@data, "monthly", FALSE)} | ||
#' | ||
#' @importFrom stats na.omit | ||
#' | ||
#' @export | ||
#' @keywords internal | ||
compute.gen.stat <- function(gen.var, stat, data, freq = c("monthly", "annual", "seasonal"), include.exact.dates = FALSE) { | ||
stopifnot(!is.null(data)) | ||
freq <- match.arg(freq) | ||
exact_date_stats <- c("max", "min") | ||
|
||
# Determine if the data is single-value per month | ||
single_value_per_month <- all(tapply(data, [email protected]$monthly, function(x) length(na.omit(x)) == 1, simplify = TRUE)) | ||
|
||
|
||
# Check if the data is single-value per month | ||
if (single_value_per_month) { | ||
if (freq == "monthly") { | ||
# Warn if trying to compute monthly stats with single-value data per month | ||
warning("Monthly calculations on single-value-per-month data are not meaningful. Proceeding with the calculation.") | ||
} | ||
|
||
if (include.exact.dates) { | ||
# Warn if exact dates are requested on single-value-per-month data | ||
warning("Exact dates are not meaningful for single-value-per-month data. Proceeding without exact dates.") | ||
include.exact.dates <- FALSE | ||
} | ||
} | ||
|
||
if (include.exact.dates && !(stat %in% exact_date_stats)) { | ||
message(paste("Warning: Exact dates are not applicable for the", stat, "statistic. Proceeding without exact dates.")) | ||
include.exact.dates <- FALSE | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to require 12 months? Or is it legitimate to have fewer than that?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is no requirement for inputs to have multiples of 12 months. I’ve added a test to reflect that.