Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Single Monthly Values in Scalar and Generic Classes #42

Open
wants to merge 13 commits into
base: i38-generic-var-templates
Choose a base branch
from
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ export(climdexGenericVector.csv)
export(climdexGenericVector.raw)
export(climdexInput.csv)
export(climdexInput.raw)
export(climdexSingleMonthlyScalar.csv)
export(climdexSingleMonthlyScalar.raw)
export(climdexSingleMonthlyVector.csv)
export(climdexSingleMonthlyVector.raw)
export(compute.gen.stat)
export(compute.stat.scalar)
export(compute.stat.vector)
Expand Down Expand Up @@ -69,6 +73,7 @@ import(methods)
importFrom(circular,circular)
importFrom(circular,mean.circular)
importFrom(circular,sd.circular)
importFrom(stats,na.omit)
importFrom(stats,quantile)
importFrom(utils,head)
importFrom(utils,read.csv)
Expand Down
16 changes: 11 additions & 5 deletions R/GenericVariable_utils.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Utility function to validate arguments for scalar and vector data.
check.generic.argument.validity <- function( data, dates, max.missing.days, calendar) {

if (length(max.missing.days) != 3 || !all(c("annual", "monthly", "seasonal") %in% names(max.missing.days))) {
stop("max.missing.days must be a named vector with 'annual', 'monthly', and 'seasonal' elements.")
}
Expand All @@ -14,18 +14,24 @@ check.generic.argument.validity <- function( data, dates, max.missing.days, cale
if (missing(dates)) {
stop("Argument 'dates' is missing.")
}

if (any(is.na(dates))){
stop("Argument 'dates' has NA values.")
}

if (!is.numeric(data)) {
stop("Primary Data must be numeric.")
}

if (length(data) == 0 || length(dates) == 0) {
stop("Primary data and dates must not be empty vectors.")
}
if (length(data) != length(dates)) {
stop("Primary data and dates must have the same length.")
}

if(!is.null(dates) && !inherits(dates, "PCICt"))
stop(paste("Dates must be of class PCICt."))
if(!is.null(dates) && !inherits(dates, "PCICt")){
stop(paste("Dates must be of class PCICt."))
}


# Calendar check: verify it matches one of the recognized types
valid_calendars <- c("360_day", "360", "365_day", "365", "noleap", "gregorian", "proleptic_gregorian")
Expand Down
110 changes: 110 additions & 0 deletions R/climdexGenericScalar.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,113 @@ climdexGenericScalar.csv <- function(

return(obj)
}
#' @title climdexSingleMonthlyScalar.raw
#'
#' @description
#' Creates a `ClimdexGenericScalar` object from raw scalar climate data with a single value per month constraint.
#'
#' @details
#' This function is a wrapper for creating `ClimdexGenericScalar` objects where temporal resolution is a single value per month.
#' It automatically sets the `max.missing.days` to `+Inf`. To ensure consistency, each data point must correspond
#' to the 1st day of each month. The function will raise an error if there is more than one value per month or if any date is not on the 1st.
#'
#' @param data A numeric vector containing the scalar climate data.
#' @param dates A `PCICt` vector corresponding to the data dates. Each date must correspond to the 1st day of each month.
#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere.
#' @param calendar A string representing the calendar type, e.g., "gregorian".
#' @return A `ClimdexGenericScalar` object containing the processed data.
#'
#' @seealso [climdexGenericScalar.raw()], [climdexSingleMonthlyScalar.csv()]
#'
#' @examples
#' \dontrun{
#' data <- runif(12, 0, 20)
#' dates <- as.PCICt(seq(as.Date("2020-01-01"), by = "month", length.out = 12), cal = "gregorian")
#' scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates)
#' }
#'
#' @export

climdexSingleMonthlyScalar.raw <- function(
data,
dates,
northern.hemisphere = TRUE,
calendar = "gregorian") {
max.missing.days <- c(annual = +Inf, monthly = +Inf, seasonal = +Inf)

valid_dates <- dates[!is.na(dates)]
# Check if there is exactly one value per month on the 1st day
unique_months <- unique(format(valid_dates, "%Y-%m"))
day_of_month <- as.integer(format(valid_dates, "%d"))

# Check that the length of unique months matches the number of dates, ensuring only one value per month

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to require 12 months? Or is it legitimate to have fewer than that?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no requirement for inputs to have multiples of 12 months. I’ve added a test to reflect that.

if (length(unique_months) != length(valid_dates)) {
stop("Data must have exactly one value per month.")
}

# Check that all dates correspond to the 1st day of each month
if (!all(day_of_month == 1)) {
stop("Data must be on the 1st day of each month.")
}

obj <- climdexGenericScalar.raw(
data = data,
dates = dates,
max.missing.days = max.missing.days,
northern.hemisphere = northern.hemisphere,
calendar = calendar
)
return(obj)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice and clean.

Q: Would it be cleaner to simplify to

return climdexGenericScalar.raw(
  ...
);

and not use obj?

(This would apply in several places in this codebase.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

}

#' @title climdexSingleMonthlyScalar.csv
#'
#' @description
#' Reads scalar climate data with a single value per month constraint from a CSV file and creates a `ClimdexGenericScalar` object.
#'
#' @details
#' This function reads scalar climate data and validates that there is a single value per month. It automatically sets the `max.missing.days`
#' to `+Inf` and builds a `ClimdexGenericScalar` object. Each date must correspond to the 1st day of each month.
#'
#' @param file The file path to the CSV containing the scalar climate data.
#' @param data.column The name of the column containing the scalar data in the CSV file.
#' @param date.columns A vector of column names corresponding to the date fields in the CSV file.
#' @param date.format A string representing the format of the date fields.
#' @param na.strings A character vector of strings to interpret as `NA`.
#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere.
#' @param calendar A string representing the calendar type (e.g., "gregorian").
#'
#' @return A `ClimdexGenericScalar` object containing the processed scalar climate data.
#'
#' @seealso [climdexSingleMonthlyScalar.raw()]
#'
#' @examples
#' \dontrun{
#' csv_file <- "path/to/scalar_data.csv"
#' scalar_obj <- climdexSingleMonthlyScalar.csv(
#' file = csv_file, data.column = "data",
#' date.columns = "date", date.format = "%Y-%m-%d"
#' )
#' }
#'
#' @export

climdexSingleMonthlyScalar.csv <- function(
file,
data.column,
date.columns,
date.format,
na.strings = NULL,
northern.hemisphere = TRUE,
calendar = "gregorian") {
GS.csv <- read_csv_data(file, data.columns = data.column, date.columns, date.format, na.strings, calendar)

obj <- climdexSingleMonthlyScalar.raw(
data = GS.csv$data[[1]],
dates = GS.csv$dates,
northern.hemisphere = northern.hemisphere,
calendar = calendar
)

return(obj)
QSparks marked this conversation as resolved.
Show resolved Hide resolved
}
133 changes: 132 additions & 1 deletion R/climdexGenericVector.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ climdexGenericVector.raw <- function(
if (missing(secondary)) {
stop("Secondary data argument is missing.")
}
if (length(secondary) == 0) {
stop("Secondary must not be an empty vector.")
}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you instead call check.generic.argument.validity on secondary? (Might need to make its stop messages more generic -- or parametrize the name of the data used in them.) That would repeat some checks but it might end up being simpler to be sure we are validating everything completely.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’ve moved the secondary and format checks into check.generic.argument.validity with a flag for when we pass the extra secondary and format vector parameters. I’ve moved the data & dates check to an internal validate_data_dates function that we call with scalar or primary and secondary vector data.

# Check that primary, secondary, and dates have the same length
if (length(primary) != length(secondary) || length(primary) != length(dates)) {
stop("Lengths of 'primary', 'secondary', and 'dates' must be equal.")
Expand Down Expand Up @@ -171,4 +174,132 @@ climdexGenericVector.csv <- function(
)

return(obj)
}
}
#' @title climdexSingleMonthlyVector.raw
#'
#' @description
#' Creates a `ClimdexGenericVector` object from raw vector climate data with a single value per month constraint.
#'
#' @details
#' This function processes vector climate data and validates that there is a single value per month.
#' It automatically sets the `max.missing.days` to `+Inf` and builds a `ClimdexGenericVector` object.
#' To ensure consistency, each data point must correspond to the 1st day of each month.
#' The function will raise an error if there is more than one value per month or if any date is not on the 1st.
#'
#' @param primary A numeric vector representing the primary data (e.g., wind speed).
#' @param secondary A numeric or character vector representing the secondary data (e.g., wind direction).
#' @param dates A `PCICt` vector corresponding to the data dates. Each date must correspond to the 1st day of each month.
#' @param format A string specifying the format of the vector data ("polar", "cartesian", or "cardinal").
#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere.
#' @param calendar String representing the calendar type, e.g., "gregorian".
#'
#' @return A `ClimdexGenericVector` object containing the processed vector data.
#' #'
#' @seealso [climdexGenericVector.raw()], [climdexSingleMonthlyVector.csv()]
#'
#' @examples
#' \dontrun{
#' primary <- runif(12, 0, 20)
#' secondary <- runif(12, 0, 360)
#' dates <- as.PCICt(seq(as.Date("2020-01-01"), by = "month", length.out = 12), cal = "gregorian")
#' vector_obj <- climdexSingleMonthlyVector.raw(primary, secondary, dates, "polar")
#' }
#'
#' @export

climdexSingleMonthlyVector.raw <- function(
primary,
secondary,
dates,
format = "polar",
northern.hemisphere = TRUE,
calendar = "gregorian") {
max.missing.days <- c(annual = +Inf, monthly = +Inf, seasonal = +Inf)

valid_dates <- dates[!is.na(dates)]
# Check if there is exactly one value per month on the 1st day
unique_months <- unique(format(valid_dates, "%Y-%m"))
day_of_month <- as.integer(format(valid_dates, "%d"))

# Check that the length of unique months matches the number of dates, ensuring only one value per month
if (length(unique_months) != length(valid_dates)) {
stop("Data must have exactly one value per month.")
}

# Check that all dates correspond to the 1st day of each month
if (!all(day_of_month == 1)) {
stop("Data must be on the 1st day of each month.")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The date checking is an exact copy of code in another function. Suggest we DRY that up in a check function called in each place.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved to a util function check.single.month.dates.

}

obj <- climdexGenericVector.raw(
primary = primary,
secondary = secondary,
dates = dates,
format = format,
max.missing.days = max.missing.days,
northern.hemisphere = northern.hemisphere,
calendar = calendar
)

return(obj)
}

#' @title climdexSingleMonthlyVector.csv
#'
#' @description
#' Reads vector climate data with a single value per month constraint from a CSV file and creates a `ClimdexGenericVector` object.
#'
#' @details
#' This function reads vector climate data and validates that there is a single value per month.
#' It automatically sets the `max.missing.days` to `+Inf` and builds a `ClimdexGenericVector` object.
#' Each date must correspond to the 1st day of each month.
#'
#' @param file The file path to the CSV containing the vector climate data.
#' @param primary.column The name of the column containing the primary data (e.g., magnitude) in the CSV file.
#' @param secondary.column The name of the column containing the secondary data (e.g., direction) in the CSV file.
#' @param date.columns A vector of column names corresponding to the date fields in the CSV file.
#' @param date.format A string representing the format of the date fields.
#' @param format A string specifying the format of the vector data. Must be one of `"polar"`, `"cartesian"`, or `"cardinal"`.
#' @param na.strings A character vector of strings to interpret as `NA`.
#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere.
#' @param calendar A string representing the calendar type (e.g., "gregorian").
#'
#' @return A `ClimdexGenericVector` object containing the processed vector climate data.
#'
#' @seealso [climdexSingleMonthlyVector.raw()]
#'
#' @examples
#' \dontrun{
#' csv_file <- "path/to/vector_data.csv"
#' vector_obj <- climdexSingleMonthlyVector.csv(
#' file = csv_file, primary.column = "primary",
#' secondary.column = "secondary", date.columns = "date",
#' date.format = "%Y-%m-%d", format = "polar"
#' )
#' }
#'
#' @export

climdexSingleMonthlyVector.csv <- function(
file,
primary.column,
secondary.column,
date.columns,
date.format,
format = "polar",
na.strings = NULL,
northern.hemisphere = TRUE,
calendar = "gregorian") {
GV.csv <- read_csv_data(file, data.columns = c(primary.column, secondary.column), date.columns, date.format, na.strings, calendar)

obj <- climdexSingleMonthlyVector.raw(
primary = GV.csv$data[[1]],
secondary = GV.csv$data[[2]],
dates = GV.csv$dates,
format = format,
northern.hemisphere = northern.hemisphere,
calendar = calendar
)

return(obj)
}
20 changes: 20 additions & 0 deletions R/generic_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,33 @@ library(circular)
#' # Assuming `scalar_obj` is a valid climdexGenericScalar object:
#' \dontrun{compute.gen.stat(scalar_obj, "max", scalar_obj@data, "monthly", FALSE)}
#'
#' @importFrom stats na.omit
#'
#' @export
#' @keywords internal
compute.gen.stat <- function(gen.var, stat, data, freq = c("monthly", "annual", "seasonal"), include.exact.dates = FALSE) {
stopifnot(!is.null(data))
freq <- match.arg(freq)
exact_date_stats <- c("max", "min")

# Determine if the data is single-value per month
single_value_per_month <- all(tapply(data, [email protected]$monthly, function(x) length(na.omit(x)) == 1, simplify = TRUE))


# Check if the data is single-value per month
if (single_value_per_month) {
if (freq == "monthly") {
# Warn if trying to compute monthly stats with single-value data per month
warning("Monthly calculations on single-value-per-month data are not meaningful. Proceeding with the calculation.")
}

if (include.exact.dates) {
# Warn if exact dates are requested on single-value-per-month data
warning("Exact dates are not meaningful for single-value-per-month data. Proceeding without exact dates.")
include.exact.dates <- FALSE
}
}

if (include.exact.dates && !(stat %in% exact_date_stats)) {
message(paste("Warning: Exact dates are not applicable for the", stat, "statistic. Proceeding without exact dates."))
include.exact.dates <- FALSE
Expand Down
Loading
Loading