From 3324eac510cc105dac30d10c069cff8d1a68861e Mon Sep 17 00:00:00 2001 From: Quintin Date: Fri, 25 Oct 2024 15:10:11 -0700 Subject: [PATCH 01/10] WIP: Adding support for single vals per month --- NAMESPACE | 5 ++ R/climdexGenericScalar.R | 92 ++++++++++++++++++++++++++++++++ R/climdexGenericVector.R | 110 ++++++++++++++++++++++++++++++++++++++- R/generic_stats.R | 20 +++++++ 4 files changed, 226 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index b4b7320..464dbbe 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,6 +38,10 @@ export(climdexGenericVector.csv) export(climdexGenericVector.raw) export(climdexInput.csv) export(climdexInput.raw) +export(climdexSingleMonthlyScalar.csv) +export(climdexSingleMonthlyScalar.raw) +export(climdexSingleMonthlyVector.csv) +export(climdexSingleMonthlyVector.raw) export(compute.stat.scalar) export(compute.stat.vector) export(compute_circular_mean) @@ -68,6 +72,7 @@ import(methods) importFrom(circular,circular) importFrom(circular,mean.circular) importFrom(circular,sd.circular) +importFrom(stats,na.omit) importFrom(stats,quantile) importFrom(utils,head) importFrom(utils,read.csv) diff --git a/R/climdexGenericScalar.R b/R/climdexGenericScalar.R index f9783f0..bd54d36 100644 --- a/R/climdexGenericScalar.R +++ b/R/climdexGenericScalar.R @@ -127,3 +127,95 @@ climdexGenericScalar.csv <- function( return(obj) } + +#' @title climdexSingleMonthlyScalar.raw +#' +#' @description +#' Creates a `ClimdexGenericScalar` object from raw scalar climate data with a single value per month constraint. +#' +#' @details +#' This function is a wrapper for creating `ClimdexGenericScalar` objects where temporal resolution is a single value per month. +#' It automatically sets the `max.missing.days` to `+Inf`. To ensure consistency, each data point must correspond +#' to the 1st day of each month. The function will raise an error if there is more than one value per month or if any date is not on the 1st. +#' +#' @param data A numeric vector containing the scalar climate data. +#' @param dates A `PCICt` vector corresponding to the data dates. Each date must correspond to the 1st day of each month. +#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere. +#' @param calendar A string representing the calendar type, e.g., "gregorian". +#' @return A `ClimdexGenericScalar` object containing the processed data. +#' +#' @export + +climdexSingleMonthlyScalar.raw <- function( + data, + dates, + northern.hemisphere = TRUE, + calendar = "gregorian" +) { + max.missing.days <- c(annual = +Inf, monthly = +Inf, seasonal = +Inf) + + # Check if there is exactly one value per month on the 1st day + unique_months <- unique(format(dates, "%Y-%m")) + day_of_month <- as.integer(format(dates, "%d")) + + # Check that the length of unique months matches the number of dates, ensuring only one value per month + if (length(unique_months) != length(dates)) { + stop("Data must have exactly one value per month.") + } + + # Check that all dates correspond to the 1st day of each month + if (!all(day_of_month == 1)) { + stop("Data must be on the 1st day of each month.") + } + + obj <- climdexGenericScalar.raw( + data = data, + dates = dates, + max.missing.days = max.missing.days, + northern.hemisphere = northern.hemisphere, + calendar = calendar + ) + return(obj) +} + +#' @title climdexSingleMonthlyScalar.csv +#' +#' @description +#' Reads scalar climate data with a single value per month constraint from a CSV file and creates a `ClimdexGenericScalar` object. +#' +#' @details +#' This function reads scalar climate data and validates that there is a single value per month. It automatically sets the `max.missing.days` +#' to `+Inf` and builds a `ClimdexGenericScalar` object. Each date must correspond to the 1st day of each month. +#' +#' @param file The file path to the CSV containing the scalar climate data. +#' @param data.column The name of the column containing the scalar data in the CSV file. +#' @param date.columns A vector of column names corresponding to the date fields in the CSV file. +#' @param date.format A string representing the format of the date fields. +#' @param na.strings A character vector of strings to interpret as `NA`. +#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere. +#' @param calendar A string representing the calendar type (e.g., "gregorian"). +#' +#' @return A `ClimdexGenericScalar` object containing the processed scalar climate data. +#' +#' @export + +climdexSingleMonthlyScalar.csv <- function( + file, + data.column, + date.columns, + date.format, + na.strings = NULL, + northern.hemisphere = TRUE, + calendar = "gregorian" +) { + GS.csv <- read_csv_data(file, data.columns = data.column, date.columns, date.format, na.strings, calendar) + + obj <- climdexSingleMonthlyScalar.raw( + data = GS.csv$data[[1]], + dates = GS.csv$dates, + northern.hemisphere = northern.hemisphere, + calendar = calendar + ) + + return(obj) +} diff --git a/R/climdexGenericVector.R b/R/climdexGenericVector.R index 8858c66..fb349df 100644 --- a/R/climdexGenericVector.R +++ b/R/climdexGenericVector.R @@ -171,4 +171,112 @@ climdexGenericVector.csv <- function( ) return(obj) -} \ No newline at end of file +} + +#' @title climdexSingleMonthlyVector.raw +#' +#' @description +#' Creates a `ClimdexGenericVector` object from raw vector climate data with a single value per month constraint. +#' +#' @details +#' This function processes vector climate data and validates that there is a single value per month. +#' It automatically sets the `max.missing.days` to `+Inf` and builds a `ClimdexGenericVector` object. +#' To ensure consistency, each data point must correspond to the 1st day of each month. +#' The function will raise an error if there is more than one value per month or if any date is not on the 1st. +#' +#' @param primary A numeric vector representing the primary data (e.g., wind speed). +#' @param secondary A numeric or character vector representing the secondary data (e.g., wind direction). +#' @param dates A `PCICt` vector corresponding to the data dates. Each date must correspond to the 1st day of each month. +#' @param format A string specifying the format of the vector data ("polar", "cartesian", or "cardinal"). +#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere. +#' @param calendar String representing the calendar type, e.g., "gregorian". +#' +#' @return A `ClimdexGenericVector` object containing the processed vector data. +#' +#' @export + +climdexSingleMonthlyVector.raw <- function( + primary, + secondary, + dates, + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian" +) { + max.missing.days <- c(annual = +Inf, monthly = +Inf, seasonal = +Inf) + + # Check if there is exactly one value per month on the 1st day + unique_months <- unique(format(dates, "%Y-%m")) + day_of_month <- as.integer(format(dates, "%d")) + + # Check that the length of unique months matches the number of dates, ensuring only one value per month + if (length(unique_months) != length(dates)) { + stop("Data must have exactly one value per month.") + } + + # Check that all dates correspond to the 1st day of each month + if (!all(day_of_month == 1)) { + stop("Data must be on the 1st day of each month.") + } + + obj <- climdexGenericVector.raw( + primary = primary, + secondary = secondary, + dates = dates, + format = format, + max.missing.days = max.missing.days, + northern.hemisphere = northern.hemisphere, + calendar = calendar + ) + + return(obj) +} + +#' @title climdexSingleMonthlyVector.csv +#' +#' @description +#' Reads vector climate data with a single value per month constraint from a CSV file and creates a `ClimdexGenericVector` object. +#' +#' @details +#' This function reads vector climate data and validates that there is a single value per month. +#' It automatically sets the `max.missing.days` to `+Inf` and builds a `ClimdexGenericVector` object. +#' Each date must correspond to the 1st day of each month. +#' +#' @param file The file path to the CSV containing the vector climate data. +#' @param primary.column The name of the column containing the primary data (e.g., magnitude) in the CSV file. +#' @param secondary.column The name of the column containing the secondary data (e.g., direction) in the CSV file. +#' @param date.columns A vector of column names corresponding to the date fields in the CSV file. +#' @param date.format A string representing the format of the date fields. +#' @param format A string specifying the format of the vector data. Must be one of `"polar"`, `"cartesian"`, or `"cardinal"`. +#' @param na.strings A character vector of strings to interpret as `NA`. +#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere. +#' @param calendar A string representing the calendar type (e.g., "gregorian"). +#' +#' @return A `ClimdexGenericVector` object containing the processed vector climate data. +#' +#' @export + +climdexSingleMonthlyVector.csv <- function( + file, + primary.column, + secondary.column, + date.columns, + date.format, + format = "polar", + na.strings = NULL, + northern.hemisphere = TRUE, + calendar = "gregorian" +) { + GV.csv <- read_csv_data(file, data.columns = c(primary.column, secondary.column), date.columns, date.format, na.strings, calendar) + + obj <- climdexSingleMonthlyVector.raw( + primary = GV.csv$data[[1]], + secondary = GV.csv$data[[2]], + dates = GV.csv$dates, + format = format, + northern.hemisphere = northern.hemisphere, + calendar = calendar + ) + + return(obj) +} diff --git a/R/generic_stats.R b/R/generic_stats.R index feaf940..e89d691 100644 --- a/R/generic_stats.R +++ b/R/generic_stats.R @@ -22,6 +22,7 @@ library(circular) #' If exact dates are requested for statistics that do not support it (e.g., mean, sum, sd, var), the function will print a message #' and proceed without exact dates. #' +#' @importFrom stats na.omit #' @seealso \code{\link{compute.stat.scalar}}, \code{\link{compute.stat.vector}} #' #' @note @@ -38,6 +39,25 @@ compute.gen.stat <- function(gen.var, stat, data, freq = c("monthly", "annual", freq <- match.arg(freq) exact_date_stats <- c("max", "min") + # Determine if the data is single-value per month + actual_month_factor <- factor(format(gen.var@dates, "%Y-%m")) + single_value_per_month <- all(tapply(data, actual_month_factor, function(x) length(na.omit(x)) == 1, simplify = TRUE)) + + + # Check if the data is single-value per month + if (single_value_per_month) { + if (freq == "monthly") { + # Warn if trying to compute monthly stats with single-value data per month + warning("Monthly calculations on single-value-per-month data are not meaningful. Proceeding with the calculation.") + } + + if (include.exact.dates) { + # Warn if exact dates are requested on single-value-per-month data + warning("Exact dates are not meaningful for single-value-per-month data. Proceeding without exact dates.") + include.exact.dates <- FALSE + } + } + if (include.exact.dates && !(stat %in% exact_date_stats)) { message(paste("Warning: Exact dates are not applicable for the", stat, "statistic. Proceeding without exact dates.")) include.exact.dates <- FALSE From 4fa5cc88b96f5e95a3bb0311f55e6d37d3c0a2c7 Mon Sep 17 00:00:00 2001 From: Quintin Date: Fri, 25 Oct 2024 15:11:09 -0700 Subject: [PATCH 02/10] Add testing for single val per month --- tests/test_single_value_per_month.R | 139 ++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 tests/test_single_value_per_month.R diff --git a/tests/test_single_value_per_month.R b/tests/test_single_value_per_month.R new file mode 100644 index 0000000..eb03999 --- /dev/null +++ b/tests/test_single_value_per_month.R @@ -0,0 +1,139 @@ +library(climdex.pcic) +library(RUnit) + +climdex.pcic.test.single.monthly.scalar.raw.and.csv.construction <- function() { + set.seed(123) + + # Raw data construction using 1st of each month + scalar_data <- runif(12, 0, 20) # One value per month + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + scalar_obj_raw <- climdexSingleMonthlyScalar.raw( + data = scalar_data, + dates = dates, + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + checkEquals(length(scalar_obj_raw@data), length(scalar_obj_raw@dates), "Raw scalar construction: data length does not match dates.") + + csv_data <- data.frame(date = as.character(dates), data = scalar_data) + temp_csv <- tempfile() + write.csv(csv_data, temp_csv, row.names = FALSE) + + scalar_obj_csv <- climdexSingleMonthlyScalar.csv( + file = temp_csv, + data.column = "data", + date.columns = "date", + date.format = "%Y-%m-%d", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + checkEquals(length(scalar_obj_csv@data), length(scalar_obj_csv@dates), "CSV scalar construction: data length does not match dates.") + checkEquals(scalar_obj_raw@dates, scalar_obj_csv@dates, "Date mismatch between raw and CSV scalar objects.") + checkTrue(all.equal(scalar_obj_csv, scalar_obj_raw), msg = "Scalar_obj built from CSV is not identical to raw") +} + +climdex.pcic.test.single.monthly.vector.raw.and.csv.construction <- function() { + set.seed(123) + + # Raw data construction using 1st of each month + primary_data <- runif(12, 0, 20) # One value per month + secondary_data <- runif(12, 0, 360) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + vector_obj_raw <- climdexSingleMonthlyVector.raw( + primary = primary_data, + secondary = secondary_data, + dates = dates, + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + checkEquals(length(vector_obj_raw@primary), length(vector_obj_raw@dates), "Raw vector construction: primary length does not match dates.") + + csv_data <- data.frame(date = as.character(dates), primary = primary_data, secondary = secondary_data) + temp_csv <- tempfile() + write.csv(csv_data, temp_csv, row.names = FALSE) + + vector_obj_csv <- climdexSingleMonthlyVector.csv( + file = temp_csv, + primary.column = "primary", + secondary.column = "secondary", + date.columns = "date", + date.format = "%Y-%m-%d", + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + checkEquals(length(vector_obj_csv@primary), length(vector_obj_csv@dates), "CSV vector construction: primary length does not match dates.") + checkEquals(vector_obj_raw@dates, vector_obj_csv@dates, "Date mismatch between raw and CSV vector objects.") + checkTrue(all.equal(vector_obj_csv, vector_obj_raw), msg = "Vector_obj built from CSV is not identical to raw") +} + +# Test for scalar with missing values in data +climdex.pcic.test.SingleMonthlyScalar.raw.missing <- function() { + set.seed(123) + + # Simulate single monthly value data with an NA value + data <- c(1:5, NA, 7:12) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + # Create the climdexSingleMonthlyScalar object and expect it to pass without failure + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian") + + checkTrue(length(scalar_obj@data) == length(scalar_obj@dates), msg = "NA values in single monthly scalar data are not handled correctly.") +} + +# Test for vector with missing values in primary data +climdex.pcic.test.SingleMonthlyVector.raw.missing <- function() { + set.seed(123) + + # Simulate single monthly value vector data with an NA value in the primary component + primary <- c(1:5, NA, 7:12) + secondary <- runif(12, 0, 360) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + # Create the climdexSingleMonthlyVector object and expect it to pass without failure + vector_obj <- climdexSingleMonthlyVector.raw(primary, secondary, dates, format = "polar", northern.hemisphere = TRUE, calendar = "gregorian") + + checkTrue(length(vector_obj@primary) == length(vector_obj@dates), msg = "NA values in single monthly vector data are not handled correctly.") +} + +# Test with zero-length scalar data +climdex.pcic.test.SingleMonthlyScalar.raw.zero.length <- function() { + set.seed(123) + + # Create an empty vector and dates + data <- numeric(0) + dates <- as.PCICt(character(0), cal = "gregorian") + checkException(climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + "Zero-length data not handled correctly.") +} + +# Test with out-of-sequence dates and sorted check +climdex.pcic.test.OutOfSequenceDates <- function() { + set.seed(123) + + # Create scalar data with out-of-sequence dates + data <- 1:12 + dates <- as.PCICt(c("2020-01-01", "2020-03-01", "2020-05-01", "2020-02-01", "2020-04-01", "2020-06-01", + "2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01", "2020-12-01"), + cal = "gregorian") + + # Create the scalar object with out-of-sequence dates + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian") + + # Extract the non-NA data and corresponding dates from the object + obj_dates <- scalar_obj@dates[!is.na(scalar_obj@data)] + obj_data <- scalar_obj@data[!is.na(scalar_obj@data)] + + sorted_indices <- order(dates) + sorted_dates <- dates[sorted_indices] + sorted_data <- data[sorted_indices] + + checkEquals(obj_data, sorted_data, + "Object Data is not aligned correctly with sorted dates.") +} From bb5d3c8bed0043a4cd7892477c357660c1008599 Mon Sep 17 00:00:00 2001 From: Quintin Date: Mon, 28 Oct 2024 11:10:14 -0700 Subject: [PATCH 03/10] Add examples and cross-refs to docs --- R/climdexGenericScalar.R | 18 ++++++++++++++++++ R/climdexGenericVector.R | 20 ++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/R/climdexGenericScalar.R b/R/climdexGenericScalar.R index bd54d36..b0652e9 100644 --- a/R/climdexGenericScalar.R +++ b/R/climdexGenericScalar.R @@ -144,6 +144,15 @@ climdexGenericScalar.csv <- function( #' @param calendar A string representing the calendar type, e.g., "gregorian". #' @return A `ClimdexGenericScalar` object containing the processed data. #' +#' @seealso [climdexGenericScalar.raw()], [climdexSingleMonthlyScalar.csv()] +#' +#' @examples +#' \dontrun{ +#' data <- runif(12, 0, 20) +#' dates <- as.PCICt(seq(as.Date("2020-01-01"), by = "month", length.out = 12), cal = "gregorian") +#' scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates) +#' } +#' #' @export climdexSingleMonthlyScalar.raw <- function( @@ -197,6 +206,15 @@ climdexSingleMonthlyScalar.raw <- function( #' #' @return A `ClimdexGenericScalar` object containing the processed scalar climate data. #' +#' @seealso [climdexSingleMonthlyScalar.raw()] +#' +#' @examples +#' \dontrun{ +#' csv_file <- "path/to/scalar_data.csv" +#' scalar_obj <- climdexSingleMonthlyScalar.csv(file = csv_file, data.column = "data", +#' date.columns = "date", date.format = "%Y-%m-%d") +#' } +#' #' @export climdexSingleMonthlyScalar.csv <- function( diff --git a/R/climdexGenericVector.R b/R/climdexGenericVector.R index fb349df..f4fa294 100644 --- a/R/climdexGenericVector.R +++ b/R/climdexGenericVector.R @@ -192,6 +192,16 @@ climdexGenericVector.csv <- function( #' @param calendar String representing the calendar type, e.g., "gregorian". #' #' @return A `ClimdexGenericVector` object containing the processed vector data. +#'#' +#' @seealso [climdexGenericVector.raw()], [climdexSingleMonthlyVector.csv()] +#' +#' @examples +#' \dontrun{ +#' primary <- runif(12, 0, 20) +#' secondary <- runif(12, 0, 360) +#' dates <- as.PCICt(seq(as.Date("2020-01-01"), by = "month", length.out = 12), cal = "gregorian") +#' vector_obj <- climdexSingleMonthlyVector.raw(primary, secondary, dates, "polar") +#' } #' #' @export @@ -254,6 +264,16 @@ climdexSingleMonthlyVector.raw <- function( #' #' @return A `ClimdexGenericVector` object containing the processed vector climate data. #' +#' @seealso [climdexSingleMonthlyVector.raw()] +#' +#' @examples +#' \dontrun{ +#' csv_file <- "path/to/vector_data.csv" +#' vector_obj <- climdexSingleMonthlyVector.csv(file = csv_file, primary.column = "primary", +#' secondary.column = "secondary", date.columns = "date", +#' date.format = "%Y-%m-%d", format = "polar") +#' } +#' #' @export climdexSingleMonthlyVector.csv <- function( From 7bdf395863bb27c34f636e27ae5a5926ce27222a Mon Sep 17 00:00:00 2001 From: Quintin Date: Tue, 29 Oct 2024 15:33:11 -0700 Subject: [PATCH 04/10] Add single value per month testing --- tests/test_single_value_per_month.R | 373 ++++++++++++++++++++++++++-- 1 file changed, 351 insertions(+), 22 deletions(-) diff --git a/tests/test_single_value_per_month.R b/tests/test_single_value_per_month.R index eb03999..416fb04 100644 --- a/tests/test_single_value_per_month.R +++ b/tests/test_single_value_per_month.R @@ -4,7 +4,6 @@ library(RUnit) climdex.pcic.test.single.monthly.scalar.raw.and.csv.construction <- function() { set.seed(123) - # Raw data construction using 1st of each month scalar_data <- runif(12, 0, 20) # One value per month dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) scalar_obj_raw <- climdexSingleMonthlyScalar.raw( @@ -37,7 +36,6 @@ climdex.pcic.test.single.monthly.scalar.raw.and.csv.construction <- function() { climdex.pcic.test.single.monthly.vector.raw.and.csv.construction <- function() { set.seed(123) - # Raw data construction using 1st of each month primary_data <- runif(12, 0, 20) # One value per month secondary_data <- runif(12, 0, 360) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) @@ -73,11 +71,11 @@ climdex.pcic.test.single.monthly.vector.raw.and.csv.construction <- function() { checkTrue(all.equal(vector_obj_csv, vector_obj_raw), msg = "Vector_obj built from CSV is not identical to raw") } -# Test for scalar with missing values in data +# Test for scalar with missing value in data climdex.pcic.test.SingleMonthlyScalar.raw.missing <- function() { set.seed(123) - # Simulate single monthly value data with an NA value + # Single monthly value data with an NA value data <- c(1:5, NA, 7:12) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) @@ -87,11 +85,11 @@ climdex.pcic.test.SingleMonthlyScalar.raw.missing <- function() { checkTrue(length(scalar_obj@data) == length(scalar_obj@dates), msg = "NA values in single monthly scalar data are not handled correctly.") } -# Test for vector with missing values in primary data +# Test for vector with missing value in primary data climdex.pcic.test.SingleMonthlyVector.raw.missing <- function() { set.seed(123) - # Simulate single monthly value vector data with an NA value in the primary component + # Single monthly value vector data with an NA value in the primary component primary <- c(1:5, NA, 7:12) secondary <- runif(12, 0, 360) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) @@ -104,36 +102,367 @@ climdex.pcic.test.SingleMonthlyVector.raw.missing <- function() { # Test with zero-length scalar data climdex.pcic.test.SingleMonthlyScalar.raw.zero.length <- function() { - set.seed(123) - - # Create an empty vector and dates + # Empty data and dates vectors data <- numeric(0) - dates <- as.PCICt(character(0), cal = "gregorian") + dates <- as.PCICt(character(0), cal = "gregorian") checkException(climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), "Zero-length data not handled correctly.") } -# Test with out-of-sequence dates and sorted check -climdex.pcic.test.OutOfSequenceDates <- function() { + +climdex.pcic.test.MultiyearScalarContinuous <- function() { set.seed(123) - # Create scalar data with out-of-sequence dates - data <- 1:12 - dates <- as.PCICt(c("2020-01-01", "2020-03-01", "2020-05-01", "2020-02-01", "2020-04-01", "2020-06-01", - "2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01", "2020-12-01"), - cal = "gregorian") + data <- runif(36, 0, 20) # One value per month for 3 years + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 36) - # Create the scalar object with out-of-sequence dates + # Create the scalar object scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian") - # Extract the non-NA data and corresponding dates from the object + checkEquals(length(scalar_obj@data), length(scalar_obj@dates), "Multiyear scalar data length does not match dates.") + + # Validate that dates and data are maintained correctly + sorted_indices <- order(dates) + sorted_dates <- dates[sorted_indices] + sorted_data <- data[sorted_indices] + obj_dates <- scalar_obj@dates[!is.na(scalar_obj@data)] obj_data <- scalar_obj@data[!is.na(scalar_obj@data)] + checkEquals(obj_data, sorted_data, "Multiyear scalar data is not aligned correctly with sorted dates.") +} + +climdex.pcic.test.MultiyearVectorContinuous <- function() { + set.seed(123) + + primary <- runif(36, 0, 20) # One value per month for 3 years + secondary <- runif(36, 0, 360) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 36) + + vector_obj <- climdexSingleMonthlyVector.raw(primary, secondary, dates, format = "polar", northern.hemisphere = TRUE, calendar = "gregorian") + + checkEquals(length(vector_obj@primary), length(vector_obj@dates), "Multiyear vector primary length does not match dates.") + sorted_indices <- order(dates) sorted_dates <- dates[sorted_indices] - sorted_data <- data[sorted_indices] + sorted_primary <- primary[sorted_indices] + sorted_secondary <- secondary[sorted_indices] + + obj_dates <- vector_obj@dates[!is.na(vector_obj@primary)] + obj_primary <- vector_obj@primary[!is.na(vector_obj@primary)] + obj_secondary <- vector_obj@secondary[!is.na(vector_obj@primary)] + + checkEquals(obj_primary, sorted_primary, "Multiyear vector primary data is not aligned correctly with sorted dates.") + checkEquals(obj_secondary, sorted_secondary, "Multiyear vector secondary data is not aligned correctly with sorted dates.") +} + +climdex.pcic.test.MultiyearWithGaps <- function() { + set.seed(123) + + # Scalar data for three years with some missing months + data <- c(runif(11, 0, 20), NA, runif(11, 0, 20), NA, runif(10, 0, 20), NA, NA) # 36 data points with some NA values to align with dates + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 36) + # Test that the scalar object is built without errors + result <- try( + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) + # Validate non-NA data alignment + obj_data <- scalar_obj@data[!is.na(scalar_obj@data)] + obj_dates <- scalar_obj@dates[!is.na(scalar_obj@data)] + valid_indices <- !is.na(data) + checkEquals(obj_data, data[valid_indices], "Multiyear scalar data with gaps is not aligned correctly.") +} + +climdex.pcic.test.SingleMonthlyScalar.raw.dates.not.first.day <- function() { + set.seed(123) + + data <- runif(12, 0, 20) + dates <- seq(as.PCICt("2020-01-02", cal = "gregorian"), by = "month", length.out = 12) + + # Expect an error due to dates not being on the first day + checkException( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + "Function did not raise an error when dates were not on the first day of the month." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.mismatched.lengths <- function() { + set.seed(123) + + # Data and dates of different lengths + data <- runif(11, 0, 20) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + # Expect an error due to mismatched lengths + checkException( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + "Function did not raise an error when data and dates lengths were mismatched." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.non.numeric.data <- function() { + + data <- rep("non-numeric", 12) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + # Expect an error due to non-numeric data + checkException( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + "Function did not raise an error when non-numeric data was provided." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.csv.invalid.date.format <- function() { + # Invalid date formats + data <- runif(12, 0, 20) + dates <- format(seq(as.Date("2020-01-01"), by = "month", length.out = 12), "%Y/%m/%d") # Different date format + csv_data <- data.frame(date = dates, data = data) + temp_csv <- tempfile() + write.csv(csv_data, temp_csv, row.names = FALSE) + + # Expect an error due to invalid date format + checkException( + climdexSingleMonthlyScalar.csv( + file = temp_csv, + data.column = "data", + date.columns = "date", + date.format = "%Y-%m-%d", # Expecting different format + northern.hemisphere = TRUE, + calendar = "gregorian" + ), + "Function did not raise an error when invalid date format was provided in CSV." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.invalid.calendar <- function() { + set.seed(123) + + # Create valid data and dates + data <- runif(12, 0, 20) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + # Expect an error due to invalid calendar type + checkException( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "invalid_calendar"), + "Function did not raise an error when an invalid calendar type was provided." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.NA.dates <- function() { + set.seed(123) + + # NA in dates + data <- runif(12, 0, 20) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + dates[6] <- NA + + # Expect an error due to NA in dates + checkException( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + "Function did not raise an error when NA values were present in dates." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.leap.year <- function() { + set.seed(123) + + # Leap year + data <- runif(24, 0, 20) + dates <- seq(as.PCICt("2019-01-01", cal = "gregorian"), by = "month", length.out = 24) + + result <- try( + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) + # Ensure that February 29, 2020, is included + checkTrue(any(format(scalar_obj@dates, "%Y-%m-%d") == "2020-02-29"), "Leap day not included in dates.") + + +} + +climdex.pcic.test.SingleMonthlyScalar.raw.extreme.values <- function() { + # Extreme values + data <- c(-1e10, runif(10, -1e5, 1e5), 1e10) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian") + scalar_max <- unname(compute.stat.scalar(scalar_obj, "max", "annual", FALSE)) + scalar_min <- unname(compute.stat.scalar(scalar_obj, "min", "annual", FALSE)) + scalar_sum <- unname(compute.stat.scalar(scalar_obj, "sum", "annual", FALSE)) + checkEquals(scalar_max, 1e10, "annual max stat for single monthly scalar was not equal to max of input data") + checkEquals(scalar_min, -1e10, "annual min stat for single monthly scalar was not equal to min of input data") + checkEquals(scalar_sum, sum(data), "annual sum stat for single monthly scalar was not equal to sum of input data") +} + +climdex.pcic.test.SingleMonthlyVector.raw.missing.secondary <- function() { + set.seed(123) + + # NA in secondary component + primary <- runif(12, 0, 20) + secondary <- c(runif(5, 0, 360), NA, runif(6, 0, 360)) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + result <- try( + climdexSingleMonthlyVector.raw(primary, secondary, dates, format = "polar", northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) +} + +climdex.pcic.test.SingleMonthlyVector.raw.invalid.format <- function() { + set.seed(123) + + # Valid data + primary <- runif(12, 0, 20) + secondary <- runif(12, 0, 360) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + # Expect an error due to invalid format + checkException( + climdexSingleMonthlyVector.raw(primary, secondary, dates, format = "invalid_format", northern.hemisphere = TRUE, calendar = "gregorian"), + "Function did not raise an error when an invalid format was provided." + ) +} +climdex.pcic.test.SingleMonthlyScalar.raw.error.messages <- function() { + # Multiple data values per month + data <- runif(24, 0, 20) + dates <- c( + seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12), + seq(as.PCICt("2020-01-15", cal = "gregorian"), by = "month", length.out = 12) + ) + + # Capture the error message + error_message <- tryCatch( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + error = function(e) e$message + ) + + # Check error message + checkTrue( + grepl("exactly one value per month", error_message), + "Error message is not informative for multiple values per month." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.different.calendars <- function() { + set.seed(123) + + # Dates with a "noleap" calendar + data <- runif(12, 0, 20) + dates <- seq(as.PCICt("2020-01-01", cal = "noleap"), by = "month", length.out = 12) + + result <- try( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.timezones <- function() { + set.seed(123) + + # Create data with dates including time zones + data <- runif(12, 0, 20) + dates <- as.PCICt(seq(as.POSIXct("2020-01-01", tz = "UTC"), by = "month", length.out = 12), cal = "gregorian") + + result <- try( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.irregular.intervals <- function() { + set.seed(123) + + # Missing months + data <- runif(10, 0, 20) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "2 months", length.out = 10) + result <- try( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.negative.values <- function() { + # Data with negative values + data <- runif(36, -50, 0) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 36) + + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian") + scalar_max <- unname(compute.stat.scalar(scalar_obj, "max", "annual", FALSE)) + scalar_min <- unname(compute.stat.scalar(scalar_obj, "min", "annual", FALSE)) + scalar_sum <- unname(compute.stat.scalar(scalar_obj, "sum", "annual", FALSE)) + checkEquals(max(scalar_max), max(data), "annual max stat for single monthly scalar was not equal to max of input data") + checkEquals(min(scalar_min), min(data), "annual min stat for single monthly scalar was not equal to min of input data") + checkEquals(sum(scalar_sum), sum(data), "annual sum stat for single monthly scalar was not equal to sum of input data") +} + +climdex.pcic.test.SingleMonthlyVector.raw.cartesian <- function() { + set.seed(123) + + # Data in cartesian format (x,y components) + x_comp <- runif(12, -10, 10) + y_comp <- runif(12, -10, 10) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + vector_obj <- climdexSingleMonthlyVector.raw( + primary = x_comp, + secondary = y_comp, + dates = dates, + format = "cartesian", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Verify components + checkEquals(length(vector_obj@primary), length(vector_obj@dates), "Cartesian vector primary length does not match dates.") + checkEquals(vector_obj@primary[!is.na(vector_obj@primary)], x_comp, "X component not stored correctly") + checkEquals(vector_obj@secondary[!is.na(vector_obj@secondary)], y_comp, "Y component not stored correctly") +} + +climdex.pcic.test.SingleMonthlyScalar.large.dataset <- function() { + set.seed(123) + + # Create 100 years of monthly data + n_months <- 100 * 12 + data <- runif(n_months, 0, 20) + dates <- seq(as.PCICt("1920-01-01", cal = "gregorian"), by = "month", length.out = n_months) + + scalar_obj <- climdexSingleMonthlyScalar.raw( + data = data, + dates = dates, + northern.hemisphere = TRUE, + calendar = "gregorian" + ) - checkEquals(obj_data, sorted_data, - "Object Data is not aligned correctly with sorted dates.") + checkEquals(length(scalar_obj@data[!is.na(scalar_obj@data)]), n_months, "Large dataset not handled correctly") } From 6c3f6fff1868d35b624522ddfa91b54f8adcb5a5 Mon Sep 17 00:00:00 2001 From: Quintin Date: Wed, 30 Oct 2024 12:18:10 -0700 Subject: [PATCH 05/10] Update tests and add validation for empty or NA inputs --- R/GenericVariable_utils.R | 16 ++- R/climdexGenericScalar.R | 38 ++--- R/climdexGenericVector.R | 53 +++---- tests/test_single_value_per_month.R | 215 +++++++++++++++------------- 4 files changed, 174 insertions(+), 148 deletions(-) diff --git a/R/GenericVariable_utils.R b/R/GenericVariable_utils.R index 70e89c6..0601c7f 100644 --- a/R/GenericVariable_utils.R +++ b/R/GenericVariable_utils.R @@ -1,6 +1,6 @@ # Utility function to validate arguments for scalar and vector data. check.generic.argument.validity <- function( data, dates, max.missing.days, calendar) { - + if (length(max.missing.days) != 3 || !all(c("annual", "monthly", "seasonal") %in% names(max.missing.days))) { stop("max.missing.days must be a named vector with 'annual', 'monthly', and 'seasonal' elements.") } @@ -14,18 +14,24 @@ check.generic.argument.validity <- function( data, dates, max.missing.days, cale if (missing(dates)) { stop("Argument 'dates' is missing.") } - + if (any(is.na(dates))){ + stop("Argument 'dates' has NA values.") + } if (!is.numeric(data)) { stop("Primary Data must be numeric.") } - + if (length(data) == 0 || length(dates) == 0) { + stop("Primary data and dates must not be empty vectors.") + } if (length(data) != length(dates)) { stop("Primary data and dates must have the same length.") } - if(!is.null(dates) && !inherits(dates, "PCICt")) - stop(paste("Dates must be of class PCICt.")) + if(!is.null(dates) && !inherits(dates, "PCICt")){ + stop(paste("Dates must be of class PCICt.")) + } + # Calendar check: verify it matches one of the recognized types valid_calendars <- c("360_day", "360", "365_day", "365", "noleap", "gregorian", "proleptic_gregorian") diff --git a/R/climdexGenericScalar.R b/R/climdexGenericScalar.R index b0652e9..4012ad5 100644 --- a/R/climdexGenericScalar.R +++ b/R/climdexGenericScalar.R @@ -127,7 +127,6 @@ climdexGenericScalar.csv <- function( return(obj) } - #' @title climdexSingleMonthlyScalar.raw #' #' @description @@ -135,7 +134,7 @@ climdexGenericScalar.csv <- function( #' #' @details #' This function is a wrapper for creating `ClimdexGenericScalar` objects where temporal resolution is a single value per month. -#' It automatically sets the `max.missing.days` to `+Inf`. To ensure consistency, each data point must correspond +#' It automatically sets the `max.missing.days` to `+Inf`. To ensure consistency, each data point must correspond #' to the 1st day of each month. The function will raise an error if there is more than one value per month or if any date is not on the 1st. #' #' @param data A numeric vector containing the scalar climate data. @@ -145,7 +144,7 @@ climdexGenericScalar.csv <- function( #' @return A `ClimdexGenericScalar` object containing the processed data. #' #' @seealso [climdexGenericScalar.raw()], [climdexSingleMonthlyScalar.csv()] -#' +#' #' @examples #' \dontrun{ #' data <- runif(12, 0, 20) @@ -159,24 +158,24 @@ climdexSingleMonthlyScalar.raw <- function( data, dates, northern.hemisphere = TRUE, - calendar = "gregorian" -) { + calendar = "gregorian") { max.missing.days <- c(annual = +Inf, monthly = +Inf, seasonal = +Inf) + valid_dates <- dates[!is.na(dates)] # Check if there is exactly one value per month on the 1st day - unique_months <- unique(format(dates, "%Y-%m")) - day_of_month <- as.integer(format(dates, "%d")) - + unique_months <- unique(format(valid_dates, "%Y-%m")) + day_of_month <- as.integer(format(valid_dates, "%d")) + # Check that the length of unique months matches the number of dates, ensuring only one value per month - if (length(unique_months) != length(dates)) { + if (length(unique_months) != length(valid_dates)) { stop("Data must have exactly one value per month.") } - + # Check that all dates correspond to the 1st day of each month if (!all(day_of_month == 1)) { stop("Data must be on the 1st day of each month.") } - + obj <- climdexGenericScalar.raw( data = data, dates = dates, @@ -193,7 +192,7 @@ climdexSingleMonthlyScalar.raw <- function( #' Reads scalar climate data with a single value per month constraint from a CSV file and creates a `ClimdexGenericScalar` object. #' #' @details -#' This function reads scalar climate data and validates that there is a single value per month. It automatically sets the `max.missing.days` +#' This function reads scalar climate data and validates that there is a single value per month. It automatically sets the `max.missing.days` #' to `+Inf` and builds a `ClimdexGenericScalar` object. Each date must correspond to the 1st day of each month. #' #' @param file The file path to the CSV containing the scalar climate data. @@ -207,12 +206,14 @@ climdexSingleMonthlyScalar.raw <- function( #' @return A `ClimdexGenericScalar` object containing the processed scalar climate data. #' #' @seealso [climdexSingleMonthlyScalar.raw()] -#' +#' #' @examples #' \dontrun{ #' csv_file <- "path/to/scalar_data.csv" -#' scalar_obj <- climdexSingleMonthlyScalar.csv(file = csv_file, data.column = "data", -#' date.columns = "date", date.format = "%Y-%m-%d") +#' scalar_obj <- climdexSingleMonthlyScalar.csv( +#' file = csv_file, data.column = "data", +#' date.columns = "date", date.format = "%Y-%m-%d" +#' ) #' } #' #' @export @@ -224,16 +225,15 @@ climdexSingleMonthlyScalar.csv <- function( date.format, na.strings = NULL, northern.hemisphere = TRUE, - calendar = "gregorian" -) { + calendar = "gregorian") { GS.csv <- read_csv_data(file, data.columns = data.column, date.columns, date.format, na.strings, calendar) - + obj <- climdexSingleMonthlyScalar.raw( data = GS.csv$data[[1]], dates = GS.csv$dates, northern.hemisphere = northern.hemisphere, calendar = calendar ) - + return(obj) } diff --git a/R/climdexGenericVector.R b/R/climdexGenericVector.R index f4fa294..990d9e5 100644 --- a/R/climdexGenericVector.R +++ b/R/climdexGenericVector.R @@ -45,6 +45,9 @@ climdexGenericVector.raw <- function( if (missing(secondary)) { stop("Secondary data argument is missing.") } + if (length(secondary) == 0 || length(dates) == 0) { + stop("Secondary must not be an empty vector.") + } # Check that primary, secondary, and dates have the same length if (length(primary) != length(secondary) || length(primary) != length(dates)) { stop("Lengths of 'primary', 'secondary', and 'dates' must be equal.") @@ -172,7 +175,6 @@ climdexGenericVector.csv <- function( return(obj) } - #' @title climdexSingleMonthlyVector.raw #' #' @description @@ -181,7 +183,7 @@ climdexGenericVector.csv <- function( #' @details #' This function processes vector climate data and validates that there is a single value per month. #' It automatically sets the `max.missing.days` to `+Inf` and builds a `ClimdexGenericVector` object. -#' To ensure consistency, each data point must correspond to the 1st day of each month. +#' To ensure consistency, each data point must correspond to the 1st day of each month. #' The function will raise an error if there is more than one value per month or if any date is not on the 1st. #' #' @param primary A numeric vector representing the primary data (e.g., wind speed). @@ -190,11 +192,11 @@ climdexGenericVector.csv <- function( #' @param format A string specifying the format of the vector data ("polar", "cartesian", or "cardinal"). #' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere. #' @param calendar String representing the calendar type, e.g., "gregorian". -#' +#' #' @return A `ClimdexGenericVector` object containing the processed vector data. -#'#' +#' #' #' @seealso [climdexGenericVector.raw()], [climdexSingleMonthlyVector.csv()] -#' +#' #' @examples #' \dontrun{ #' primary <- runif(12, 0, 20) @@ -211,24 +213,24 @@ climdexSingleMonthlyVector.raw <- function( dates, format = "polar", northern.hemisphere = TRUE, - calendar = "gregorian" -) { + calendar = "gregorian") { max.missing.days <- c(annual = +Inf, monthly = +Inf, seasonal = +Inf) - + + valid_dates <- dates[!is.na(dates)] # Check if there is exactly one value per month on the 1st day - unique_months <- unique(format(dates, "%Y-%m")) - day_of_month <- as.integer(format(dates, "%d")) - + unique_months <- unique(format(valid_dates, "%Y-%m")) + day_of_month <- as.integer(format(valid_dates, "%d")) + # Check that the length of unique months matches the number of dates, ensuring only one value per month - if (length(unique_months) != length(dates)) { + if (length(unique_months) != length(valid_dates)) { stop("Data must have exactly one value per month.") } - + # Check that all dates correspond to the 1st day of each month if (!all(day_of_month == 1)) { stop("Data must be on the 1st day of each month.") } - + obj <- climdexGenericVector.raw( primary = primary, secondary = secondary, @@ -238,7 +240,7 @@ climdexSingleMonthlyVector.raw <- function( northern.hemisphere = northern.hemisphere, calendar = calendar ) - + return(obj) } @@ -248,8 +250,8 @@ climdexSingleMonthlyVector.raw <- function( #' Reads vector climate data with a single value per month constraint from a CSV file and creates a `ClimdexGenericVector` object. #' #' @details -#' This function reads vector climate data and validates that there is a single value per month. -#' It automatically sets the `max.missing.days` to `+Inf` and builds a `ClimdexGenericVector` object. +#' This function reads vector climate data and validates that there is a single value per month. +#' It automatically sets the `max.missing.days` to `+Inf` and builds a `ClimdexGenericVector` object. #' Each date must correspond to the 1st day of each month. #' #' @param file The file path to the CSV containing the vector climate data. @@ -265,13 +267,15 @@ climdexSingleMonthlyVector.raw <- function( #' @return A `ClimdexGenericVector` object containing the processed vector climate data. #' #' @seealso [climdexSingleMonthlyVector.raw()] -#' +#' #' @examples #' \dontrun{ #' csv_file <- "path/to/vector_data.csv" -#' vector_obj <- climdexSingleMonthlyVector.csv(file = csv_file, primary.column = "primary", -#' secondary.column = "secondary", date.columns = "date", -#' date.format = "%Y-%m-%d", format = "polar") +#' vector_obj <- climdexSingleMonthlyVector.csv( +#' file = csv_file, primary.column = "primary", +#' secondary.column = "secondary", date.columns = "date", +#' date.format = "%Y-%m-%d", format = "polar" +#' ) #' } #' #' @export @@ -285,10 +289,9 @@ climdexSingleMonthlyVector.csv <- function( format = "polar", na.strings = NULL, northern.hemisphere = TRUE, - calendar = "gregorian" -) { + calendar = "gregorian") { GV.csv <- read_csv_data(file, data.columns = c(primary.column, secondary.column), date.columns, date.format, na.strings, calendar) - + obj <- climdexSingleMonthlyVector.raw( primary = GV.csv$data[[1]], secondary = GV.csv$data[[2]], @@ -297,6 +300,6 @@ climdexSingleMonthlyVector.csv <- function( northern.hemisphere = northern.hemisphere, calendar = calendar ) - + return(obj) } diff --git a/tests/test_single_value_per_month.R b/tests/test_single_value_per_month.R index 416fb04..968f3c9 100644 --- a/tests/test_single_value_per_month.R +++ b/tests/test_single_value_per_month.R @@ -3,8 +3,8 @@ library(RUnit) climdex.pcic.test.single.monthly.scalar.raw.and.csv.construction <- function() { set.seed(123) - - scalar_data <- runif(12, 0, 20) # One value per month + + scalar_data <- runif(12, 0, 20) # One value per month dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) scalar_obj_raw <- climdexSingleMonthlyScalar.raw( data = scalar_data, @@ -12,13 +12,11 @@ climdex.pcic.test.single.monthly.scalar.raw.and.csv.construction <- function() { northern.hemisphere = TRUE, calendar = "gregorian" ) - - checkEquals(length(scalar_obj_raw@data), length(scalar_obj_raw@dates), "Raw scalar construction: data length does not match dates.") - + csv_data <- data.frame(date = as.character(dates), data = scalar_data) temp_csv <- tempfile() write.csv(csv_data, temp_csv, row.names = FALSE) - + scalar_obj_csv <- climdexSingleMonthlyScalar.csv( file = temp_csv, data.column = "data", @@ -27,19 +25,18 @@ climdex.pcic.test.single.monthly.scalar.raw.and.csv.construction <- function() { northern.hemisphere = TRUE, calendar = "gregorian" ) - - checkEquals(length(scalar_obj_csv@data), length(scalar_obj_csv@dates), "CSV scalar construction: data length does not match dates.") + checkEquals(scalar_obj_raw@dates, scalar_obj_csv@dates, "Date mismatch between raw and CSV scalar objects.") checkTrue(all.equal(scalar_obj_csv, scalar_obj_raw), msg = "Scalar_obj built from CSV is not identical to raw") } climdex.pcic.test.single.monthly.vector.raw.and.csv.construction <- function() { set.seed(123) - - primary_data <- runif(12, 0, 20) # One value per month + + primary_data <- runif(12, 0, 20) # One value per month secondary_data <- runif(12, 0, 360) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) - + vector_obj_raw <- climdexSingleMonthlyVector.raw( primary = primary_data, secondary = secondary_data, @@ -48,13 +45,11 @@ climdex.pcic.test.single.monthly.vector.raw.and.csv.construction <- function() { northern.hemisphere = TRUE, calendar = "gregorian" ) - - checkEquals(length(vector_obj_raw@primary), length(vector_obj_raw@dates), "Raw vector construction: primary length does not match dates.") - + csv_data <- data.frame(date = as.character(dates), primary = primary_data, secondary = secondary_data) temp_csv <- tempfile() write.csv(csv_data, temp_csv, row.names = FALSE) - + vector_obj_csv <- climdexSingleMonthlyVector.csv( file = temp_csv, primary.column = "primary", @@ -65,109 +60,123 @@ climdex.pcic.test.single.monthly.vector.raw.and.csv.construction <- function() { northern.hemisphere = TRUE, calendar = "gregorian" ) - - checkEquals(length(vector_obj_csv@primary), length(vector_obj_csv@dates), "CSV vector construction: primary length does not match dates.") + checkEquals(vector_obj_raw@dates, vector_obj_csv@dates, "Date mismatch between raw and CSV vector objects.") checkTrue(all.equal(vector_obj_csv, vector_obj_raw), msg = "Vector_obj built from CSV is not identical to raw") } -# Test for scalar with missing value in data climdex.pcic.test.SingleMonthlyScalar.raw.missing <- function() { set.seed(123) - + # Single monthly value data with an NA value data <- c(1:5, NA, 7:12) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) - + # Create the climdexSingleMonthlyScalar object and expect it to pass without failure - scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian") - - checkTrue(length(scalar_obj@data) == length(scalar_obj@dates), msg = "NA values in single monthly scalar data are not handled correctly.") + result <- try( + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) } -# Test for vector with missing value in primary data climdex.pcic.test.SingleMonthlyVector.raw.missing <- function() { set.seed(123) - + # Single monthly value vector data with an NA value in the primary component primary <- c(1:5, NA, 7:12) secondary <- runif(12, 0, 360) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) - + # Create the climdexSingleMonthlyVector object and expect it to pass without failure - vector_obj <- climdexSingleMonthlyVector.raw(primary, secondary, dates, format = "polar", northern.hemisphere = TRUE, calendar = "gregorian") - - checkTrue(length(vector_obj@primary) == length(vector_obj@dates), msg = "NA values in single monthly vector data are not handled correctly.") + result <- try( + vector_obj <- climdexSingleMonthlyVector.raw(primary, secondary, dates, format = "polar", northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) } -# Test with zero-length scalar data climdex.pcic.test.SingleMonthlyScalar.raw.zero.length <- function() { # Empty data and dates vectors data <- numeric(0) - dates <- as.PCICt(character(0), cal = "gregorian") - checkException(climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), - "Zero-length data not handled correctly.") -} + dates <- as.PCICt(character(0), cal = "gregorian") + error_message <- tryCatch( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + error = function(e) e$message + ) + + # Check error message + checkTrue( + grepl("Primary data and dates must not be empty vectors.", error_message), + "Error message is not informative for empty input vectors." + ) +} climdex.pcic.test.MultiyearScalarContinuous <- function() { set.seed(123) - - data <- runif(36, 0, 20) # One value per month for 3 years + + data <- runif(36, 0, 20) # One value per month for 3 years dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 36) - + # Create the scalar object scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian") - - checkEquals(length(scalar_obj@data), length(scalar_obj@dates), "Multiyear scalar data length does not match dates.") - + # Validate that dates and data are maintained correctly sorted_indices <- order(dates) sorted_dates <- dates[sorted_indices] sorted_data <- data[sorted_indices] - + obj_dates <- scalar_obj@dates[!is.na(scalar_obj@data)] obj_data <- scalar_obj@data[!is.na(scalar_obj@data)] - + checkEquals(obj_data, sorted_data, "Multiyear scalar data is not aligned correctly with sorted dates.") } climdex.pcic.test.MultiyearVectorContinuous <- function() { set.seed(123) - - primary <- runif(36, 0, 20) # One value per month for 3 years + + primary <- runif(36, 0, 20) # One value per month for 3 years secondary <- runif(36, 0, 360) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 36) - + vector_obj <- climdexSingleMonthlyVector.raw(primary, secondary, dates, format = "polar", northern.hemisphere = TRUE, calendar = "gregorian") - + checkEquals(length(vector_obj@primary), length(vector_obj@dates), "Multiyear vector primary length does not match dates.") - + sorted_indices <- order(dates) sorted_dates <- dates[sorted_indices] sorted_primary <- primary[sorted_indices] sorted_secondary <- secondary[sorted_indices] - + obj_dates <- vector_obj@dates[!is.na(vector_obj@primary)] obj_primary <- vector_obj@primary[!is.na(vector_obj@primary)] obj_secondary <- vector_obj@secondary[!is.na(vector_obj@primary)] - + checkEquals(obj_primary, sorted_primary, "Multiyear vector primary data is not aligned correctly with sorted dates.") checkEquals(obj_secondary, sorted_secondary, "Multiyear vector secondary data is not aligned correctly with sorted dates.") } climdex.pcic.test.MultiyearWithGaps <- function() { set.seed(123) - + # Scalar data for three years with some missing months - data <- c(runif(11, 0, 20), NA, runif(11, 0, 20), NA, runif(10, 0, 20), NA, NA) # 36 data points with some NA values to align with dates + data <- c(runif(11, 0, 20), NA, runif(11, 0, 20), NA, runif(10, 0, 20), NA, NA) # 36 data points with some NA values to align with dates dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 36) # Test that the scalar object is built without errors result <- try( scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), silent = TRUE ) - + checkTrue( !inherits(result, "try-error"), "Function raised an error despite valid monthly data." @@ -181,10 +190,10 @@ climdex.pcic.test.MultiyearWithGaps <- function() { climdex.pcic.test.SingleMonthlyScalar.raw.dates.not.first.day <- function() { set.seed(123) - + data <- runif(12, 0, 20) dates <- seq(as.PCICt("2020-01-02", cal = "gregorian"), by = "month", length.out = 12) - + # Expect an error due to dates not being on the first day checkException( climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), @@ -194,11 +203,11 @@ climdex.pcic.test.SingleMonthlyScalar.raw.dates.not.first.day <- function() { climdex.pcic.test.SingleMonthlyScalar.raw.mismatched.lengths <- function() { set.seed(123) - + # Data and dates of different lengths data <- runif(11, 0, 20) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) - + # Expect an error due to mismatched lengths checkException( climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), @@ -207,10 +216,9 @@ climdex.pcic.test.SingleMonthlyScalar.raw.mismatched.lengths <- function() { } climdex.pcic.test.SingleMonthlyScalar.raw.non.numeric.data <- function() { - data <- rep("non-numeric", 12) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) - + # Expect an error due to non-numeric data checkException( climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), @@ -221,18 +229,18 @@ climdex.pcic.test.SingleMonthlyScalar.raw.non.numeric.data <- function() { climdex.pcic.test.SingleMonthlyScalar.csv.invalid.date.format <- function() { # Invalid date formats data <- runif(12, 0, 20) - dates <- format(seq(as.Date("2020-01-01"), by = "month", length.out = 12), "%Y/%m/%d") # Different date format + dates <- format(seq(as.Date("2020-01-01"), by = "month", length.out = 12), "%Y/%m/%d") # Different date format csv_data <- data.frame(date = dates, data = data) temp_csv <- tempfile() write.csv(csv_data, temp_csv, row.names = FALSE) - + # Expect an error due to invalid date format checkException( climdexSingleMonthlyScalar.csv( file = temp_csv, data.column = "data", date.columns = "date", - date.format = "%Y-%m-%d", # Expecting different format + date.format = "%Y-%m-%d", # Expecting different format northern.hemisphere = TRUE, calendar = "gregorian" ), @@ -242,11 +250,11 @@ climdex.pcic.test.SingleMonthlyScalar.csv.invalid.date.format <- function() { climdex.pcic.test.SingleMonthlyScalar.raw.invalid.calendar <- function() { set.seed(123) - + # Create valid data and dates data <- runif(12, 0, 20) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) - + # Expect an error due to invalid calendar type checkException( climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "invalid_calendar"), @@ -256,22 +264,29 @@ climdex.pcic.test.SingleMonthlyScalar.raw.invalid.calendar <- function() { climdex.pcic.test.SingleMonthlyScalar.raw.NA.dates <- function() { set.seed(123) - + # NA in dates data <- runif(12, 0, 20) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) - dates[6] <- NA - - # Expect an error due to NA in dates - checkException( + dates_char <- as.character(dates) + dates_char[6] <- NA # Insert NA + dates <- as.PCICt(dates_char, cal = "gregorian") + # Capture the error message + error_message <- tryCatch( climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), - "Function did not raise an error when NA values were present in dates." + error = function(e) e$message + ) + error_message + # Check error message + checkTrue( + grepl("Argument 'dates' has NA values.", error_message), + "Error message is not informative for NA values in dates." ) } climdex.pcic.test.SingleMonthlyScalar.raw.leap.year <- function() { set.seed(123) - + # Leap year data <- runif(24, 0, 20) dates <- seq(as.PCICt("2019-01-01", cal = "gregorian"), by = "month", length.out = 24) @@ -280,15 +295,13 @@ climdex.pcic.test.SingleMonthlyScalar.raw.leap.year <- function() { scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), silent = TRUE ) - + checkTrue( !inherits(result, "try-error"), "Function raised an error despite valid monthly data." ) # Ensure that February 29, 2020, is included checkTrue(any(format(scalar_obj@dates, "%Y-%m-%d") == "2020-02-29"), "Leap day not included in dates.") - - } climdex.pcic.test.SingleMonthlyScalar.raw.extreme.values <- function() { @@ -307,37 +320,40 @@ climdex.pcic.test.SingleMonthlyScalar.raw.extreme.values <- function() { climdex.pcic.test.SingleMonthlyVector.raw.missing.secondary <- function() { set.seed(123) - + # NA in secondary component primary <- runif(12, 0, 20) secondary <- c(runif(5, 0, 360), NA, runif(6, 0, 360)) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) - + result <- try( - climdexSingleMonthlyVector.raw(primary, secondary, dates, format = "polar", northern.hemisphere = TRUE, calendar = "gregorian"), + vector_obj <- climdexSingleMonthlyVector.raw(primary, secondary, dates, format = "polar", northern.hemisphere = TRUE, calendar = "gregorian"), silent = TRUE ) - + checkTrue( !inherits(result, "try-error"), "Function raised an error despite valid monthly data." ) + checkEquals(vector_obj@primary[!is.na(vector_obj@primary)], vector_obj@primary[!is.na(vector_obj@secondary)]) } + climdex.pcic.test.SingleMonthlyVector.raw.invalid.format <- function() { set.seed(123) - + # Valid data primary <- runif(12, 0, 20) secondary <- runif(12, 0, 360) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) - + # Expect an error due to invalid format checkException( climdexSingleMonthlyVector.raw(primary, secondary, dates, format = "invalid_format", northern.hemisphere = TRUE, calendar = "gregorian"), "Function did not raise an error when an invalid format was provided." ) } + climdex.pcic.test.SingleMonthlyScalar.raw.error.messages <- function() { # Multiple data values per month data <- runif(24, 0, 20) @@ -345,13 +361,13 @@ climdex.pcic.test.SingleMonthlyScalar.raw.error.messages <- function() { seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12), seq(as.PCICt("2020-01-15", cal = "gregorian"), by = "month", length.out = 12) ) - + # Capture the error message error_message <- tryCatch( climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), error = function(e) e$message ) - + # Check error message checkTrue( grepl("exactly one value per month", error_message), @@ -361,62 +377,63 @@ climdex.pcic.test.SingleMonthlyScalar.raw.error.messages <- function() { climdex.pcic.test.SingleMonthlyScalar.raw.different.calendars <- function() { set.seed(123) - + # Dates with a "noleap" calendar data <- runif(12, 0, 20) dates <- seq(as.PCICt("2020-01-01", cal = "noleap"), by = "month", length.out = 12) - result <- try( - climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), silent = TRUE ) - + checkTrue( !inherits(result, "try-error"), "Function raised an error despite valid monthly data." ) + checkEquals(scalar_obj@dates[!is.na(scalar_obj@data)], dates) } climdex.pcic.test.SingleMonthlyScalar.raw.timezones <- function() { set.seed(123) - + # Create data with dates including time zones data <- runif(12, 0, 20) dates <- as.PCICt(seq(as.POSIXct("2020-01-01", tz = "UTC"), by = "month", length.out = 12), cal = "gregorian") - result <- try( - climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), silent = TRUE ) - + checkTrue( !inherits(result, "try-error"), "Function raised an error despite valid monthly data." ) + checkEquals(scalar_obj@dates[!is.na(scalar_obj@data)], dates) } climdex.pcic.test.SingleMonthlyScalar.raw.irregular.intervals <- function() { set.seed(123) - + # Missing months data <- runif(10, 0, 20) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "2 months", length.out = 10) result <- try( - climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), silent = TRUE ) - + checkTrue( !inherits(result, "try-error"), "Function raised an error despite valid monthly data." ) + checkEquals(scalar_obj@dates[!is.na(scalar_obj@data)], dates) } climdex.pcic.test.SingleMonthlyScalar.raw.negative.values <- function() { # Data with negative values data <- runif(36, -50, 0) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 36) - + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian") scalar_max <- unname(compute.stat.scalar(scalar_obj, "max", "annual", FALSE)) scalar_min <- unname(compute.stat.scalar(scalar_obj, "min", "annual", FALSE)) @@ -428,12 +445,12 @@ climdex.pcic.test.SingleMonthlyScalar.raw.negative.values <- function() { climdex.pcic.test.SingleMonthlyVector.raw.cartesian <- function() { set.seed(123) - + # Data in cartesian format (x,y components) x_comp <- runif(12, -10, 10) y_comp <- runif(12, -10, 10) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) - + vector_obj <- climdexSingleMonthlyVector.raw( primary = x_comp, secondary = y_comp, @@ -442,7 +459,7 @@ climdex.pcic.test.SingleMonthlyVector.raw.cartesian <- function() { northern.hemisphere = TRUE, calendar = "gregorian" ) - + # Verify components checkEquals(length(vector_obj@primary), length(vector_obj@dates), "Cartesian vector primary length does not match dates.") checkEquals(vector_obj@primary[!is.na(vector_obj@primary)], x_comp, "X component not stored correctly") @@ -451,18 +468,18 @@ climdex.pcic.test.SingleMonthlyVector.raw.cartesian <- function() { climdex.pcic.test.SingleMonthlyScalar.large.dataset <- function() { set.seed(123) - + # Create 100 years of monthly data n_months <- 100 * 12 data <- runif(n_months, 0, 20) dates <- seq(as.PCICt("1920-01-01", cal = "gregorian"), by = "month", length.out = n_months) - + scalar_obj <- climdexSingleMonthlyScalar.raw( data = data, dates = dates, northern.hemisphere = TRUE, calendar = "gregorian" ) - + checkEquals(length(scalar_obj@data[!is.na(scalar_obj@data)]), n_months, "Large dataset not handled correctly") } From 7e6e2a300d12afced2a22f45c5d6bb357f0385f6 Mon Sep 17 00:00:00 2001 From: Quintin Date: Wed, 30 Oct 2024 13:17:08 -0700 Subject: [PATCH 06/10] Add descriptive error messages to document failed checkFuncs --- tests/test_single_value_per_month.R | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/test_single_value_per_month.R b/tests/test_single_value_per_month.R index 968f3c9..5c1ad07 100644 --- a/tests/test_single_value_per_month.R +++ b/tests/test_single_value_per_month.R @@ -227,9 +227,8 @@ climdex.pcic.test.SingleMonthlyScalar.raw.non.numeric.data <- function() { } climdex.pcic.test.SingleMonthlyScalar.csv.invalid.date.format <- function() { - # Invalid date formats data <- runif(12, 0, 20) - dates <- format(seq(as.Date("2020-01-01"), by = "month", length.out = 12), "%Y/%m/%d") # Different date format + dates <- format(seq(as.Date("2020-01-01"), by = "month", length.out = 12), "%Y/%m/%d") # Invalid date format csv_data <- data.frame(date = dates, data = data) temp_csv <- tempfile() write.csv(csv_data, temp_csv, row.names = FALSE) @@ -276,7 +275,6 @@ climdex.pcic.test.SingleMonthlyScalar.raw.NA.dates <- function() { climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), error = function(e) e$message ) - error_message # Check error message checkTrue( grepl("Argument 'dates' has NA values.", error_message), @@ -335,7 +333,7 @@ climdex.pcic.test.SingleMonthlyVector.raw.missing.secondary <- function() { !inherits(result, "try-error"), "Function raised an error despite valid monthly data." ) - checkEquals(vector_obj@primary[!is.na(vector_obj@primary)], vector_obj@primary[!is.na(vector_obj@secondary)]) + checkEquals(vector_obj@primary[!is.na(vector_obj@primary)], vector_obj@primary[!is.na(vector_obj@secondary)], "Vector objects NA values are not in sync beteen primary and secondary data.") } @@ -390,7 +388,7 @@ climdex.pcic.test.SingleMonthlyScalar.raw.different.calendars <- function() { !inherits(result, "try-error"), "Function raised an error despite valid monthly data." ) - checkEquals(scalar_obj@dates[!is.na(scalar_obj@data)], dates) + checkEquals(scalar_obj@dates[!is.na(scalar_obj@data)], dates, "Scalar object dates for non-NA data does not match input dates.") } climdex.pcic.test.SingleMonthlyScalar.raw.timezones <- function() { @@ -408,7 +406,7 @@ climdex.pcic.test.SingleMonthlyScalar.raw.timezones <- function() { !inherits(result, "try-error"), "Function raised an error despite valid monthly data." ) - checkEquals(scalar_obj@dates[!is.na(scalar_obj@data)], dates) + checkEquals(scalar_obj@dates[!is.na(scalar_obj@data)], dates, "Scalar object dates for non-NA data does not match input dates.") } climdex.pcic.test.SingleMonthlyScalar.raw.irregular.intervals <- function() { @@ -426,7 +424,7 @@ climdex.pcic.test.SingleMonthlyScalar.raw.irregular.intervals <- function() { !inherits(result, "try-error"), "Function raised an error despite valid monthly data." ) - checkEquals(scalar_obj@dates[!is.na(scalar_obj@data)], dates) + checkEquals(scalar_obj@dates[!is.na(scalar_obj@data)], dates, "Scalar object dates for non-NA data does not match input dates.") } climdex.pcic.test.SingleMonthlyScalar.raw.negative.values <- function() { From ec6f655b4043fa48c76c829c6273d0bebbeeeb68 Mon Sep 17 00:00:00 2001 From: Quintin Date: Wed, 30 Oct 2024 14:09:55 -0700 Subject: [PATCH 07/10] Use obj date.factors for single monthly value check --- R/climdexGenericVector.R | 2 +- R/generic_stats.R | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/R/climdexGenericVector.R b/R/climdexGenericVector.R index 990d9e5..9241edb 100644 --- a/R/climdexGenericVector.R +++ b/R/climdexGenericVector.R @@ -45,7 +45,7 @@ climdexGenericVector.raw <- function( if (missing(secondary)) { stop("Secondary data argument is missing.") } - if (length(secondary) == 0 || length(dates) == 0) { + if (length(secondary) == 0) { stop("Secondary must not be an empty vector.") } # Check that primary, secondary, and dates have the same length diff --git a/R/generic_stats.R b/R/generic_stats.R index 8853d23..295de9c 100644 --- a/R/generic_stats.R +++ b/R/generic_stats.R @@ -42,8 +42,7 @@ compute.gen.stat <- function(gen.var, stat, data, freq = c("monthly", "annual", exact_date_stats <- c("max", "min") # Determine if the data is single-value per month - actual_month_factor <- factor(format(gen.var@dates, "%Y-%m")) - single_value_per_month <- all(tapply(data, actual_month_factor, function(x) length(na.omit(x)) == 1, simplify = TRUE)) + single_value_per_month <- all(tapply(data, gen.var@date.factors$monthly, function(x) length(na.omit(x)) == 1, simplify = TRUE)) # Check if the data is single-value per month From c28faec711fa9f134289089ccffe997610feb484 Mon Sep 17 00:00:00 2001 From: Quintin Date: Thu, 7 Nov 2024 12:54:05 -0800 Subject: [PATCH 08/10] Address PR feedback --- R/GenericVariable_utils.R | 102 +++++++++++++++++++------- R/climdexGenericScalar.R | 77 ++++++++----------- R/climdexGenericVector.R | 110 +++++++++------------------- tests/test_single_value_per_month.R | 92 +++++++++++++++++++---- 4 files changed, 218 insertions(+), 163 deletions(-) diff --git a/R/GenericVariable_utils.R b/R/GenericVariable_utils.R index 0601c7f..00d59ae 100644 --- a/R/GenericVariable_utils.R +++ b/R/GenericVariable_utils.R @@ -1,37 +1,70 @@ # Utility function to validate arguments for scalar and vector data. -check.generic.argument.validity <- function( data, dates, max.missing.days, calendar) { - +check.generic.argument.validity <- function( + data, + dates, + max.missing.days, + calendar, + is.vector = FALSE, + secondary = NULL, + format = NULL +) { + # Internal function to validate data and date arguments + validate_data_dates <- function(data, dates, name) { + if (missing(data) || is.null(data)) { + stop(paste(name, "argument is missing.")) + } + if (length(data) == 0 || length(dates) == 0) { + stop(paste(name, "and dates must not be empty vectors.")) + } + if (!is.numeric(data) && (name != "Secondary data")) { + stop(paste(name, "must be numeric.")) + } + if (length(data) != length(dates)) { + stop(paste(name, "and dates must have the same length.")) + } + if (any(is.na(dates))) { + stop(paste("Argument 'dates' has NA values.")) + } + } + + # Check max.missing.days if (length(max.missing.days) != 3 || !all(c("annual", "monthly", "seasonal") %in% names(max.missing.days))) { stop("max.missing.days must be a named vector with 'annual', 'monthly', and 'seasonal' elements.") } + # Validate primary data and dates + validate_data_dates(data, dates, "Primary data") - # Check that required arguments are provided - if (missing(data)) { - stop("Primary data argument is missing.") - } - - if (missing(dates)) { - stop("Argument 'dates' is missing.") - } - if (any(is.na(dates))){ - stop("Argument 'dates' has NA values.") + # Check if dates are PCICt + if (!inherits(dates, "PCICt")) { + stop("Dates must be of class PCICt.") } - if (!is.numeric(data)) { - stop("Primary Data must be numeric.") - } - if (length(data) == 0 || length(dates) == 0) { - stop("Primary data and dates must not be empty vectors.") - } - if (length(data) != length(dates)) { - stop("Primary data and dates must have the same length.") - } - - if(!is.null(dates) && !inherits(dates, "PCICt")){ - stop(paste("Dates must be of class PCICt.")) + # Vector-specific checks + if (is.vector) { + + validate_data_dates(secondary, dates, "Secondary data") + # Check that 'format' is provided + if (missing(format) || is.null(format)) { + stop("Argument 'format' is missing.") + } + + # Convert the format to lowercase to allow case-insensitive input + format <- tolower(format) + + # Additional validation for format + if (format %in% c("polar", "cartesian")) { + if (!is.numeric(secondary)) { + stop("For 'polar' or 'cartesian' formats, 'secondary' must be numeric.") + } + } else if (format == "cardinal") { + if (!is.character(secondary)) { + stop("For 'cardinal' format, 'secondary' must be character.") + } + } else { + stop("Invalid 'format'. Use 'polar', 'cartesian', or 'cardinal'.") + } } - # Calendar check: verify it matches one of the recognized types valid_calendars <- c("360_day", "360", "365_day", "365", "noleap", "gregorian", "proleptic_gregorian") @@ -41,6 +74,25 @@ check.generic.argument.validity <- function( data, dates, max.missing.days, cale } } +# For single-value-per-month data. Check one day per month and that the day is always the first. +check.single.month.dates <- function(dates) { + valid_dates <- dates[!is.na(dates)] + # Check if there is exactly one value per month on the 1st day + unique_months <- unique(format(valid_dates, "%Y-%m")) + day_of_month <- as.integer(format(valid_dates, "%d")) + + # Check that the length of unique months matches the number of dates, ensuring only one value per month + if (length(unique_months) != length(valid_dates)) { + stop("Data must have exactly one value per month.") + } + + # Check that all dates correspond to the 1st day of each month + if (!all(day_of_month == 1)) { + stop("Data must be on the 1st day of each month.") + } +} + + # Utility function to handle date ranges and generate date factors. date_info <- function(dates) { cal <- attr(dates, "cal") diff --git a/R/climdexGenericScalar.R b/R/climdexGenericScalar.R index 4012ad5..339486d 100644 --- a/R/climdexGenericScalar.R +++ b/R/climdexGenericScalar.R @@ -47,16 +47,15 @@ climdexGenericScalar.raw <- function( filled.list <- generate_filled_list(data, dates, date.series) names(filled.list) <- "data" namasks <- generate_namasks(filled.list, date.factors, max.missing.days) - obj <- new("climdexGenericScalar", - data = filled.list[["data"]], - dates = date.series, - date.factors = date.factors, - jdays = jdays, - namasks = namasks, - northern.hemisphere = northern.hemisphere, - max.missing.days = max.missing.days) - return(obj) + return(new("climdexGenericScalar", + data = filled.list[["data"]], + dates = date.series, + date.factors = date.factors, + jdays = jdays, + namasks = namasks, + northern.hemisphere = northern.hemisphere, + max.missing.days = max.missing.days)) } #' @title climdexGenericScalar.csv @@ -117,15 +116,14 @@ climdexGenericScalar.csv <- function( ) { GS.csv <- read_csv_data(file, data.column, date.columns, date.format, na.strings, calendar) - obj <- climdexGenericScalar.raw( - data = GS.csv$data[[1]], - dates = GS.csv$dates, - northern.hemisphere = northern.hemisphere, - max.missing.days = max.missing.days, - calendar = calendar - ) - return(obj) + return(climdexGenericScalar.raw( + data = GS.csv$data[[1]], + dates = GS.csv$dates, + northern.hemisphere = northern.hemisphere, + max.missing.days = max.missing.days, + calendar = calendar +)) } #' @title climdexSingleMonthlyScalar.raw #' @@ -161,29 +159,15 @@ climdexSingleMonthlyScalar.raw <- function( calendar = "gregorian") { max.missing.days <- c(annual = +Inf, monthly = +Inf, seasonal = +Inf) - valid_dates <- dates[!is.na(dates)] - # Check if there is exactly one value per month on the 1st day - unique_months <- unique(format(valid_dates, "%Y-%m")) - day_of_month <- as.integer(format(valid_dates, "%d")) - - # Check that the length of unique months matches the number of dates, ensuring only one value per month - if (length(unique_months) != length(valid_dates)) { - stop("Data must have exactly one value per month.") - } - - # Check that all dates correspond to the 1st day of each month - if (!all(day_of_month == 1)) { - stop("Data must be on the 1st day of each month.") - } - - obj <- climdexGenericScalar.raw( - data = data, - dates = dates, - max.missing.days = max.missing.days, - northern.hemisphere = northern.hemisphere, - calendar = calendar - ) - return(obj) + check.single.month.dates(dates) + + return(climdexGenericScalar.raw( + data = data, + dates = dates, + max.missing.days = max.missing.days, + northern.hemisphere = northern.hemisphere, + calendar = calendar +)) } #' @title climdexSingleMonthlyScalar.csv @@ -228,12 +212,11 @@ climdexSingleMonthlyScalar.csv <- function( calendar = "gregorian") { GS.csv <- read_csv_data(file, data.columns = data.column, date.columns, date.format, na.strings, calendar) - obj <- climdexSingleMonthlyScalar.raw( - data = GS.csv$data[[1]], - dates = GS.csv$dates, - northern.hemisphere = northern.hemisphere, - calendar = calendar - ) - return(obj) + return(climdexSingleMonthlyScalar.raw( + data = GS.csv$data[[1]], + dates = GS.csv$dates, + northern.hemisphere = northern.hemisphere, + calendar = calendar +)) } diff --git a/R/climdexGenericVector.R b/R/climdexGenericVector.R index 9241edb..104c06d 100644 --- a/R/climdexGenericVector.R +++ b/R/climdexGenericVector.R @@ -41,32 +41,9 @@ climdexGenericVector.raw <- function( calendar = "gregorian" ) { - check.generic.argument.validity(primary, dates, max.missing.days, calendar) - if (missing(secondary)) { - stop("Secondary data argument is missing.") - } - if (length(secondary) == 0) { - stop("Secondary must not be an empty vector.") - } - # Check that primary, secondary, and dates have the same length - if (length(primary) != length(secondary) || length(primary) != length(dates)) { - stop("Lengths of 'primary', 'secondary', and 'dates' must be equal.") - } - # Convert the format to lowercase to allow case-insensitive input - format <- tolower(format) - - # Additional validation for format - if (format %in% c("polar", "cartesian")) { - if (!is.numeric(secondary)) { - stop("For 'polar' or 'cartesian' formats, 'secondary' must be numeric.") - } - } else if (format == "cardinal") { - if (!is.character(secondary)) { - stop("For 'cardinal' format, 'secondary' must be character.") - } - } else { - stop("Invalid 'format'. Use 'polar', 'cartesian', or 'cardinal'.") - } + check.generic.argument.validity(primary, dates, max.missing.days, calendar, + is.vector = TRUE, secondary, format) + date.info <- date_info(dates) jdays = date.info$jdays @@ -79,19 +56,18 @@ climdexGenericVector.raw <- function( filled.secondary[is.na(filled.primary)] <- NA filled.primary[is.na(filled.secondary)] <- NA namasks <- generate_namasks(list(primary = filled.primary, secondary = filled.secondary), date.factors, max.missing.days) - - obj <- new("climdexGenericVector", - primary = filled.primary, - secondary = filled.secondary, - dates = date.series, - format = format, - date.factors = date.factors, - jdays = jdays, - namasks = namasks, - max.missing.days = max.missing.days, - northern.hemisphere = northern.hemisphere) - return(obj) + return(new("climdexGenericVector", + primary = filled.primary, + secondary = filled.secondary, + dates = date.series, + format = format, + date.factors = date.factors, + jdays = jdays, + namasks = namasks, + max.missing.days = max.missing.days, + northern.hemisphere = northern.hemisphere + )) } #' @title climdexGenericVector.csv @@ -163,17 +139,15 @@ climdexGenericVector.csv <- function( secondary_values <- GV.csv$data[[2]] dates <- GV.csv$dates - obj <- climdexGenericVector.raw( - primary = primary_values, - secondary = secondary_values, - dates = dates, - format = format, - max.missing.days = max.missing.days, - northern.hemisphere = northern.hemisphere, - calendar = calendar - ) - - return(obj) + return(climdexGenericVector.raw( + primary = primary_values, + secondary = secondary_values, + dates = dates, + format = format, + max.missing.days = max.missing.days, + northern.hemisphere = northern.hemisphere, + calendar = calendar +)) } #' @title climdexSingleMonthlyVector.raw #' @@ -216,22 +190,9 @@ climdexSingleMonthlyVector.raw <- function( calendar = "gregorian") { max.missing.days <- c(annual = +Inf, monthly = +Inf, seasonal = +Inf) - valid_dates <- dates[!is.na(dates)] - # Check if there is exactly one value per month on the 1st day - unique_months <- unique(format(valid_dates, "%Y-%m")) - day_of_month <- as.integer(format(valid_dates, "%d")) - - # Check that the length of unique months matches the number of dates, ensuring only one value per month - if (length(unique_months) != length(valid_dates)) { - stop("Data must have exactly one value per month.") - } - - # Check that all dates correspond to the 1st day of each month - if (!all(day_of_month == 1)) { - stop("Data must be on the 1st day of each month.") - } + check.single.month.dates(dates) - obj <- climdexGenericVector.raw( + return(climdexGenericVector.raw( primary = primary, secondary = secondary, dates = dates, @@ -239,9 +200,7 @@ climdexSingleMonthlyVector.raw <- function( max.missing.days = max.missing.days, northern.hemisphere = northern.hemisphere, calendar = calendar - ) - - return(obj) + )) } #' @title climdexSingleMonthlyVector.csv @@ -292,14 +251,13 @@ climdexSingleMonthlyVector.csv <- function( calendar = "gregorian") { GV.csv <- read_csv_data(file, data.columns = c(primary.column, secondary.column), date.columns, date.format, na.strings, calendar) - obj <- climdexSingleMonthlyVector.raw( - primary = GV.csv$data[[1]], - secondary = GV.csv$data[[2]], - dates = GV.csv$dates, - format = format, - northern.hemisphere = northern.hemisphere, - calendar = calendar - ) + return(climdexSingleMonthlyVector.raw( + primary = GV.csv$data[[1]], + secondary = GV.csv$data[[2]], + dates = GV.csv$dates, + format = format, + northern.hemisphere = northern.hemisphere, + calendar = calendar +)) - return(obj) } diff --git a/tests/test_single_value_per_month.R b/tests/test_single_value_per_month.R index 5c1ad07..8f199df 100644 --- a/tests/test_single_value_per_month.R +++ b/tests/test_single_value_per_month.R @@ -1,22 +1,49 @@ library(climdex.pcic) library(RUnit) + +validate_climdex_object <- function(obj_raw, obj_csv, primary_data, dates, expected_levels, slot_name, secondary_data = NULL) { + # Validate dates for the primary data + primary_slot <- slot(obj_raw, slot_name) + checkEquals(obj_raw@dates[!is.na(primary_slot)], dates, "Raw object dates for non-NA primary data do not match input dates.") + checkEquals(primary_slot[!is.na(primary_slot)], primary_data, "Raw object primary data for non-NA data does not match input data.") + + # Validate secondary data if applicable + if (!is.null(secondary_data)) { + checkEquals(obj_raw@secondary[!is.na(primary_slot)], secondary_data, "Raw object secondary data for non-NA data does not match input data.") + } + + # Validate date factors + levels_count <- sapply(obj_raw@date.factors, function(factor_obj) length(levels(factor_obj))) + checkEquals(levels_count, expected_levels, msg = "Date factors (annual, monthly, seasonal) do not have the expected number of levels.") + + # Validate jdays + checkEquals(length(obj_raw@jdays), 366, "Raw object jdays for filled leap-year do not match expected.") + + # Validate CSV object against raw object + checkEquals(obj_raw@dates, obj_csv@dates, "Date mismatch between raw and CSV objects.") + checkTrue(all.equal(obj_csv, obj_raw), msg = "Object built from CSV is not identical to raw.") +} + + + climdex.pcic.test.single.monthly.scalar.raw.and.csv.construction <- function() { set.seed(123) - - scalar_data <- runif(12, 0, 20) # One value per month - dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + scalar_data <- runif(5, 0, 20) # One value per month + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 5) + scalar_obj_raw <- climdexSingleMonthlyScalar.raw( data = scalar_data, dates = dates, northern.hemisphere = TRUE, calendar = "gregorian" ) - + csv_data <- data.frame(date = as.character(dates), data = scalar_data) temp_csv <- tempfile() write.csv(csv_data, temp_csv, row.names = FALSE) - + scalar_obj_csv <- climdexSingleMonthlyScalar.csv( file = temp_csv, data.column = "data", @@ -25,18 +52,23 @@ climdex.pcic.test.single.monthly.scalar.raw.and.csv.construction <- function() { northern.hemisphere = TRUE, calendar = "gregorian" ) - - checkEquals(scalar_obj_raw@dates, scalar_obj_csv@dates, "Date mismatch between raw and CSV scalar objects.") - checkTrue(all.equal(scalar_obj_csv, scalar_obj_raw), msg = "Scalar_obj built from CSV is not identical to raw") + + validate_climdex_object( + scalar_obj_raw, scalar_obj_csv, + primary_data = scalar_data, + dates = dates, + expected_levels = c(annual = 1, monthly = 12, seasonal = 5), + slot_name = "data" + ) } climdex.pcic.test.single.monthly.vector.raw.and.csv.construction <- function() { set.seed(123) - + primary_data <- runif(12, 0, 20) # One value per month secondary_data <- runif(12, 0, 360) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) - + vector_obj_raw <- climdexSingleMonthlyVector.raw( primary = primary_data, secondary = secondary_data, @@ -45,11 +77,11 @@ climdex.pcic.test.single.monthly.vector.raw.and.csv.construction <- function() { northern.hemisphere = TRUE, calendar = "gregorian" ) - + csv_data <- data.frame(date = as.character(dates), primary = primary_data, secondary = secondary_data) temp_csv <- tempfile() write.csv(csv_data, temp_csv, row.names = FALSE) - + vector_obj_csv <- climdexSingleMonthlyVector.csv( file = temp_csv, primary.column = "primary", @@ -60,11 +92,19 @@ climdex.pcic.test.single.monthly.vector.raw.and.csv.construction <- function() { northern.hemisphere = TRUE, calendar = "gregorian" ) - - checkEquals(vector_obj_raw@dates, vector_obj_csv@dates, "Date mismatch between raw and CSV vector objects.") - checkTrue(all.equal(vector_obj_csv, vector_obj_raw), msg = "Vector_obj built from CSV is not identical to raw") + + validate_climdex_object( + vector_obj_raw, vector_obj_csv, + primary_data = primary_data, + dates = dates, + expected_levels = c(annual = 1, monthly = 12, seasonal = 5), + slot_name = "primary", + secondary_data = secondary_data + ) } + + climdex.pcic.test.SingleMonthlyScalar.raw.missing <- function() { set.seed(123) @@ -84,6 +124,28 @@ climdex.pcic.test.SingleMonthlyScalar.raw.missing <- function() { ) } + +climdex.pcic.test.SingleMonthlyScalar.sub.annual <- function() { + set.seed(123) + + # Single monthly value data with an NA value + data <- c(1:5) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 5) + + # Create the climdexSingleMonthlyScalar object and expect it to pass without failure + result <- try( + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) + scalar_mean <- unname(compute.stat.scalar(scalar_obj, stat = "mean", freq = "annual", include.exact.dates = FALSE))[1] + checkEquals(scalar_mean, 3, "Mean of single value monthly scalar data for sub-annual period does not match expected.") +} + climdex.pcic.test.SingleMonthlyVector.raw.missing <- function() { set.seed(123) From fa08c1abbed9f76b4ae111fbaa073cdc5fcbad99 Mon Sep 17 00:00:00 2001 From: Quintin Date: Thu, 7 Nov 2024 13:07:42 -0800 Subject: [PATCH 09/10] Add missing dates check, formatting --- R/GenericVariable_utils.R | 3 +++ R/climdexGenericScalar.R | 15 ++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/R/GenericVariable_utils.R b/R/GenericVariable_utils.R index 00d59ae..7fed6fa 100644 --- a/R/GenericVariable_utils.R +++ b/R/GenericVariable_utils.R @@ -13,6 +13,9 @@ check.generic.argument.validity <- function( if (missing(data) || is.null(data)) { stop(paste(name, "argument is missing.")) } + if (missing(dates)) { + stop("Argument 'dates' is missing.") + } if (length(data) == 0 || length(dates) == 0) { stop(paste(name, "and dates must not be empty vectors.")) } diff --git a/R/climdexGenericScalar.R b/R/climdexGenericScalar.R index 339486d..fe0601a 100644 --- a/R/climdexGenericScalar.R +++ b/R/climdexGenericScalar.R @@ -49,13 +49,14 @@ climdexGenericScalar.raw <- function( namasks <- generate_namasks(filled.list, date.factors, max.missing.days) return(new("climdexGenericScalar", - data = filled.list[["data"]], - dates = date.series, - date.factors = date.factors, - jdays = jdays, - namasks = namasks, - northern.hemisphere = northern.hemisphere, - max.missing.days = max.missing.days)) + data = filled.list[["data"]], + dates = date.series, + date.factors = date.factors, + jdays = jdays, + namasks = namasks, + northern.hemisphere = northern.hemisphere, + max.missing.days = max.missing.days + )) } #' @title climdexGenericScalar.csv From 600d7d5e79b0155f4b26d251405ed0ccb1ec357c Mon Sep 17 00:00:00 2001 From: Quintin Date: Thu, 7 Nov 2024 15:30:42 -0800 Subject: [PATCH 10/10] Include NA cases in basic validity check --- R/GenericVariable_utils.R | 2 +- tests/test_single_value_per_month.R | 23 +++++++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/R/GenericVariable_utils.R b/R/GenericVariable_utils.R index 7fed6fa..cd33f09 100644 --- a/R/GenericVariable_utils.R +++ b/R/GenericVariable_utils.R @@ -19,7 +19,7 @@ check.generic.argument.validity <- function( if (length(data) == 0 || length(dates) == 0) { stop(paste(name, "and dates must not be empty vectors.")) } - if (!is.numeric(data) && (name != "Secondary data")) { + if (!is.numeric(data[!is.na(data)]) && (name != "Secondary data")) { stop(paste(name, "must be numeric.")) } if (length(data) != length(dates)) { diff --git a/tests/test_single_value_per_month.R b/tests/test_single_value_per_month.R index 8f199df..4f2b251 100644 --- a/tests/test_single_value_per_month.R +++ b/tests/test_single_value_per_month.R @@ -5,12 +5,17 @@ library(RUnit) validate_climdex_object <- function(obj_raw, obj_csv, primary_data, dates, expected_levels, slot_name, secondary_data = NULL) { # Validate dates for the primary data primary_slot <- slot(obj_raw, slot_name) - checkEquals(obj_raw@dates[!is.na(primary_slot)], dates, "Raw object dates for non-NA primary data do not match input dates.") - checkEquals(primary_slot[!is.na(primary_slot)], primary_data, "Raw object primary data for non-NA data does not match input data.") + # Validate secondary data if applicable if (!is.null(secondary_data)) { - checkEquals(obj_raw@secondary[!is.na(primary_slot)], secondary_data, "Raw object secondary data for non-NA data does not match input data.") + checkEquals(primary_slot[!is.na(primary_slot)], primary_data[!is.na(primary_data) & !is.na(secondary_data)], "Raw object primary data for non-NA data does not match input data.") + checkEquals(obj_raw@secondary[!is.na(primary_slot)], secondary_data[!is.na(primary_data) & !is.na(secondary_data)], "Raw object secondary data for non-NA data does not match input data.") + checkEquals(obj_raw@dates[!is.na(primary_slot)] , dates[!is.na(primary_data) & !is.na(secondary_data)] , "Raw object dates for non-NA primary data do not match input dates.") + } + else{ + checkEquals(primary_slot[!is.na(primary_slot)], primary_data[!is.na(primary_data)], "Raw object primary data for non-NA data does not match input data.") + checkEquals(obj_raw@dates[!is.na(primary_slot)] , dates[!is.na(primary_data)], "Raw object dates for non-NA primary data do not match input dates.") } # Validate date factors @@ -30,8 +35,8 @@ validate_climdex_object <- function(obj_raw, obj_csv, primary_data, dates, expec climdex.pcic.test.single.monthly.scalar.raw.and.csv.construction <- function() { set.seed(123) - scalar_data <- runif(5, 0, 20) # One value per month - dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 5) + scalar_data <- c(runif(11, 0, 20), NA) # One value per month + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) scalar_obj_raw <- climdexSingleMonthlyScalar.raw( data = scalar_data, @@ -49,6 +54,7 @@ climdex.pcic.test.single.monthly.scalar.raw.and.csv.construction <- function() { data.column = "data", date.columns = "date", date.format = "%Y-%m-%d", + na.strings = 'NA', northern.hemisphere = TRUE, calendar = "gregorian" ) @@ -65,8 +71,9 @@ climdex.pcic.test.single.monthly.scalar.raw.and.csv.construction <- function() { climdex.pcic.test.single.monthly.vector.raw.and.csv.construction <- function() { set.seed(123) - primary_data <- runif(12, 0, 20) # One value per month - secondary_data <- runif(12, 0, 360) + primary_data <- c(runif(11, 0, 20), NA) # One value per month + + secondary_data <- c(NA, runif(11, 0, 360)) dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) vector_obj_raw <- climdexSingleMonthlyVector.raw( @@ -89,10 +96,10 @@ climdex.pcic.test.single.monthly.vector.raw.and.csv.construction <- function() { date.columns = "date", date.format = "%Y-%m-%d", format = "polar", + na.strings = 'NA', northern.hemisphere = TRUE, calendar = "gregorian" ) - validate_climdex_object( vector_obj_raw, vector_obj_csv, primary_data = primary_data,