Skip to content

Commit

Permalink
Merge pull request #134 from MangiolaLaboratory/check-for-factor-cons…
Browse files Browse the repository at this point in the history
…istency

add check for factor consistency
  • Loading branch information
stemangiola authored Feb 4, 2025
2 parents 6158d9d + 143ffc2 commit 2a58b06
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 1 deletion.
4 changes: 3 additions & 1 deletion R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -457,13 +457,15 @@ sccomp_estimate.data.frame <- function(.data,
.count <- enquo(.count)
.sample_cell_group_pairs_to_exclude <- enquo(.sample_cell_group_pairs_to_exclude)

# Check Sample Consistency of Factors
check_sample_consistency_of_factors(.data, formula_composition, !!.sample)

# Deprecation of .count
if (rlang::quo_is_symbolic(.count)) {
rlang::warn("The argument '.count' is deprecated. Please use '.abundance' instead. This because now `sccomp` cam model both counts and proportions.")
.abundance <- .count
}


# DEPRECATION OF approximate_posterior_inference
if (lifecycle::is_present(approximate_posterior_inference) & !is.null(approximate_posterior_inference)) {
lifecycle::deprecate_warn("1.7.7", "sccomp::sccomp_estimate(approximate_posterior_inference = )", details = "The argument approximate_posterior_inference is now deprecated. Please use inference_method. By default, inference_method value is inferred from approximate_posterior_inference.")
Expand Down
54 changes: 54 additions & 0 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -3512,6 +3512,60 @@ contains_only_valid_chars_for_column <- function(column_names) {
sapply(column_names, check_validity)
}

#' Check Sample Consistency of Factors
#'
#' Verifies that every covariate referenced by a formula takes a single value
#' within each sample. A covariate is flagged when the number of distinct
#' sample-value pairs exceeds the number of distinct samples, i.e. at least one
#' sample carries more than one value for that covariate. The function stops
#' with an error naming all offending covariates.
#'
#' @importFrom dplyr select
#' @importFrom dplyr filter
#' @importFrom dplyr mutate
#' @importFrom dplyr pull
#' @importFrom dplyr distinct
#' @importFrom tidyr pivot_longer
#' @importFrom tidyr nest
#' @importFrom purrr map_lgl
#' @importFrom rlang enquo
#'
#' @param .data A data frame containing the sample column and the covariates.
#' @param my_formula The model formula whose covariates are checked. It is
#'   handed as-is to `parse_formula()`, which is expected to return the
#'   covariate column names (NOTE(review): confirm the accepted input type
#'   against `parse_formula()`; the caller passes `formula_composition`).
#' @param .sample Unquoted name of the column identifying samples (tidy-eval;
#'   callers holding a quosure should inject it with `!!`).
#'
#' @details The function selects the sample column and the covariates obtained
#' from `my_formula`, pivots the covariates to long format (values are converted
#' to character so that covariates of different types can share one column),
#' nests the data by covariate name, and for each covariate compares the number
#' of distinct sample-value pairs with the number of distinct samples.
#' If the formula yields no covariates there is nothing to check and the
#' function returns immediately.
#'
#' @return `NULL`, invisibly. Called for its side effect: it stops with an error
#' message if any inconsistencies are found.
#'
#' @noRd
#' @keywords internal
check_sample_consistency_of_factors <- function(.data, my_formula, .sample) {

  .sample <- enquo(.sample)

  covariates <- parse_formula(my_formula)

  # No covariates to check (e.g. an intercept-only formula): pivot_longer()
  # refuses to pivot zero columns, so bail out before reaching it.
  if (length(covariates) == 0) {
    return(invisible(NULL))
  }

  # Check that I have one set of covariates per sample
  any_covariate_not_matching_sample_size <-
    .data |>
    select(!!.sample, dplyr::all_of(covariates)) |>
    # Covariates may have different types (numeric, character, factor, ...);
    # without a common type pivot_longer() cannot stack them in one column.
    pivot_longer(-!!.sample, values_transform = list(value = as.character)) |>
    nest(data = -name) |>
    mutate(correct_size = map_lgl(
      data,
      # One value per sample <=> as many (sample, value) pairs as samples
      ~ (.x |> distinct(!!.sample, value) |> nrow()) <=
        (.x |> distinct(!!.sample) |> nrow())
    )) |>
    filter(!correct_size)

  if (nrow(any_covariate_not_matching_sample_size) > 0) {
    stop(
      sprintf("sccomp says: your \"%s\" factor(s) is(are) mismatched across samples. ", any_covariate_not_matching_sample_size |> pull(name) |> paste(collapse = ", ")),
      "For example, sample_bar having more than one value for factor_foo. ",
      "For sample_bar you should have one value for factor_foo. consistent across groups (e.g. cell types)."
    )
  }

}



#' chatGPT - Intelligently Remove Surrounding Brackets from Each String in a Vector
#'
Expand Down

0 comments on commit 2a58b06

Please sign in to comment.