From de046d381d2ce17d0633f268c7465a94659238b7 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Mon, 11 Mar 2024 15:29:14 -0400 Subject: [PATCH 01/93] Implemented LRT functionality --- lib/lcdbwf/R/contrasts.R | 45 +++++++++++++++++++++++----------------- lib/lcdbwf/R/dds.R | 20 +++++++++++++++--- lib/lcdbwf/R/results.R | 24 +++++++++++++++++++++ 3 files changed, 67 insertions(+), 22 deletions(-) diff --git a/lib/lcdbwf/R/contrasts.R b/lib/lcdbwf/R/contrasts.R index cd214a86..22b44a0d 100644 --- a/lib/lcdbwf/R/contrasts.R +++ b/lib/lcdbwf/R/contrasts.R @@ -100,7 +100,7 @@ dds_coefs <- function(dds, ..., expand=FALSE){ } -#' Convenience function for building contrasts +#' Convenience function for building contrasts #' #' @description #' @@ -125,6 +125,9 @@ dds_coefs <- function(dds, ..., expand=FALSE){ #' @param label Label to describe this contrast which will be used in headings. #' @param dds_list List of dds objects. If NULL, then look in the global #' environment for an object called "dds_list" and use that. +#' @param type Type of shrinkage for use by lfcShrink(). If no type is given, +#' we use the current DESeq2 default argument for lfcShrink(type=). If +#' NULL is given, we skip lfcShrink(). #' @param ... Additional arguments are passed to results() and lfcShrink(). If #' "parallel" is not explicitly specified here, then look in the global env for #' a variable called "config" and find the parallel config setting from there. @@ -165,25 +168,12 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ results_dots <- lcdbwf:::match_from_dots(dots, results) res <- do.call("results", results_dots) - # We're about to call lfcShrink, but it needs the res object...so inject the - # one we just made into dots. 
- dots[['res']] <- res - - # lfcShrink also needs the dds object, so inject that too - dots[['dds']] <- dds - - lfcShrink_dots <- lcdbwf:::match_from_dots(dots, lfcShrink) - res <- do.call("lfcShrink", lfcShrink_dots) - - # Add the shrinkage type to the metadata of the results object. - # # If "type" was specified when calling this function, it's easy and we use - # that. Otherwise, if it was not specified then DESeq2 used the default. + # that. Otherwise, if it was not specified then well use the current DESeq2 default. # Since that default can change as we have seen in the past, we need to # inspect the lfcShrink function itself to see what the current default is, # and use that. - shrinkage_type <- dots[['type']] - if (is.null(shrinkage_type)){ + if (!'type' %in% names(dots)) { # The definition of lfcShrink has a character vector as the type argument, # and we want to extract the first thing in that vector. But formals() # return strings, so we need to eval that string to convert it to @@ -191,11 +181,28 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ # # In recent versions this should evaluate to "apeglm". But this way we # are inspecting the function itself if it ever changes. - shrinkage_type <- eval(formals(DESeq2::lfcShrink)$type)[1] + type <- eval(formals(DESeq2::lfcShrink)$type)[1] } - # Add to results object so we can report it out later. - metadata(res)$type <- shrinkage_type + # While lfcShrink doesn't accept NULL as a type, we're using it here as + # a mechanism to disable lfcShrink altogether. + if (!is.null(type)) { + # We're about to call lfcShrink, but it needs the res object...so inject the + # one we just made into dots. 
+ dots[['res']] <- res + + # lfcShrink also needs the dds object, so inject that too + dots[['dds']] <- dds + + lfcShrink_dots <- lcdbwf:::match_from_dots(dots, lfcShrink) + res <- do.call("lfcShrink", lfcShrink_dots) + + # Add the shrinkage type to the metadata of the results object + metadata(res)$type <- type + } else { + # Be explicit, and ensure there's always a type attribute + metadata(res)$type <- NULL + } return( list( diff --git a/lib/lcdbwf/R/dds.R b/lib/lcdbwf/R/dds.R index bcfa22c5..9c886181 100644 --- a/lib/lcdbwf/R/dds.R +++ b/lib/lcdbwf/R/dds.R @@ -46,16 +46,21 @@ kallisto.path.func <- function (x) file.path('..', 'data', 'rnaseq_samples', x, #' @param featureCounts Location of featureCounts output to be loaded make_dds <- function(design_data, config=NULL, collapse_by=NULL, strip_dotted_version=NULL, + default_test='Wald', featureCounts='../data/rnaseq_aggregation/featurecounts.txt', salmon_pattern="../data/rnaseq_samples/__SAMPLENAME__/__SAMPLENAME__.salmon/quant.sf", kallisto_pattern="../data/rnaseq_samples/__SAMPLENAME__/__SAMPLENAME__.kallisto/abundance.h5", ...){ # Note we're using pluck() here for the convenience of setting defaults - coldata <- purrr::pluck(design_data, 'sampletable') design <- purrr::pluck(design_data, 'design') + test <- purrr::pluck(design_data, 'test', .default=default_test) + if (!(test %in% c('Wald', 'LRT'))){ + stop("Valid options for test are 'Wald' (default) or 'LRT'") + } + reduced_design <- purrr::pluck(design_data, 'reduced_design') location <- purrr::pluck(design_data, 'filename', .default=featureCounts) salmon <- purrr::pluck(design_data, 'salmon') kallisto <- purrr::pluck(design_data, 'kallisto') @@ -121,8 +126,17 @@ make_dds <- function(design_data, config=NULL, collapse_by=NULL, dds <- lcdbwf:::collapseReplicates2(dds, dds[[collapse_by]]) } - dds <- DESeq(dds, ...) - return(dds) + # Check if we need to perform the LRT on the dds object + if (test == 'Wald') { + dds <- DESeq(dds, test=test, ...) 
+ return(dds) + } else if (test == 'LRT') { + if (is.null(reduced_design)){ + stop("When using LRT, reduced_design must be provided") + } + dds <- DESeq(dds, test=test, reduced=reduced_design, ...) + return(dds) + } } #' Strip dotted version off of the rownames of a dds object diff --git a/lib/lcdbwf/R/results.R b/lib/lcdbwf/R/results.R index b1cec3aa..fa27a0a7 100644 --- a/lib/lcdbwf/R/results.R +++ b/lib/lcdbwf/R/results.R @@ -33,6 +33,8 @@ build_results_tabs <- function(res_list, dds_list, config, text){ dds_i <- dds_list[[res_list[[name]][['dds']] ]] res_i <- res_list[[name]][['res']] label <- res_list[[name]][['label']] + # Do any contrasts contain LRT? + contains_LRT <- check_LRT(res_i) genes_to_label <- lcdbwf:::genes_to_label(res_i, n=5, config) lcdbwf:::mdcat('## ', label, ' {.tabset}') @@ -42,6 +44,12 @@ build_results_tabs <- function(res_list, dds_list, config, text){ lcdbwf:::mdcat('### M-A plot') lcdbwf:::folded_markdown(text$results_plots$ma, "Help") + # If any contrasts contain LRT, print the source of LFC + # and p values above MA & Volcano plots + if (contains_LRT) { + mdcat(mcols(res_i)$description[9]) + mdcat(mcols(res_i)$description[7]) + } print(lcdbwf:::plotMA_label( res_i, genes_to_label=genes_to_label, @@ -49,6 +57,11 @@ build_results_tabs <- function(res_list, dds_list, config, text){ lcdbwf:::mdcat('### Volcano plot') lcdbwf:::folded_markdown(text$results_plots$volcano, "Help") + if (contains_LRT) { + mdcat(mcols(res_i)$description[9]) + mdcat(mcols(res_i)$description[7]) + } + print(lcdbwf:::plot_volcano_label( res_i, genes_to_label=genes_to_label, @@ -63,3 +76,14 @@ build_results_tabs <- function(res_list, dds_list, config, text){ } } } + +#' Check for LRT in a results object's metadata +#' @param res_i DESeq2 results object +#' @return Boolean TRUE if results object's pvalues were determined +#' via the likelihood-ratio test (LRT) and FALSE if the Wald test +#' was used. 
+check_LRT <- function(res_i) { + mcols_pval <- mcols(res_i)$description[9] + mcols_pval <- grepl('LRT', mcols_pval) + return(mcols_pval) +} From 08ebdba38422c585518702347e64f25806a498c5 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Tue, 7 May 2024 17:03:27 -0400 Subject: [PATCH 02/93] add func to conv df to enrichResult/gseaResult obj --- lib/lcdbwf/R/helpers.R | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index c88779b1..934f8eb2 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -546,6 +546,42 @@ compose_results <- function(res_list, return(obj) } +#' Convert enrichResult/gseaResult to GeneTonic object +#' +#' This function takes an enrichResult object and +#' DE analysis results and creates a GeneTonic object. +#' +#' @param enrich enrichResult object +#' @param res data frame with DE analysis results +#' +#' @export +enrich_to_genetonic <- function(enrich, res){ + suppressMessages({ + if(class(enrich) == 'enrichResult') + l_gs <- shake_enrichResult(enrich) + else if(class(enrich) == 'gseaResult') + l_gs <- shake_gsenrichResult(enrich) + }) + + if(!'gene' %in% colnames(res)){ + if(!is.null(rownames(res))){ + res$gene <- rownames(res) + res <- as.data.frame(res) %>% relocate(gene) + } else { + stop('Cannot find gene column in result data frame!') + } + } + idx <- match(c('gene','symbol'), tolower(colnames(res))) + if(length(idx) != 2){ + stop('Columns of DE results must contain "gene" & "symbol"') + } + anno_df <- res[,idx] + colnames(anno_df) <- c('gene_id', 'gene_name') + + l_gs <- get_aggrscores(l_gs, res, anno_df) + return(list(l_gs=l_gs, anno_df=anno_df)) +} + #' Add cluster ID columns to res_list objects #' #' @param clusters DegPatterns data frame with gene -> cluster mapping From a6e58075f15e61e4f23a8893376c448330844695 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Tue, 7 May 2024 17:03:54 -0400 Subject: [PATCH 03/93] add func to sanitize 
res/dds/rld_list --- lib/lcdbwf/R/helpers.R | 122 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index 934f8eb2..25cd4f0f 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -582,6 +582,128 @@ enrich_to_genetonic <- function(enrich, res){ return(list(l_gs=l_gs, anno_df=anno_df)) } +#' Sanitize res_list, dds_list & rld_list for use with downstream tools +#' +#' This function makes various validation checks and sanitizes the object: +#' +#' - res_list must be a named list +#' - rownames of res_list objects cannot be NULL +#' - colData of objects cannot contain reserved column names +#' - res_list objects should have exactly one 'gene' & 'symbol' (case-insensitive). +#' If missing, these are replaced by rownames. NA 'symbol' values +#' are replaced by corresponding values from 'gene' column or rownames. +#' - dds_list and rld_list names must match exactly +#' - colData of dds_list & rld_list objects cannot contain reserved columns +#' - Adds a 'sample' column to colData of dds_list & rld_list objects +#' - Builds a dds.mapping object that maps res_list objects to dds_list objects +#' - Flattens res_list object which is replaced by two slots corresponding to +#' 'res' & 'label' elements. +#' +#' @param res_list List of DESeqResults objects +#' @param dds_list List of dds objects +#' @param rld_list List of normalized dds objects +#' @param reserved_cols Column names reserved for internal use. 
colData +#' of dds_list or rld_list objects cannot contain these columns +#' +sanitize_res_dds <- function(res_list, dds_list, rld_list, + reserved_cols=c('gene', 'symbol')){ + if(is.null(names(res_list))){ + stop('"res_list" must be a named list') + } + + if(is.null(names(dds_list))){ + stop('"dds_list" must be a named list') + } + + for(name in names(res_list)){ + res <- res_list[[ name ]]$res + + # NOTE: rownames of res_list object cannot be NULL + if(is.null(rownames(res))){ + stop(paste('Rownames of res_list elements cannot be NULL:', name)) + } + + # NOTE: check that a single 'gene' column exists. + gene_idx <- grep('gene', tolower(colnames(res))) + if(length(gene_idx) > 1){ + stop(paste('res_list elements can only have 1 "gene" column:', name)) + } else if(length(gene_idx) == 0){ + # If 'gene' column not present, replace with rownames + message(paste('res_list element is missing a "gene" column. "rownames" will be used instead:', name)) + res$gene <- rownames(res) + gene_idx <- grep('gene', tolower(colnames(res))) + } + + # NOTE: check that a single 'symbol' column exists. + symbol_idx <- grep('symbol', tolower(colnames(res))) + if(length(symbol_idx) > 1){ + stop(paste('res_list elements can only have 1 "symbol" column:', name)) + } else if(length(symbol_idx) == 0){ + # If 'symbol' column not present, replace with rownames + message(paste('res_list element is missing a "symbol" column. 
"rownames" will be used instead:', name)) + res$symbol <- rownames(res) + } else { + # if present, check for NA's & replace with values from 'gene' column + na_idx <- is.na(res[, symbol_idx]) + if(sum(na_idx) > 0){ + res[na_idx, symbol_idx] <- res[na_idx, gene_idx] + } + } + + # plug back in to res_list + res_list[[ name ]]$res <- res + } + + dds_names <- names(dds_list) + rld_names <- names(rld_list) + if(!all(dds_names %in% rld_names)){ + stop(paste('Not all dds_list elements have matching rld_list objects:', + setdiff(dds_names, rld_names))) + } else if(!all(rld_names %in% dds_names)){ + stop(paste('Not all rld_list elements have matching dds_list objects:', + setdiff(rld_names, dds_names))) + } + + for(name in dds_names){ + dds <- dds_list[[ name ]] + rld <- rld_list[[ name ]] + + # NOTE: colData cannot contain reserved column names + if(any(reserved_cols %in% names(colData(dds)))){ + dds_reserved <- intersect(reserved_cols, names(colData(dds))) + stop(paste('colData of dds_list object contains reserved column names -', + paste0(dds_reserved, collapse=', '), ':', name)) + } + + if(any(reserved_cols %in% names(colData(rld)))){ + rld_reserved <- intersect(reserved_cols, names(colData(rld))) + stop(paste('colData of res_list element contains reserved column names -', + paste0(rld_reserved, collapse=', '), ':', name)) + } + + colData(dds)$sample <- rownames(colData(dds)) + colData(rld)$sample <- rownames(colData(rld)) + + dds_list[[ name ]] <- dds + rld_list[[ name ]] <- rld + } + + # build res_list -> dds_list mapping to plug into degpatterns + dds.mapping <- lapply(res_list, function(x) x$dds) + names(dds.mapping) <- names(res_list) + + # build final object + obj <- list( + res=lapply(res_list, function(x) x$res), + dds=dds_list, + rld=rld_list, + labels=lapply(res_list, function(x) x$label), + dds.mapping=dds.mapping) + + return(obj) +} + + #' Add cluster ID columns to res_list objects #' #' @param clusters DegPatterns data frame with gene -> cluster mapping 
From 6543a252588fefccdf35039a78208255f1dcda1c Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Tue, 7 May 2024 17:04:22 -0400 Subject: [PATCH 04/93] rewrite func to make carnation-ready obj --- lib/lcdbwf/R/helpers.R | 237 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 220 insertions(+), 17 deletions(-) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index 25cd4f0f..cb7c0530 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -448,6 +448,12 @@ nested.lapply <- function(x, subfunc, ...){ #' @param rld_list List of normalized dds objects #' @param enrich_list List of enrichment results objects. See details for format. #' @param degpatterns_list List of degpatterns objects +#' @param all_dds Single dds object containing all samples +#' @param all_rld Single normalized dds object containing all samples +#' @param rds_file RDS file containing lcdb-wf object. Can be used to incrementally +#' add elements to a pre-existing run or 'sanitize' an object from a previous run. +#' Ignored if res_list & dds_list are specified. +#' @param workers Number of cores to run GeneTonic conversion on #' #' @details #' @@ -473,9 +479,14 @@ nested.lapply <- function(x, subfunc, ...){ #' ... #' ) #' -#' `enrich_list` is optional. Note that `enrich_list`, if provided, must use -#' results names available in `res_list`. In this example, the names are -#' "ko.vs.wt" and "het.vs.wt". It has the following format: +#' `enrich_list` is optional. Note that `enrich_list`, if provided, these are usually +#' results names available in `res_list`. Alternatively, can have a 'res' key at the +#' second-level containing a result name available in `res_list`. +#' +#' In this example, the names are "ko.vs.wt", "het.vs.wt" & "het.vs.wt_v2". The latter two +#' both map to the `res_list` element "het.vs.wt". 
+#' +#' It has the following format: #' #' list( #' ko.vs.wt=list( @@ -493,16 +504,52 @@ nested.lapply <- function(x, subfunc, ...){ #' up=list(...), #' down=list(...) #' ), +#' het.vs.wt_v2=list( +#' res='het.vs.wt', +#' up=list(...), +#' down=list(...) +#' ), #' ... #' ) #' #' -compose_results <- function(res_list, - dds_list, +compose_results <- function(res_list=NULL, + dds_list=NULL, rld_list=NULL, enrich_list=NULL, - degpatterns_list=NULL){ + degpatterns_list=NULL, + all.dds=NULL, + all.rld=NULL, + rds_file=NULL, + workers=1){ + + if(is.null(res_list) & is.null(dds_list) & is.null(rds_file)){ + stop('Either "res_list" & "dds_list" or "rds_file" must be specified') + } else if(is.null(res_list) | is.null(dds_list)){ + message(paste('Loading objects from RDS file:', rds_file)) + + if(!file.exists(rds_file)){ + stop(paste('RDS file does not exist:', rds_file)) + } + # get res_list & dds_list (and any others) from RDS file + tmp <- readRDS(rds_file) + + if(!any(c('res_list', 'dds_list') %in% names(tmp))){ + stop('Object must contain "res_list" & "dds_list" elements!') + } + + res_list <- tmp$res_list + dds_list <- tmp$dds_list + + if('rld_list' %in% names(tmp)) rld_list <- tmp$rld_list + if('enrich_list' %in% names(tmp)) enrich_list <- tmp$enrich_list + if('degpatterns_list' %in% names(tmp)) degpatterns_list <- tmp$degpatterns_list + if('all.dds' %in% names(tmp)) all.dds <- tmp$all.dds + if('all.rld' %in% names(tmp)) all.rld <- tmp$all.rld + } + + message('\n1. Processing res_list & dds_list') # Much of this function is just checking that the names all line up. 
res_dds_names <- unlist(lapply(res_list, function (x) x$dds)) names(res_dds_names) <- NULL @@ -510,39 +557,195 @@ compose_results <- function(res_list, res_not_dds <- setdiff(res_dds_names, dds_dds_names) dds_not_res <- setdiff(dds_dds_names, res_dds_names) if (length(res_not_dds) > 0){ - stop(paste("The following dds names are in res_list but are not found in dds_list:", res_not_dds, '\n')) + stop(paste("\t- The following dds names are in res_list but are not found in dds_list:", + paste(res_not_dds, collapse=', '), '\n')) } + + # NOTE: drop unused dds_list & rld_list objects if (length(dds_not_res) > 0){ - warning(paste("The following dds names are in dds_list but not in res_list. This OK, but may be unexpected:", dds_not_res, '\n')) + message("\t- The following dds names are in dds_list but not in res_list. These will be skipped:") + message(paste0('\t\t', paste(dds_not_res, collapse='\n\t\t'))) + dds_list <- dds_list[ setdiff(dds_dds_names, dds_not_res) ] + if(!is.null(rld_list)){ + rld_list <- rld_list[ setdiff(names(rld_list), dds_not_res) ] + } } # check if rld_list was specified, if not make it if(is.null(rld_list)){ + message("\t- rld_list was not specified. Generating it") rld_list <- lapply(dds_list, function(x) varianceStabilizingTransformation(x, blind=TRUE) ) } - obj <- list( - res_list=res_list, - dds_list=dds_list, - rld_list=rld_list - ) + # sanitize res_list, dds_list & rld_list + obj <- sanitize_res_dds(res_list=res_list, + dds_list=dds_list, + rld_list=rld_list) + + # if all.dds not specified, but dds_list has length 1, + # then use dds_list[[ 1 ]] as all.dds + if(is.null(all.dds) & length(obj$dds) == 1){ + message('\t- all.dds was not specified, but dds_list has only 1 object. 
Using that instead') + all.dds <- obj$dds[[ 1 ]] + + # if specifying all.dds, compute all.rld even if specified + all.rld <- varianceStabilizingTransformation(all.dds, blind=TRUE) + } + + # if all.dds is specified, but all.rld is not, compute it + if(is.null(all.rld) & !is.null(all.dds)){ + message('\t- all.dds was specified, but not all.rld. Generating it') + all.rld <- varianceStabilizingTransformation(all.dds, blind=TRUE) + } + + message('\t- Generating symbol -> gene mapping from all res_list objects') + # build symbol -> gene mapping from all res_list objects + gene2symbol <- NULL + gene2symbol_names <- NULL + for(name in names(obj$res)){ + res <- obj$res[[name]] + + sidx <- which(tolower(colnames(res)) %in% 'symbol') + gidx <- which(tolower(colnames(res)) %in% 'gene') + + gene2symbol <- c(gene2symbol, unname(res[, sidx])) + gene2symbol_names <- c(gene2symbol_names, res[, gidx]) + } + + # remove duplicates + idx <- !duplicated(gene2symbol) + gene2symbol <- gene2symbol[!idx] + names(gene2symbol) <- gene2symbol_names[!idx] + + # remove NAs + gene2symbol[is.na(gene2symbol)] <- names(gene2symbol)[is.na(gene2symbol)] + + # replace rownames of dds_list, rld_list, all.dds, all.rld with symbol + for(name in names(obj$dds)){ + rownames(obj$dds[[ name ]]) <- gene2symbol[ rownames(obj$dds[[ name ]]) ] + rownames(obj$rld[[ name ]]) <- gene2symbol[ rownames(obj$rld[[ name ]]) ] + } + if(!is.null(all.dds)) rownames(all.dds) <- gene2symbol[ rownames(all.dds) ] + if(!is.null(all.rld)) rownames(all.rld) <- gene2symbol[ rownames(all.rld) ] + + # plug into object + obj[[ 'all.dds' ]] <- all.dds + obj[[ 'all.rld' ]] <- all.rld if (!is.null(enrich_list)){ - res_names <- names(res_list) + message('\n2. 
Processing enrich_list') + + message('\t- Checking enrich_list names against res_list names') + res_names <- names(obj$res) enrich_names <- names(enrich_list) enrich_not_res <- setdiff(enrich_names, res_names) if (length(enrich_not_res) > 0){ - stop(paste("The following results names are in enrich_list but not in res_list:", enrich_not_res)) + # - if FE object name is missing in res_list, check for 'res' keys + # and make sure all 'res' keys are there in res_list + # - if no 'res' keys, give error and stop + no_res_key <- NULL + no_res_list <- NULL + for(name in enrich_not_res){ + if(!'res' %in% names(enrich_list[[ name ]])){ + no_res_key <- c(no_res_key, name) + } else if(!enrich_list[['res']] %in% res_names){ + no_res_key <- c(no_res_key, name) + } + } + + if(length(no_res_key) > 0){ + stop(paste0("The following names are in enrich_list but do not map to res_list:\n\t", paste(no_res_key, collapse='\n\t'))) + } + } + + # - save enrichResult objects as data.frame + # - generate GeneTonic object + message('\t- Converting enrich_list to genetonic objects') + + message('\t\t- Flattening enrich_list') + # flatten enrich_list + elem_names <- NULL + sep <- '*' + res_keys <- list() + for(x in names(enrich_list)){ + for(y in names(enrich_list[[x]])){ + # NOTE: if key is 'res' save & skip + if(y == 'res'){ + res_keys[[ x ]] <- enrich_list[[ x ]][[ 'res' ]] + next + } + for(z in names(enrich_list[[x]][[y]])){ + elem_names <- c(elem_names, paste(x, y, z, sep=sep)) + } + } + } + names(elem_names) <- elem_names + + message(paste('\t\t- Running conversion using', workers, 'worker(s)')) + # run conversion + # TODO: add check for cores if on biowulf + flat_obj <- BiocParallel::bplapply(elem_names, function(x){ + toks <- strsplit(x, split=sep, fixed=TRUE)[[1]] + if(!toks[1] %in% res_names) + return(NULL) + + res <- obj$res[[ toks[1] ]] + eres <- enrich_list[[ toks[1] ]][[ toks[2] ]][[ toks[3] ]] + + df <- enrich_to_genetonic(eres, res) + + df + }, 
BPPARAM=BiocParallel::MulticoreParam(workers)) + + message('\t- Reconstituting nested list & saving enrich_list as data frames') + # reconstitute & clean up + enrich_list_slim <- list() + genetonic <- list() + for(x in names(enrich_list)){ + enrich_list_slim[[ x ]] <- list() + genetonic[[ x ]] <- list() + + for(y in names(enrich_list[[x]])){ + # NOTE: if key is 'res' plug in res_key & skip + if(y == 'res'){ + enrich_list_slim[[ x ]][[ 'res' ]] <- res_keys[[ x ]] + next + } else { + enrich_list_slim[[ x ]][[ y ]] <- list() + genetonic[[ x ]][[ y ]] <- list() + } + + for(z in names(enrich_list[[ x ]][[ y ]])){ + key <- paste(x, y, z, sep=sep) + enrich_list_slim[[ x ]][[ y ]][[ z ]] <- enrich_list[[ x ]][[ y ]][[ z ]]@result + genetonic[[ x ]][[ y ]][[ z ]] <- flat_obj[[key]] + } + } } - obj[['enrich_list']] <- enrich_list + + obj[['enrich']] <- enrich_list_slim + obj[['genetonic']] <- genetonic } if(!is.null(degpatterns_list)){ - obj[['degpatterns']] <- degpatterns_list + message('\n3. Processing degpatterns_list') + + message('\t- Only keeping "normalized" slot & adding "symbol" column') + # only keep 'normalized' slot from degpatterns object + # & add 'symbol' column + obj[['degpatterns']] <- lapply(degpatterns_list, function(x){ + df <- x$normalized + if(!'symbol' %in% colnames(df)){ + df$symbol <- gene2symbol[ df$genes ] + } + df + }) } + message('\nDone!') + return(obj) } From 9176a444771d262226ff97e0dc10b79088630fe9 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Wed, 8 May 2024 10:34:41 -0400 Subject: [PATCH 05/93] correctly conv fe obj with 'res' key --- lib/lcdbwf/R/helpers.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index cb7c0530..d4a8a74c 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -688,10 +688,14 @@ compose_results <- function(res_list=NULL, # TODO: add check for cores if on biowulf flat_obj <- BiocParallel::bplapply(elem_names, function(x){ toks <- 
strsplit(x, split=sep, fixed=TRUE)[[1]] - if(!toks[1] %in% res_names) + if(toks[1] %in% res_names){ + res <- obj$res[[ toks[1] ]] + } else if(toks[1] %in% names(res_keys)){ + res <- obj$res[[ res_keys[[ toks[1] ]] ]] + } else { return(NULL) + } - res <- obj$res[[ toks[1] ]] eres <- enrich_list[[ toks[1] ]][[ toks[2] ]][[ toks[3] ]] df <- enrich_to_genetonic(eres, res) From eb45816cd0cf31cc4c8c6e85e8d2640d898e7a25 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Wed, 8 May 2024 11:32:21 -0400 Subject: [PATCH 06/93] update compose_results calls in fe & gene patterns Rmd --- workflows/rnaseq/downstream/functional-enrichment.Rmd | 4 +--- workflows/rnaseq/downstream/gene-patterns.Rmd | 5 +---- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/workflows/rnaseq/downstream/functional-enrichment.Rmd b/workflows/rnaseq/downstream/functional-enrichment.Rmd index 7eaea3fa..d1ecf952 100644 --- a/workflows/rnaseq/downstream/functional-enrichment.Rmd +++ b/workflows/rnaseq/downstream/functional-enrichment.Rmd @@ -74,9 +74,7 @@ enrich_list <- lcdbwf:::run_enricher(res_list=res_list, ```{r combined_rds, cache=TRUE, dependson='enrich'} -obj <- lcdbwf:::compose_results(res_list=res_list, - dds_list=dds_list, - rld_list=rld_list, +obj <- lcdbwf:::compose_results(rds_file='combined.Rds', enrich_list=enrich_list) saveRDS(obj, file='combined.Rds', compress=FALSE) diff --git a/workflows/rnaseq/downstream/gene-patterns.Rmd b/workflows/rnaseq/downstream/gene-patterns.Rmd index 78b71fc7..5608ae25 100644 --- a/workflows/rnaseq/downstream/gene-patterns.Rmd +++ b/workflows/rnaseq/downstream/gene-patterns.Rmd @@ -253,10 +253,7 @@ for (name in names(res_list)) { ```{r combined_rds, cache=TRUE, dependson='finalclusters'} -obj <- lcdbwf:::compose_results(res_list=res_list, - dds_list=dds_list, - rld_list=rld_list, - enrich_list=enrich_list, +obj <- lcdbwf:::compose_results(rds_file='combined.Rds', degpatterns_list=degpatterns_list) saveRDS(obj, file='combined.Rds', compress=FALSE) 
From 74d9f6cbe225ebf50c1fe1c2cd56065de87ec3ae Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Wed, 8 May 2024 11:34:07 -0400 Subject: [PATCH 07/93] repl '.' w '_' in all.dds/all.rld/dds.mapping --- lib/lcdbwf/R/helpers.R | 48 +++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index d4a8a74c..a5aa8de9 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -518,8 +518,8 @@ compose_results <- function(res_list=NULL, rld_list=NULL, enrich_list=NULL, degpatterns_list=NULL, - all.dds=NULL, - all.rld=NULL, + all_dds=NULL, + all_rld=NULL, rds_file=NULL, workers=1){ @@ -545,8 +545,8 @@ compose_results <- function(res_list=NULL, if('rld_list' %in% names(tmp)) rld_list <- tmp$rld_list if('enrich_list' %in% names(tmp)) enrich_list <- tmp$enrich_list if('degpatterns_list' %in% names(tmp)) degpatterns_list <- tmp$degpatterns_list - if('all.dds' %in% names(tmp)) all.dds <- tmp$all.dds - if('all.rld' %in% names(tmp)) all.rld <- tmp$all.rld + if('all_dds' %in% names(tmp)) all_dds <- tmp$all_dds + if('all_rld' %in% names(tmp)) all_rld <- tmp$all_rld } message('\n1. Processing res_list & dds_list') @@ -584,20 +584,20 @@ compose_results <- function(res_list=NULL, dds_list=dds_list, rld_list=rld_list) - # if all.dds not specified, but dds_list has length 1, - # then use dds_list[[ 1 ]] as all.dds - if(is.null(all.dds) & length(obj$dds) == 1){ - message('\t- all.dds was not specified, but dds_list has only 1 object. Using that instead') - all.dds <- obj$dds[[ 1 ]] + # if all_dds not specified, but dds_list has length 1, + # then use dds_list[[ 1 ]] as all_dds + if(is.null(all_dds) & length(obj$dds) == 1){ + message('\t- all_dds was not specified, but dds_list has only 1 object. 
Using that instead') + all_dds <- obj$dds[[ 1 ]] - # if specifying all.dds, compute all.rld even if specified - all.rld <- varianceStabilizingTransformation(all.dds, blind=TRUE) + # if specifying all_dds, compute all_rld even if specified + all_rld <- varianceStabilizingTransformation(all_dds, blind=TRUE) } - # if all.dds is specified, but all.rld is not, compute it - if(is.null(all.rld) & !is.null(all.dds)){ - message('\t- all.dds was specified, but not all.rld. Generating it') - all.rld <- varianceStabilizingTransformation(all.dds, blind=TRUE) + # if all_dds is specified, but all_rld is not, compute it + if(is.null(all_rld) & !is.null(all_dds)){ + message('\t- all_dds was specified, but not all_rld. Generating it') + all_rld <- varianceStabilizingTransformation(all_dds, blind=TRUE) } message('\t- Generating symbol -> gene mapping from all res_list objects') @@ -622,17 +622,17 @@ compose_results <- function(res_list=NULL, # remove NAs gene2symbol[is.na(gene2symbol)] <- names(gene2symbol)[is.na(gene2symbol)] - # replace rownames of dds_list, rld_list, all.dds, all.rld with symbol + # replace rownames of dds_list, rld_list, all_dds, all_rld with symbol for(name in names(obj$dds)){ rownames(obj$dds[[ name ]]) <- gene2symbol[ rownames(obj$dds[[ name ]]) ] rownames(obj$rld[[ name ]]) <- gene2symbol[ rownames(obj$rld[[ name ]]) ] } - if(!is.null(all.dds)) rownames(all.dds) <- gene2symbol[ rownames(all.dds) ] - if(!is.null(all.rld)) rownames(all.rld) <- gene2symbol[ rownames(all.rld) ] + if(!is.null(all_dds)) rownames(all_dds) <- gene2symbol[ rownames(all_dds) ] + if(!is.null(all_rld)) rownames(all_rld) <- gene2symbol[ rownames(all_rld) ] # plug into object - obj[[ 'all.dds' ]] <- all.dds - obj[[ 'all.rld' ]] <- all.rld + obj[[ 'all_dds' ]] <- all_dds + obj[[ 'all_rld' ]] <- all_rld if (!is.null(enrich_list)){ message('\n2. 
Processing enrich_list') @@ -802,7 +802,7 @@ enrich_to_genetonic <- function(enrich, res){ #' - dds_list and rld_list names must match exactly #' - colData of dds_list & rld_list objects cannot contain reserved columns #' - Adds a 'sample' column to colData of dds_list & rld_list objects -#' - Builds a dds.mapping object that maps res_list objects to dds_list objects +#' - Builds a dds_mapping object that maps res_list objects to dds_list objects #' - Flattens res_list object which is replaced by two slots corresponding to #' 'res' & 'label' elements. #' @@ -896,8 +896,8 @@ sanitize_res_dds <- function(res_list, dds_list, rld_list, } # build res_list -> dds_list mapping to plug into degpatterns - dds.mapping <- lapply(res_list, function(x) x$dds) - names(dds.mapping) <- names(res_list) + dds_mapping <- lapply(res_list, function(x) x$dds) + names(dds_mapping) <- names(res_list) # build final object obj <- list( @@ -905,7 +905,7 @@ sanitize_res_dds <- function(res_list, dds_list, rld_list, dds=dds_list, rld=rld_list, labels=lapply(res_list, function(x) x$label), - dds.mapping=dds.mapping) + dds_mapping=dds_mapping) return(obj) } From 7a257ecd52283427220fb06b3ff37aedbb5e23f9 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Wed, 8 May 2024 15:02:05 -0400 Subject: [PATCH 08/93] add genetonic; pin r-base to 4.2.2 --- include/requirements-r.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/requirements-r.txt b/include/requirements-r.txt index d0a73326..809ea1ec 100644 --- a/include/requirements-r.txt +++ b/include/requirements-r.txt @@ -5,13 +5,14 @@ bioconductor-clusterprofiler bioconductor-degreport bioconductor-deseq2 bioconductor-genomicfeatures +bioconductor-genetonic bioconductor-go.db bioconductor-ihw bioconductor-rhdf5 bioconductor-sva bioconductor-tximport r-ashr -r-base >4.1 +r-base==4.2.2 r-devtools r-dt r-ggally From e78bca581d6f6a4ee4933db3df28abc067c0f929 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Wed, 8 May 2024 
15:03:06 -0400 Subject: [PATCH 09/93] updt env-r.yml --- env-r.yml | 836 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 436 insertions(+), 400 deletions(-) diff --git a/env-r.yml b/env-r.yml index 4f0a132b..d93b9dbd 100644 --- a/env-r.yml +++ b/env-r.yml @@ -2,403 +2,439 @@ channels: - conda-forge - bioconda dependencies: - - _libgcc_mutex=0.1 - - _openmp_mutex=4.5 - - _r-mutex=1.0.1 - - argcomplete=3.0.5 - - binutils_impl_linux-64=2.40 - - bioconductor-all=1.40.0 - - bioconductor-annotate=1.76.0 - - bioconductor-annotationdbi=1.60.0 - - bioconductor-annotationhub=3.6.0 - - bioconductor-apeglm=1.20.0 - - bioconductor-biobase=2.58.0 - - bioconductor-biocfilecache=2.6.0 - - bioconductor-biocgenerics=0.44.0 - - bioconductor-biocio=1.8.0 - - bioconductor-biocparallel=1.32.5 - - bioconductor-biocversion=3.16.0 - - bioconductor-biomart=2.54.0 - - bioconductor-biostrings=2.66.0 - - bioconductor-clusterprofiler=4.6.0 - - bioconductor-complexheatmap=2.14.0 - - bioconductor-consensusclusterplus=1.62.0 - - bioconductor-data-packages=20230202 - - bioconductor-degreport=1.34.0 - - bioconductor-delayedarray=0.24.0 - - bioconductor-deseq2=1.38.0 - - bioconductor-dose=3.24.0 - - bioconductor-edger=3.40.0 - - bioconductor-enrichplot=1.18.0 - - bioconductor-fgsea=1.24.0 - - bioconductor-genefilter=1.80.0 - - bioconductor-geneplotter=1.76.0 - - bioconductor-genomeinfodb=1.34.8 - - bioconductor-genomeinfodbdata=1.2.9 - - bioconductor-genomicalignments=1.34.0 - - bioconductor-genomicfeatures=1.50.2 - - bioconductor-genomicranges=1.50.0 - - bioconductor-ggtree=3.6.0 - - bioconductor-go.db=3.16.0 - - bioconductor-gosemsim=2.24.0 - - bioconductor-hdo.db=0.99.1 - - bioconductor-ihw=1.26.0 - - bioconductor-interactivedisplaybase=1.36.0 - - bioconductor-iranges=2.32.0 - - bioconductor-keggrest=1.38.0 - - bioconductor-limma=3.54.0 - - bioconductor-lpsymphony=1.26.0 - - bioconductor-matrixgenerics=1.10.0 - - bioconductor-qvalue=2.30.0 - - bioconductor-rhdf5=2.42.0 - - 
bioconductor-rhdf5filters=1.10.0 - - bioconductor-rhdf5lib=1.20.0 - - bioconductor-rhtslib=2.0.0 - - bioconductor-rsamtools=2.14.0 - - bioconductor-rtracklayer=1.58.0 - - bioconductor-s4vectors=0.36.0 - - bioconductor-summarizedexperiment=1.28.0 - - bioconductor-sva=3.46.0 - - bioconductor-treeio=1.22.0 - - bioconductor-tximport=1.26.0 - - bioconductor-xvector=0.38.0 - - bioconductor-zlibbioc=1.44.0 - - bwidget=1.9.14 - - bzip2=1.0.8 - - c-ares=1.18.1 - - ca-certificates=2022.12.7 - - cairo=1.16.0 - - curl=7.87.0 - - expat=2.5.0 - - font-ttf-dejavu-sans-mono=2.37 - - font-ttf-inconsolata=3.000 - - font-ttf-source-code-pro=2.038 - - font-ttf-ubuntu=0.83 - - fontconfig=2.14.2 - - fonts-conda-ecosystem=1 - - fonts-conda-forge=1 - - freetype=2.12.1 - - fribidi=1.0.10 - - gcc_impl_linux-64=12.2.0 - - gettext=0.21.1 - - gfortran_impl_linux-64=12.2.0 - - glpk=5.0 - - gmp=6.2.1 - - graphite2=1.3.13 - - gsl=2.7 - - gxx_impl_linux-64=12.2.0 - - harfbuzz=6.0.0 - - icu=70.1 - - jpeg=9e - - jq=1.6 - - kernel-headers_linux-64=2.6.32 - - keyutils=1.6.1 - - krb5=1.20.1 - - ld_impl_linux-64=2.40 - - lerc=4.0.0 - - libblas=3.9.0 - - libcblas=3.9.0 - - libcurl=7.87.0 - - libdeflate=1.17 - - libedit=3.1.20191231 - - libev=4.33 - - libexpat=2.5.0 - - libffi=3.4.2 - - libgcc-devel_linux-64=12.2.0 - - libgcc-ng=12.2.0 - - libgfortran-ng=12.2.0 - - libgfortran5=12.2.0 - - libgit2=1.5.1 - - libglib=2.74.1 - - libgomp=12.2.0 - - libiconv=1.17 - - liblapack=3.9.0 - - libnghttp2=1.51.0 - - libnsl=2.0.0 - - libopenblas=0.3.21 - - libpng=1.6.39 - - libsanitizer=12.2.0 - - libsqlite=3.40.0 - - libssh2=1.10.0 - - libstdcxx-devel_linux-64=12.2.0 - - libstdcxx-ng=12.2.0 - - libtiff=4.5.0 - - libuuid=2.38.1 - - libwebp-base=1.3.0 - - libxcb=1.13 - - libxml2=2.10.3 - - libzlib=1.2.13 - - make=4.3 - - ncurses=6.3 - - oniguruma=6.9.8 - - openssl=1.1.1t - - pandoc=2.19.2 - - pango=1.50.14 - - pcre2=10.40 - - pip=23.0.1 - - pixman=0.40.0 - - pthread-stubs=0.4 - - python=3.11.0 - - python_abi=3.11 - - 
pyyaml=6.0 - - r-ape=5.7_1 - - r-aplot=0.1.10 - - r-ashr=2.2_54 - - r-askpass=1.1 - - r-assertthat=0.2.1 - - r-babelgene=22.9 - - r-backports=1.4.1 - - r-base=4.2.2 - - r-base64enc=0.1_3 - - r-bbmle=1.0.25 - - r-bdsmatrix=1.3_6 - - r-bh=1.81.0_1 - - r-biocmanager=1.30.20 - - r-bit=4.0.5 - - r-bit64=4.0.5 - - r-bitops=1.0_7 - - r-blob=1.2.4 - - r-brew=1.0_8 - - r-brio=1.1.3 - - r-broom=1.0.4 - - r-bslib=0.4.2 - - r-ca=0.71.1 - - r-cachem=1.0.7 - - r-callr=3.7.3 - - r-circlize=0.4.15 - - r-cli=3.6.1 - - r-clipr=0.8.0 - - r-clue=0.3_64 - - r-cluster=2.1.4 - - r-coda=0.19_4 - - r-codetools=0.2_19 - - r-colorspace=2.1_0 - - r-commonmark=1.9.0 - - r-cowplot=1.1.1 - - r-cpp11=0.4.3 - - r-crayon=1.5.2 - - r-credentials=1.3.2 - - r-crosstalk=1.2.0 - - r-curl=4.3.3 - - r-data.table=1.14.8 - - r-dbi=1.1.3 - - r-dbplyr=2.3.2 - - r-dendextend=1.17.1 - - r-desc=1.4.2 - - r-devtools=2.4.5 - - r-diffobj=0.3.5 - - r-digest=0.6.31 - - r-doparallel=1.0.17 - - r-downlit=0.4.2 - - r-downloader=0.4 - - r-dplyr=1.1.1 - - r-dt=0.27 - - r-egg=0.4.5 - - r-ellipsis=0.3.2 - - r-emdbook=1.3.12 - - r-etrunct=0.1 - - r-evaluate=0.20 - - r-fansi=1.0.4 - - r-farver=2.1.1 - - r-fastmap=1.1.1 - - r-fastmatch=1.1_3 - - r-fdrtool=1.2.17 - - r-filelock=1.0.2 - - r-fontawesome=0.5.0 - - r-forcats=1.0.0 - - r-foreach=1.5.2 - - r-formatr=1.14 - - r-fs=1.6.1 - - r-futile.logger=1.4.3 - - r-futile.options=1.0.1 - - r-gclus=1.3.2 - - r-generics=0.1.3 - - r-gert=1.9.2 - - r-getoptlong=1.0.5 - - r-ggally=2.1.2 - - r-ggdendro=0.1.23 - - r-ggforce=0.4.1 - - r-ggfun=0.0.9 - - r-ggnewscale=0.4.8 - - r-ggplot2=3.4.1 - - r-ggplotify=0.1.0 - - r-ggraph=2.1.0 - - r-ggrepel=0.9.3 - - r-gh=1.4.0 - - r-gitcreds=0.1.2 - - r-globaloptions=0.1.2 - - r-glue=1.6.2 - - r-graphlayouts=0.8.4 - - r-gridextra=2.3 - - r-gridgraphics=0.5_1 - - r-gson=0.1.0 - - r-gtable=0.3.3 - - r-heatmaply=1.4.2 - - r-hexbin=1.28.3 - - r-highr=0.10 - - r-hms=1.1.3 - - r-htmltools=0.5.5 - - r-htmlwidgets=1.6.2 - - r-httpuv=1.6.9 - - r-httr=1.4.5 - - 
r-httr2=0.2.2 - - r-igraph=1.4.1 - - r-ini=0.3.1 - - r-invgamma=1.1 - - r-irlba=2.3.5.1 - - r-isoband=0.2.7 - - r-iterators=1.0.14 - - r-jquerylib=0.1.4 - - r-jsonlite=1.8.4 - - r-knitr=1.42 - - r-labeling=0.4.2 - - r-lambda.r=1.2.4 - - r-later=1.3.0 - - r-lattice=0.20_45 - - r-lazyeval=0.2.2 - - r-lifecycle=1.0.3 - - r-locfit=1.5_9.7 - - r-logging=0.10_108 - - r-magrittr=2.0.3 - - r-mass=7.3_58.3 - - r-matrix=1.5_3 - - r-matrixstats=0.63.0 - - r-memoise=2.0.1 - - r-mgcv=1.8_42 - - r-mime=0.12 - - r-miniui=0.1.1.1 - - r-mixsqp=0.3_48 - - r-mnormt=2.1.1 - - r-msigdbr=7.5.1 - - r-munsell=0.5.0 - - r-mvtnorm=1.1_3 - - r-nlme=3.1_162 - - r-numderiv=2016.8_1.1 - - r-openssl=2.0.5 - - r-openxlsx=4.2.5.2 - - r-patchwork=1.1.2 - - r-pheatmap=1.0.12 - - r-pillar=1.9.0 - - r-pkgbuild=1.4.0 - - r-pkgconfig=2.0.3 - - r-pkgdown=2.0.7 - - r-pkgload=1.3.2 - - r-plogr=0.2.0 - - r-plotly=4.10.1 - - r-plyr=1.8.8 - - r-png=0.1_8 - - r-polyclip=1.10_4 - - r-praise=1.0.0 - - r-prettyunits=1.1.1 - - r-processx=3.8.0 - - r-profvis=0.3.7 - - r-progress=1.2.2 - - r-promises=1.2.0.1 - - r-ps=1.7.3 - - r-psych=2.3.3 - - r-purrr=1.0.1 - - r-qap=0.1_2 - - r-r6=2.5.1 - - r-ragg=1.2.5 - - r-rappdirs=0.3.3 - - r-rcmdcheck=1.4.0 - - r-rcolorbrewer=1.1_3 - - r-rcpp=1.0.10 - - r-rcpparmadillo=0.11.4.4.0 - - r-rcppeigen=0.3.3.9.3 - - r-rcppnumerical=0.5_0 - - r-rcurl=1.98_1.10 - - r-readr=2.1.4 - - r-registry=0.5_1 - - r-rematch2=2.1.2 - - r-remotes=2.4.2 - - r-reshape=0.8.9 - - r-reshape2=1.4.4 - - r-restfulr=0.0.15 - - r-rjson=0.2.21 - - r-rlang=1.1.0 - - r-rmarkdown=2.21 - - r-roxygen2=7.2.3 - - r-rprojroot=2.0.3 - - r-rsqlite=2.3.0 - - r-rstudioapi=0.14 - - r-rvcheck=0.2.1 - - r-rversions=2.1.2 - - r-sass=0.4.5 - - r-scales=1.2.1 - - r-scatterpie=0.1.8 - - r-seriation=1.4.2 - - r-sessioninfo=1.2.2 - - r-shadowtext=0.1.2 - - r-shape=1.4.6 - - r-shiny=1.7.4 - - r-slam=0.1_50 - - r-snow=0.4_4 - - r-sourcetools=0.1.7_1 - - r-sparsem=1.81 - - r-squarem=2021.1 - - r-stringi=1.7.12 - - r-stringr=1.5.0 - 
- r-survival=3.5_5 - - r-sys=3.4.1 - - r-systemfonts=1.0.4 - - r-testthat=3.1.7 - - r-textshaping=0.3.6 - - r-tibble=3.2.1 - - r-tidygraph=1.2.3 - - r-tidyr=1.3.0 - - r-tidyselect=1.2.0 - - r-tidytree=0.4.2 - - r-tinytex=0.44 - - r-tmvnsim=1.0_2 - - r-truncnorm=1.0_9 - - r-tsp=1.2_3 - - r-tweenr=2.0.2 - - r-tzdb=0.3.0 - - r-upsetr=1.4.0 - - r-urlchecker=1.0.1 - - r-usethis=2.1.6 - - r-utf8=1.2.3 - - r-vctrs=0.6.1 - - r-viridis=0.6.2 - - r-viridislite=0.4.1 - - r-vroom=1.6.1 - - r-waldo=0.4.0 - - r-webshot=0.5.4 - - r-whisker=0.4.1 - - r-withr=2.5.0 - - r-xfun=0.38 - - r-xml=3.99_0.14 - - r-xml2=1.3.3 - - r-xopen=1.0.0 - - r-xtable=1.8_4 - - r-yaml=2.3.7 - - r-yulab.utils=0.0.6 - - r-zip=2.2.2 - - readline=8.2 - - sed=4.8 - - setuptools=67.6.1 - - sysroot_linux-64=2.12 - - tk=8.6.12 - - tktable=2.10 - - toml=0.10.2 - - tzdata=2023c - - wheel=0.40.0 - - xmltodict=0.13.0 - - xorg-kbproto=1.0.7 - - xorg-libice=1.0.10 - - xorg-libsm=1.2.3 - - xorg-libx11=1.8.4 - - xorg-libxau=1.0.9 - - xorg-libxdmcp=1.1.3 - - xorg-libxext=1.3.4 - - xorg-libxrender=0.9.10 - - xorg-libxt=1.2.1 - - xorg-renderproto=0.11.1 - - xorg-xextproto=7.3.0 - - xorg-xproto=7.0.31 - - xz=5.2.6 - - yaml=0.2.5 - - yq=3.1.1 - - zlib=1.2.13 - - zstd=1.5.2 + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=2_gnu + - _r-mutex=1.0.1=anacondar_1 + - argcomplete=3.3.0=pyhd8ed1ab_0 + - binutils_impl_linux-64=2.40=hf600244_0 + - bioconductor-all=1.40.0=r42hdfd78af_0 + - bioconductor-annotate=1.76.0=r42hdfd78af_0 + - bioconductor-annotationdbi=1.60.0=r42hdfd78af_0 + - bioconductor-annotationhub=3.6.0=r42hdfd78af_0 + - bioconductor-apeglm=1.20.0=r42hf17093f_1 + - bioconductor-biobase=2.58.0=r42ha9d7317_1 + - bioconductor-biocfilecache=2.6.0=r42hdfd78af_0 + - bioconductor-biocgenerics=0.44.0=r42hdfd78af_0 + - bioconductor-biocio=1.8.0=r42hdfd78af_0 + - bioconductor-biocparallel=1.32.5=r42hf17093f_1 + - bioconductor-biocversion=3.16.0=r42hdfd78af_0 + - bioconductor-biomart=2.54.0=r42hdfd78af_0 + - 
bioconductor-biostrings=2.66.0=r42ha9d7317_1 + - bioconductor-clusterprofiler=4.6.0=r42hdfd78af_0 + - bioconductor-complexheatmap=2.14.0=r42hdfd78af_0 + - bioconductor-consensusclusterplus=1.62.0=r42hdfd78af_0 + - bioconductor-data-packages=20231203=hdfd78af_0 + - bioconductor-degreport=1.34.0=r42hdfd78af_0 + - bioconductor-delayedarray=0.24.0=r42ha9d7317_1 + - bioconductor-deseq2=1.38.0=r42hf17093f_1 + - bioconductor-dose=3.24.0=r42hdfd78af_0 + - bioconductor-edger=3.40.0=r42hf17093f_1 + - bioconductor-enrichplot=1.18.0=r42hdfd78af_0 + - bioconductor-fgsea=1.24.0=r42hf17093f_1 + - bioconductor-genefilter=1.80.0=r42ha1e849b_1 + - bioconductor-geneplotter=1.76.0=r42hdfd78af_0 + - bioconductor-genetonic=2.2.0=r42hdfd78af_0 + - bioconductor-genomeinfodb=1.34.9=r42hdfd78af_0 + - bioconductor-genomeinfodbdata=1.2.9=r42hdfd78af_0 + - bioconductor-genomicalignments=1.34.0=r42ha9d7317_1 + - bioconductor-genomicfeatures=1.50.2=r42hdfd78af_0 + - bioconductor-genomicranges=1.50.0=r42ha9d7317_1 + - bioconductor-ggtree=3.6.0=r42hdfd78af_0 + - bioconductor-go.db=3.16.0=r42hdfd78af_0 + - bioconductor-gosemsim=2.24.0=r42hf17093f_1 + - bioconductor-hdo.db=0.99.1=r42hdfd78af_0 + - bioconductor-ihw=1.26.0=r42hdfd78af_0 + - bioconductor-interactivedisplaybase=1.36.0=r42hdfd78af_0 + - bioconductor-iranges=2.32.0=r42ha9d7317_1 + - bioconductor-keggrest=1.38.0=r42hdfd78af_0 + - bioconductor-limma=3.54.0=r42ha9d7317_1 + - bioconductor-lpsymphony=1.26.0=r42ha1e849b_1 + - bioconductor-matrixgenerics=1.10.0=r42hdfd78af_0 + - bioconductor-qvalue=2.30.0=r42hdfd78af_0 + - bioconductor-rhdf5=2.42.0=r42h20ea6f5_2 + - bioconductor-rhdf5filters=1.10.0=r42hf17093f_1 + - bioconductor-rhdf5lib=1.20.0=r42ha9d7317_2 + - bioconductor-rhtslib=2.0.0=r42ha9d7317_1 + - bioconductor-rsamtools=2.14.0=r42hf17093f_1 + - bioconductor-rtracklayer=1.58.0=r42h58c1800_2 + - bioconductor-s4vectors=0.36.0=r42ha9d7317_1 + - bioconductor-summarizedexperiment=1.28.0=r42hdfd78af_0 + - bioconductor-sva=3.46.0=r42ha9d7317_1 
+ - bioconductor-treeio=1.22.0=r42hdfd78af_0 + - bioconductor-tximport=1.26.0=r42hdfd78af_0 + - bioconductor-xvector=0.38.0=r42ha9d7317_1 + - bioconductor-zlibbioc=1.44.0=r42ha9d7317_2 + - bwidget=1.9.14=ha770c72_1 + - bzip2=1.0.8=hd590300_5 + - c-ares=1.28.1=hd590300_0 + - ca-certificates=2024.2.2=hbcca054_0 + - cairo=1.16.0=ha61ee94_1014 + - curl=8.1.2=h409715c_0 + - expat=2.6.2=h59595ed_0 + - fftw=3.3.10=nompi_hc118613_108 + - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 + - font-ttf-inconsolata=3.000=h77eed37_0 + - font-ttf-source-code-pro=2.038=h77eed37_0 + - font-ttf-ubuntu=0.83=h77eed37_2 + - fontconfig=2.14.2=h14ed4e7_0 + - fonts-conda-ecosystem=1=0 + - fonts-conda-forge=1=0 + - freetype=2.12.1=h267a509_2 + - fribidi=1.0.10=h516909a_0 + - gcc_impl_linux-64=13.2.0=h9eb54c0_7 + - gettext=0.22.5=h59595ed_2 + - gettext-tools=0.22.5=h59595ed_2 + - gfortran_impl_linux-64=13.2.0=h738fc78_7 + - glpk=5.0=h445213a_0 + - gmp=6.3.0=h59595ed_1 + - graphite2=1.3.13=h59595ed_1003 + - gsl=2.7=he838d99_0 + - gxx_impl_linux-64=13.2.0=h2a599c4_7 + - harfbuzz=6.0.0=h8e241bc_0 + - icu=70.1=h27087fc_0 + - jpeg=9e=h0b41bf4_3 + - jq=1.7.1=hd590300_0 + - kernel-headers_linux-64=2.6.32=he073ed8_17 + - keyutils=1.6.1=h166bdaf_0 + - krb5=1.20.1=h81ceb04_0 + - ld_impl_linux-64=2.40=h41732ed_0 + - lerc=4.0.0=h27087fc_0 + - libasprintf=0.22.5=h661eb56_2 + - libasprintf-devel=0.22.5=h661eb56_2 + - libblas=3.9.0=22_linux64_openblas + - libcblas=3.9.0=22_linux64_openblas + - libcurl=8.1.2=h409715c_0 + - libdeflate=1.17=h0b41bf4_0 + - libedit=3.1.20191231=he28a2e2_2 + - libev=4.33=hd590300_2 + - libexpat=2.6.2=h59595ed_0 + - libffi=3.4.2=h7f98852_5 + - libgcc-devel_linux-64=13.2.0=hceb6213_107 + - libgcc-ng=13.2.0=h77fa898_7 + - libgettextpo=0.22.5=h59595ed_2 + - libgettextpo-devel=0.22.5=h59595ed_2 + - libgfortran-ng=13.2.0=h69a702a_7 + - libgfortran5=13.2.0=hca663fb_7 + - libgit2=1.7.1=hca3a8ce_0 + - libglib=2.78.1=hebfc3b9_0 + - libgomp=13.2.0=h77fa898_7 + - libiconv=1.17=hd590300_2 + - 
liblapack=3.9.0=22_linux64_openblas + - libnghttp2=1.58.0=h47da74e_1 + - libnsl=2.0.1=hd590300_0 + - libopenblas=0.3.27=pthreads_h413a1c8_0 + - libpng=1.6.43=h2797004_0 + - libsanitizer=13.2.0=h6ddb7a1_7 + - libsqlite=3.45.3=h2797004_0 + - libssh2=1.11.0=h0841786_0 + - libstdcxx-devel_linux-64=13.2.0=hceb6213_107 + - libstdcxx-ng=13.2.0=hc0a3c3a_7 + - libtiff=4.5.0=h6adf6a1_2 + - libuuid=2.38.1=h0b41bf4_0 + - libwebp-base=1.4.0=hd590300_0 + - libxcb=1.13=h7f98852_1004 + - libxcrypt=4.4.36=hd590300_1 + - libxml2=2.10.3=hca2bb57_4 + - libzlib=1.2.13=hd590300_5 + - make=4.3=hd18ef5c_1 + - ncurses=6.4.20240210=h59595ed_0 + - oniguruma=6.9.9=hd590300_0 + - openssl=3.3.0=hd590300_0 + - pandoc=3.1.13=ha770c72_0 + - pango=1.50.14=hd33c08f_0 + - pcre2=10.40=hc3806b6_0 + - pip=24.0=pyhd8ed1ab_0 + - pixman=0.43.2=h59595ed_0 + - pthread-stubs=0.4=h36c2ea0_1001 + - python=3.12.3=hab00c5b_0_cpython + - python_abi=3.12=4_cp312 + - r-anytime=0.3.9=r42ha503ecb_2 + - r-ape=5.8=r42h08d816e_0 + - r-aplot=0.2.2=r42hc72bb7e_0 + - r-ashr=2.2_63=r42ha503ecb_0 + - r-askpass=1.2.0=r42h57805ef_0 + - r-assertthat=0.2.1=r42hc72bb7e_4 + - r-babelgene=22.9=r42hc72bb7e_2 + - r-backbone=2.1.3=r42ha503ecb_0 + - r-backports=1.4.1=r42h57805ef_2 + - r-base=4.2.2=ha7d60f8_3 + - r-base64enc=0.1_3=r42h57805ef_1006 + - r-bbmle=1.0.25.1=r42hc72bb7e_0 + - r-bdsmatrix=1.3_7=r42h57805ef_0 + - r-bh=1.84.0_0=r42hc72bb7e_0 + - r-biocmanager=1.30.23=r42hc72bb7e_0 + - r-bit=4.0.5=r42h57805ef_1 + - r-bit64=4.0.5=r42h57805ef_2 + - r-bitops=1.0_7=r42h57805ef_2 + - r-blob=1.2.4=r42hc72bb7e_1 + - r-brew=1.0_10=r42hc72bb7e_0 + - r-brio=1.1.5=r42hb1dbf0f_0 + - r-broom=1.0.5=r42hc72bb7e_1 + - r-broom.helpers=1.15.0=r42hc72bb7e_0 + - r-bs4dash=2.3.3=r42hc72bb7e_0 + - r-bslib=0.7.0=r42hc72bb7e_0 + - r-ca=0.71.1=r42hc72bb7e_3 + - r-cachem=1.0.8=r42h57805ef_1 + - r-callr=3.7.6=r42hc72bb7e_0 + - r-circlize=0.4.16=r42hc72bb7e_0 + - r-cli=3.6.2=r42ha503ecb_0 + - r-clipr=0.8.0=r42hc72bb7e_2 + - r-clue=0.3_65=r42h57805ef_0 + - 
r-cluster=2.1.6=r42h61816a4_0 + - r-coda=0.19_4.1=r42hc72bb7e_0 + - r-codetools=0.2_20=r42hc72bb7e_0 + - r-colorspace=2.1_0=r42h57805ef_1 + - r-colourpicker=1.3.0=r42hc72bb7e_0 + - r-commonmark=1.9.1=r42h57805ef_0 + - r-complexupset=1.3.3=r42hc72bb7e_2 + - r-cowplot=1.1.3=r42hc72bb7e_0 + - r-cpp11=0.4.7=r42hc72bb7e_0 + - r-crayon=1.5.2=r42hc72bb7e_2 + - r-credentials=2.0.1=r42hc72bb7e_0 + - r-crosstalk=1.2.1=r42hc72bb7e_0 + - r-curl=5.0.1=r42hf9611b0_0 + - r-data.table=1.15.2=r42h029312a_0 + - r-dbi=1.2.2=r42hc72bb7e_0 + - r-dbplyr=2.5.0=r42hc72bb7e_0 + - r-dendextend=1.17.1=r42hc72bb7e_1 + - r-desc=1.4.3=r42hc72bb7e_0 + - r-devtools=2.4.5=r42hc72bb7e_2 + - r-diffobj=0.3.5=r42h57805ef_2 + - r-digest=0.6.35=r42ha503ecb_0 + - r-doparallel=1.0.17=r42hc72bb7e_2 + - r-downlit=0.4.3=r42hc72bb7e_0 + - r-downloader=0.4=r42hc72bb7e_1005 + - r-dplyr=1.1.4=r42ha503ecb_0 + - r-dt=0.33=r42hc72bb7e_0 + - r-dynamictreecut=1.63_1=r42hc72bb7e_1006 + - r-egg=0.4.5=r42hc72bb7e_4 + - r-ellipsis=0.3.2=r42h57805ef_2 + - r-emdbook=1.3.13=r42hc72bb7e_0 + - r-etrunct=0.1=r42hc72bb7e_1005 + - r-evaluate=0.23=r42hc72bb7e_0 + - r-expm=0.999_9=r42hd9ac46e_0 + - r-fansi=1.0.6=r42h57805ef_0 + - r-farver=2.1.1=r42ha503ecb_2 + - r-fastmap=1.1.1=r42ha503ecb_1 + - r-fastmatch=1.1_4=r42h57805ef_0 + - r-fdrtool=1.2.17=r42h57805ef_2 + - r-filelock=1.0.3=r42h57805ef_0 + - r-fontawesome=0.5.2=r42hc72bb7e_0 + - r-forcats=1.0.0=r42hc72bb7e_1 + - r-foreach=1.5.2=r42hc72bb7e_2 + - r-formatr=1.14=r42hc72bb7e_1 + - r-fresh=0.2.0=r42hc72bb7e_2 + - r-fs=1.6.4=r42ha18555a_0 + - r-futile.logger=1.4.3=r42hc72bb7e_1005 + - r-futile.options=1.0.1=r42hc72bb7e_1004 + - r-gclus=1.3.2=r42hc72bb7e_4 + - r-generics=0.1.3=r42hc72bb7e_2 + - r-gert=2.0.1=r42hc25a090_0 + - r-getoptlong=1.0.5=r42hc72bb7e_2 + - r-ggally=2.2.1=r42hc72bb7e_0 + - r-ggdendro=0.2.0=r42hc72bb7e_0 + - r-ggforce=0.4.2=r42ha503ecb_0 + - r-ggfun=0.1.4=r42hc72bb7e_0 + - r-ggnewscale=0.4.10=r42hc72bb7e_0 + - r-ggplot2=3.5.1=r42hc72bb7e_0 + - 
r-ggplotify=0.1.2=r42hc72bb7e_0 + - r-ggraph=2.1.0=r42ha503ecb_2 + - r-ggrepel=0.9.5=r42ha503ecb_0 + - r-ggstats=0.6.0=r42hc72bb7e_0 + - r-gh=1.4.1=r42hc72bb7e_0 + - r-gitcreds=0.1.2=r42hc72bb7e_2 + - r-globaloptions=0.1.2=r42ha770c72_2 + - r-glue=1.7.0=r42h57805ef_0 + - r-graphlayouts=1.1.0=r42ha503ecb_0 + - r-gridextra=2.3=r42hc72bb7e_1005 + - r-gridgraphics=0.5_1=r42hc72bb7e_2 + - r-gson=0.1.0=r42hc72bb7e_1 + - r-gtable=0.3.5=r42hc72bb7e_0 + - r-haven=2.5.4=r42ha503ecb_0 + - r-heatmaply=1.5.0=r42hc72bb7e_0 + - r-hexbin=1.28.3=r42h61816a4_1 + - r-highr=0.10=r42hc72bb7e_1 + - r-hms=1.1.3=r42hc72bb7e_1 + - r-htmltools=0.5.8.1=r42ha503ecb_0 + - r-htmlwidgets=1.6.4=r42hc72bb7e_1 + - r-httpuv=1.6.15=r42ha503ecb_0 + - r-httr=1.4.7=r42hc72bb7e_0 + - r-httr2=0.2.3=r42hc72bb7e_1 + - r-igraph=1.4.2=r42h65ed38e_0 + - r-ini=0.3.1=r42hc72bb7e_1005 + - r-invgamma=1.1=r42hc72bb7e_3 + - r-irlba=2.3.5.1=r42h316c678_1 + - r-isoband=0.2.7=r42ha503ecb_2 + - r-iterators=1.0.14=r42hc72bb7e_2 + - r-jquerylib=0.1.4=r42hc72bb7e_2 + - r-jsonlite=1.8.8=r42h57805ef_0 + - r-knitr=1.46=r42hc72bb7e_0 + - r-labeling=0.4.3=r42hc72bb7e_0 + - r-labelled=2.13.0=r42hc72bb7e_0 + - r-lambda.r=1.2.4=r42hc72bb7e_3 + - r-later=1.3.2=r42ha503ecb_0 + - r-lattice=0.22_6=r42h57805ef_0 + - r-lazyeval=0.2.2=r42h57805ef_4 + - r-lifecycle=1.0.4=r42hc72bb7e_0 + - r-locfit=1.5_9.9=r42h57805ef_0 + - r-logging=0.10_108=r42ha770c72_4 + - r-magrittr=2.0.3=r42h57805ef_2 + - r-mass=7.3_60.0.1=r42h57805ef_0 + - r-matrix=1.6_5=r42h316c678_0 + - r-matrixstats=1.3.0=r42h57805ef_0 + - r-memoise=2.0.1=r42hc72bb7e_2 + - r-mgcv=1.9_1=r42h316c678_0 + - r-mime=0.12=r42h57805ef_2 + - r-miniui=0.1.1.1=r42hc72bb7e_1004 + - r-mixsqp=0.3_54=r42h08d816e_0 + - r-mnormt=2.1.1=r42h61816a4_1 + - r-msigdbr=7.5.1=r42hc72bb7e_2 + - r-munsell=0.5.1=r42hc72bb7e_0 + - r-mvtnorm=1.2_4=r42hd9ac46e_0 + - r-network=1.18.2=r42h57805ef_0 + - r-nlme=3.1_164=r42h61816a4_0 + - r-numderiv=2016.8_1.1=r42hc72bb7e_5 + - r-openssl=2.1.2=r42hd7d0243_0 + - 
r-openxlsx=4.2.5.2=r42ha503ecb_1 + - r-patchwork=1.2.0=r42hc72bb7e_0 + - r-permute=0.9_7=r42hc72bb7e_2 + - r-pheatmap=1.0.12=r42hc72bb7e_4 + - r-pillar=1.9.0=r42hc72bb7e_1 + - r-pkgbuild=1.4.4=r42hc72bb7e_0 + - r-pkgconfig=2.0.3=r42hc72bb7e_3 + - r-pkgdown=2.0.9=r42hc72bb7e_0 + - r-pkgload=1.3.4=r42hc72bb7e_0 + - r-plogr=0.2.0=r42hc72bb7e_1005 + - r-plotly=4.10.4=r42hc72bb7e_0 + - r-plyr=1.8.9=r42ha503ecb_0 + - r-png=0.1_8=r42h81d01c5_1 + - r-poissonbinomial=1.2.6=r42h182e94e_0 + - r-polyclip=1.10_6=r42ha503ecb_0 + - r-praise=1.0.0=r42hc72bb7e_1007 + - r-prettyunits=1.2.0=r42hc72bb7e_0 + - r-processx=3.8.4=r42h57805ef_0 + - r-profvis=0.3.8=r42h57805ef_3 + - r-progress=1.2.3=r42hc72bb7e_0 + - r-promises=1.3.0=r42ha503ecb_0 + - r-ps=1.7.6=r42h57805ef_0 + - r-psych=2.4.3=r42hc72bb7e_0 + - r-purrr=1.0.2=r42h57805ef_0 + - r-qap=0.1_2=r42h61816a4_2 + - r-r6=2.5.1=r42hc72bb7e_2 + - r-ragg=1.2.5=r42hd65d3ba_0 + - r-rappdirs=0.3.3=r42h57805ef_2 + - r-rcmdcheck=1.4.0=r42h785f33e_2 + - r-rcolorbrewer=1.1_3=r42h785f33e_2 + - r-rcpp=1.0.12=r42h7df8631_0 + - r-rcpparmadillo=0.12.8.2.1=r42h08d816e_0 + - r-rcppeigen=0.3.4.0.0=r42h08d816e_0 + - r-rcppnumerical=0.6_0=r42ha503ecb_0 + - r-rcurl=1.98_1.12=r42hf9611b0_2 + - r-readr=2.1.5=r42ha503ecb_0 + - r-registry=0.5_1=r42hc72bb7e_4 + - r-rematch2=2.1.2=r42hc72bb7e_3 + - r-remotes=2.5.0=r42hc72bb7e_0 + - r-reshape=0.8.9=r42hc72bb7e_2 + - r-reshape2=1.4.4=r42ha503ecb_3 + - r-restfulr=0.0.15=r42h56115f1_2 + - r-rintrojs=0.3.4=r42hc72bb7e_0 + - r-rjson=0.2.21=r42ha503ecb_3 + - r-rlang=1.1.3=r42ha503ecb_0 + - r-rle=0.9.2=r42h57805ef_2 + - r-rmarkdown=2.25=r42hc72bb7e_0 + - r-roxygen2=7.3.1=r42ha503ecb_0 + - r-rprojroot=2.0.4=r42hc72bb7e_0 + - r-rsqlite=2.3.4=r42ha503ecb_0 + - r-rstudioapi=0.16.0=r42hc72bb7e_0 + - r-rvcheck=0.2.1=r42hc72bb7e_2 + - r-rversions=2.1.2=r42hc72bb7e_2 + - r-sass=0.4.9=r42ha503ecb_0 + - r-scales=1.3.0=r42hc72bb7e_0 + - r-scatterpie=0.2.2=r42hc72bb7e_0 + - r-seriation=1.5.5=r42h61816a4_0 + - 
r-sessioninfo=1.2.2=r42hc72bb7e_2 + - r-shadowtext=0.1.3=r42hc72bb7e_0 + - r-shape=1.4.6.1=r42ha770c72_0 + - r-shiny=1.8.1.1=r42h785f33e_0 + - r-shinyace=0.4.2=r42hc72bb7e_2 + - r-shinycssloaders=1.0.0=r42hc72bb7e_2 + - r-shinyjs=2.1.0=r42hc72bb7e_2 + - r-shinywidgets=0.8.6=r42hc72bb7e_0 + - r-slam=0.1_50=r42h1df0287_3 + - r-snow=0.4_4=r42hc72bb7e_2 + - r-sourcetools=0.1.7_1=r42ha503ecb_1 + - r-sparsem=1.81=r42h61816a4_2 + - r-squarem=2021.1=r42hc72bb7e_2 + - r-statnet.common=4.9.0=r42h57805ef_1 + - r-stringi=1.7.12=r42h1ae9187_0 + - r-stringr=1.5.1=r42h785f33e_0 + - r-survival=3.6_4=r42hb1dbf0f_0 + - r-sys=3.4.2=r42h57805ef_1 + - r-systemfonts=1.0.5=r42haf97adc_0 + - r-testthat=3.2.1.1=r42ha503ecb_0 + - r-textshaping=0.3.6=r42hbb20487_4 + - r-tibble=3.2.1=r42h57805ef_2 + - r-tidygraph=1.3.0=r42ha503ecb_0 + - r-tidyr=1.3.1=r42ha503ecb_0 + - r-tidyselect=1.2.1=r42hc72bb7e_0 + - r-tidytree=0.4.6=r42hc72bb7e_0 + - r-tinytex=0.51=r42hc72bb7e_0 + - r-tippy=0.1.0=r42hc72bb7e_2 + - r-tmvnsim=1.0_2=r42h61816a4_5 + - r-truncnorm=1.0_9=r42h57805ef_1 + - r-tsp=1.2_4=r42h57805ef_1 + - r-tweenr=2.0.3=r42ha503ecb_0 + - r-tzdb=0.4.0=r42ha503ecb_1 + - r-upsetr=1.4.0=r42hc72bb7e_4 + - r-urlchecker=1.0.1=r42hc72bb7e_2 + - r-usethis=2.2.3=r42hc72bb7e_0 + - r-utf8=1.2.4=r42h57805ef_0 + - r-vctrs=0.6.5=r42ha503ecb_0 + - r-vegan=2.6_4=r42hd9ac46e_1 + - r-viridis=0.6.5=r42hc72bb7e_0 + - r-viridislite=0.4.2=r42hc72bb7e_1 + - r-visnetwork=2.1.2=r42hc72bb7e_2 + - r-vroom=1.6.5=r42ha503ecb_0 + - r-waiter=0.2.5=r42hc72bb7e_2 + - r-waldo=0.5.2=r42hc72bb7e_0 + - r-webshot=0.5.5=r42hc72bb7e_0 + - r-whisker=0.4.1=r42hc72bb7e_1 + - r-withr=3.0.0=r42hc72bb7e_0 + - r-xfun=0.43=r42ha503ecb_0 + - r-xml=3.99_0.14=r42hb43fdd4_0 + - r-xml2=1.3.3=r42h044e5c7_2 + - r-xopen=1.0.1=r42hc72bb7e_0 + - r-xtable=1.8_4=r42hc72bb7e_5 + - r-yaml=2.3.8=r42h57805ef_0 + - r-yulab.utils=0.1.4=r42hc72bb7e_0 + - r-zip=2.3.1=r42h57805ef_0 + - readline=8.2=h8228510_1 + - sed=4.8=he412f7d_0 + - setuptools=69.5.1=pyhd8ed1ab_0 
+ - sysroot_linux-64=2.12=he073ed8_17 + - tk=8.6.13=noxft_h4845f30_101 + - tktable=2.10=h0c5db8f_5 + - toml=0.10.2=pyhd8ed1ab_0 + - tomlkit=0.12.4=pyha770c72_0 + - tzdata=2024a=h0c530f3_0 + - wheel=0.43.0=pyhd8ed1ab_1 + - xmltodict=0.13.0=pyhd8ed1ab_0 + - xorg-kbproto=1.0.7=h14c3975_1002 + - xorg-libice=1.0.10=h516909a_0 + - xorg-libsm=1.2.3=hd9c2040_1000 + - xorg-libx11=1.8.4=h0b41bf4_0 + - xorg-libxau=1.0.11=hd590300_0 + - xorg-libxdmcp=1.1.3=h516909a_0 + - xorg-libxext=1.3.4=h0b41bf4_2 + - xorg-libxrender=0.9.10=h7f98852_1003 + - xorg-libxt=1.3.0=hd590300_0 + - xorg-renderproto=0.11.1=h14c3975_1002 + - xorg-xextproto=7.3.0=h0b41bf4_1003 + - xorg-xproto=7.0.31=h14c3975_1007 + - xz=5.2.6=h166bdaf_0 + - yaml=0.2.5=h7f98852_2 + - yq=3.4.3=pyhd8ed1ab_0 + - zlib=1.2.13=hd590300_5 + - zstd=1.5.6=ha6fb4c9_0 + - pip: + - pyyaml==6.0.1 From 879f4cb5d4819717080dd2d373b997b6f2217c56 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Wed, 8 May 2024 20:18:28 -0400 Subject: [PATCH 10/93] unpin r-base --- include/requirements-r.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/requirements-r.txt b/include/requirements-r.txt index 809ea1ec..21603eaf 100644 --- a/include/requirements-r.txt +++ b/include/requirements-r.txt @@ -12,13 +12,14 @@ bioconductor-rhdf5 bioconductor-sva bioconductor-tximport r-ashr -r-base==4.2.2 +r-base r-devtools r-dt r-ggally r-ggnewscale r-ggrepel r-heatmaply +r-igraph r-knitr r-msigdbr r-openxlsx From 569889aadb5fa942ec466a6b19ff2007710882cc Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Wed, 8 May 2024 20:18:38 -0400 Subject: [PATCH 11/93] updt env-r.yml --- env-r.yml | 681 +++++++++++++++++++++++++++--------------------------- 1 file changed, 337 insertions(+), 344 deletions(-) diff --git a/env-r.yml b/env-r.yml index d93b9dbd..3f3de8f5 100644 --- a/env-r.yml +++ b/env-r.yml @@ -7,69 +7,69 @@ dependencies: - _r-mutex=1.0.1=anacondar_1 - argcomplete=3.3.0=pyhd8ed1ab_0 - binutils_impl_linux-64=2.40=hf600244_0 - - 
bioconductor-all=1.40.0=r42hdfd78af_0 - - bioconductor-annotate=1.76.0=r42hdfd78af_0 - - bioconductor-annotationdbi=1.60.0=r42hdfd78af_0 - - bioconductor-annotationhub=3.6.0=r42hdfd78af_0 - - bioconductor-apeglm=1.20.0=r42hf17093f_1 - - bioconductor-biobase=2.58.0=r42ha9d7317_1 - - bioconductor-biocfilecache=2.6.0=r42hdfd78af_0 - - bioconductor-biocgenerics=0.44.0=r42hdfd78af_0 - - bioconductor-biocio=1.8.0=r42hdfd78af_0 - - bioconductor-biocparallel=1.32.5=r42hf17093f_1 - - bioconductor-biocversion=3.16.0=r42hdfd78af_0 - - bioconductor-biomart=2.54.0=r42hdfd78af_0 - - bioconductor-biostrings=2.66.0=r42ha9d7317_1 - - bioconductor-clusterprofiler=4.6.0=r42hdfd78af_0 - - bioconductor-complexheatmap=2.14.0=r42hdfd78af_0 - - bioconductor-consensusclusterplus=1.62.0=r42hdfd78af_0 + - bioconductor-all=1.42.0=r43hdfd78af_0 + - bioconductor-annotate=1.78.0=r43hdfd78af_0 + - bioconductor-annotationdbi=1.62.2=r43hdfd78af_0 + - bioconductor-annotationhub=3.8.0=r43hdfd78af_0 + - bioconductor-apeglm=1.22.1=r43hf17093f_0 + - bioconductor-biobase=2.60.0=r43ha9d7317_0 + - bioconductor-biocfilecache=2.8.0=r43hdfd78af_0 + - bioconductor-biocgenerics=0.46.0=r43hdfd78af_0 + - bioconductor-biocio=1.10.0=r43hdfd78af_0 + - bioconductor-biocparallel=1.34.2=r43hf17093f_0 + - bioconductor-biocversion=3.17.1=r43hdfd78af_0 + - bioconductor-biomart=2.56.1=r43hdfd78af_0 + - bioconductor-biostrings=2.68.1=r43ha9d7317_0 + - bioconductor-clusterprofiler=4.8.1=r43hdfd78af_0 + - bioconductor-complexheatmap=2.16.0=r43hdfd78af_0 + - bioconductor-consensusclusterplus=1.64.0=r43hdfd78af_0 - bioconductor-data-packages=20231203=hdfd78af_0 - - bioconductor-degreport=1.34.0=r42hdfd78af_0 - - bioconductor-delayedarray=0.24.0=r42ha9d7317_1 - - bioconductor-deseq2=1.38.0=r42hf17093f_1 - - bioconductor-dose=3.24.0=r42hdfd78af_0 - - bioconductor-edger=3.40.0=r42hf17093f_1 - - bioconductor-enrichplot=1.18.0=r42hdfd78af_0 - - bioconductor-fgsea=1.24.0=r42hf17093f_1 - - bioconductor-genefilter=1.80.0=r42ha1e849b_1 
- - bioconductor-geneplotter=1.76.0=r42hdfd78af_0 - - bioconductor-genetonic=2.2.0=r42hdfd78af_0 - - bioconductor-genomeinfodb=1.34.9=r42hdfd78af_0 - - bioconductor-genomeinfodbdata=1.2.9=r42hdfd78af_0 - - bioconductor-genomicalignments=1.34.0=r42ha9d7317_1 - - bioconductor-genomicfeatures=1.50.2=r42hdfd78af_0 - - bioconductor-genomicranges=1.50.0=r42ha9d7317_1 - - bioconductor-ggtree=3.6.0=r42hdfd78af_0 - - bioconductor-go.db=3.16.0=r42hdfd78af_0 - - bioconductor-gosemsim=2.24.0=r42hf17093f_1 - - bioconductor-hdo.db=0.99.1=r42hdfd78af_0 - - bioconductor-ihw=1.26.0=r42hdfd78af_0 - - bioconductor-interactivedisplaybase=1.36.0=r42hdfd78af_0 - - bioconductor-iranges=2.32.0=r42ha9d7317_1 - - bioconductor-keggrest=1.38.0=r42hdfd78af_0 - - bioconductor-limma=3.54.0=r42ha9d7317_1 - - bioconductor-lpsymphony=1.26.0=r42ha1e849b_1 - - bioconductor-matrixgenerics=1.10.0=r42hdfd78af_0 - - bioconductor-qvalue=2.30.0=r42hdfd78af_0 - - bioconductor-rhdf5=2.42.0=r42h20ea6f5_2 - - bioconductor-rhdf5filters=1.10.0=r42hf17093f_1 - - bioconductor-rhdf5lib=1.20.0=r42ha9d7317_2 - - bioconductor-rhtslib=2.0.0=r42ha9d7317_1 - - bioconductor-rsamtools=2.14.0=r42hf17093f_1 - - bioconductor-rtracklayer=1.58.0=r42h58c1800_2 - - bioconductor-s4vectors=0.36.0=r42ha9d7317_1 - - bioconductor-summarizedexperiment=1.28.0=r42hdfd78af_0 - - bioconductor-sva=3.46.0=r42ha9d7317_1 - - bioconductor-treeio=1.22.0=r42hdfd78af_0 - - bioconductor-tximport=1.26.0=r42hdfd78af_0 - - bioconductor-xvector=0.38.0=r42ha9d7317_1 - - bioconductor-zlibbioc=1.44.0=r42ha9d7317_2 + - bioconductor-degreport=1.36.0=r43hdfd78af_0 + - bioconductor-delayedarray=0.26.6=r43ha9d7317_0 + - bioconductor-deseq2=1.40.2=r43hf17093f_0 + - bioconductor-dose=3.26.1=r43hdfd78af_0 + - bioconductor-edger=3.42.4=r43hf17093f_0 + - bioconductor-enrichplot=1.20.0=r43hdfd78af_0 + - bioconductor-fgsea=1.26.0=r43hf17093f_0 + - bioconductor-genefilter=1.82.1=r43ha1e849b_0 + - bioconductor-genetonic=2.4.0=r43hdfd78af_0 + - 
bioconductor-genomeinfodb=1.36.1=r43hdfd78af_0 + - bioconductor-genomeinfodbdata=1.2.11=r43hdfd78af_1 + - bioconductor-genomicalignments=1.36.0=r43ha9d7317_0 + - bioconductor-genomicfeatures=1.52.1=r43hdfd78af_0 + - bioconductor-genomicranges=1.52.0=r43ha9d7317_0 + - bioconductor-ggtree=3.8.0=r43hdfd78af_0 + - bioconductor-go.db=3.17.0=r43hdfd78af_0 + - bioconductor-gosemsim=2.26.0=r43hf17093f_0 + - bioconductor-hdo.db=0.99.1=r43hdfd78af_1 + - bioconductor-ihw=1.28.0=r43hdfd78af_0 + - bioconductor-interactivedisplaybase=1.38.0=r43hdfd78af_0 + - bioconductor-iranges=2.34.1=r43ha9d7317_0 + - bioconductor-keggrest=1.40.0=r43hdfd78af_0 + - bioconductor-limma=3.56.2=r43ha9d7317_0 + - bioconductor-lpsymphony=1.28.1=r43hf17093f_0 + - bioconductor-matrixgenerics=1.12.2=r43hdfd78af_0 + - bioconductor-qvalue=2.32.0=r43hdfd78af_0 + - bioconductor-rhdf5=2.44.0=r43hf17093f_1 + - bioconductor-rhdf5filters=1.12.1=r43hf17093f_1 + - bioconductor-rhdf5lib=1.22.0=r43h217d67c_1 + - bioconductor-rhtslib=2.2.0=r43ha9d7317_0 + - bioconductor-rsamtools=2.16.0=r43hf17093f_0 + - bioconductor-rtracklayer=1.60.0=r43ha9d7317_0 + - bioconductor-s4arrays=1.0.4=r43ha9d7317_0 + - bioconductor-s4vectors=0.38.1=r43ha9d7317_0 + - bioconductor-summarizedexperiment=1.30.2=r43hdfd78af_0 + - bioconductor-sva=3.48.0=r43ha9d7317_0 + - bioconductor-treeio=1.24.1=r43hdfd78af_0 + - bioconductor-tximport=1.30.0=r43hdfd78af_1 + - bioconductor-xvector=0.40.0=r43ha9d7317_0 + - bioconductor-zlibbioc=1.46.0=r43ha9d7317_0 - bwidget=1.9.14=ha770c72_1 - bzip2=1.0.8=hd590300_5 - c-ares=1.28.1=hd590300_0 - ca-certificates=2024.2.2=hbcca054_0 - - cairo=1.16.0=ha61ee94_1014 - - curl=8.1.2=h409715c_0 + - cairo=1.18.0=h3faef2a_0 + - curl=8.7.1=hca28451_0 - expat=2.6.2=h59595ed_0 - fftw=3.3.10=nompi_hc118613_108 - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 @@ -82,43 +82,35 @@ dependencies: - freetype=2.12.1=h267a509_2 - fribidi=1.0.10=h516909a_0 - gcc_impl_linux-64=13.2.0=h9eb54c0_7 - - gettext=0.22.5=h59595ed_2 - - 
gettext-tools=0.22.5=h59595ed_2 - gfortran_impl_linux-64=13.2.0=h738fc78_7 - glpk=5.0=h445213a_0 - gmp=6.3.0=h59595ed_1 - graphite2=1.3.13=h59595ed_1003 - - gsl=2.7=he838d99_0 - gxx_impl_linux-64=13.2.0=h2a599c4_7 - - harfbuzz=6.0.0=h8e241bc_0 - - icu=70.1=h27087fc_0 - - jpeg=9e=h0b41bf4_3 + - harfbuzz=8.4.0=h3d44ed6_0 + - icu=73.2=h59595ed_0 - jq=1.7.1=hd590300_0 - kernel-headers_linux-64=2.6.32=he073ed8_17 - keyutils=1.6.1=h166bdaf_0 - - krb5=1.20.1=h81ceb04_0 + - krb5=1.21.2=h659d440_0 - ld_impl_linux-64=2.40=h41732ed_0 - lerc=4.0.0=h27087fc_0 - - libasprintf=0.22.5=h661eb56_2 - - libasprintf-devel=0.22.5=h661eb56_2 - libblas=3.9.0=22_linux64_openblas - - libcblas=3.9.0=22_linux64_openblas - - libcurl=8.1.2=h409715c_0 - - libdeflate=1.17=h0b41bf4_0 + - libcurl=8.7.1=hca28451_0 + - libdeflate=1.20=hd590300_0 - libedit=3.1.20191231=he28a2e2_2 - libev=4.33=hd590300_2 - libexpat=2.6.2=h59595ed_0 - libffi=3.4.2=h7f98852_5 - libgcc-devel_linux-64=13.2.0=hceb6213_107 - libgcc-ng=13.2.0=h77fa898_7 - - libgettextpo=0.22.5=h59595ed_2 - - libgettextpo-devel=0.22.5=h59595ed_2 - libgfortran-ng=13.2.0=h69a702a_7 - libgfortran5=13.2.0=hca663fb_7 - - libgit2=1.7.1=hca3a8ce_0 - - libglib=2.78.1=hebfc3b9_0 + - libgit2=1.8.0=h65212e3_0 + - libglib=2.80.0=hf2295e7_6 - libgomp=13.2.0=h77fa898_7 - libiconv=1.17=hd590300_2 + - libjpeg-turbo=3.0.0=hd590300_1 - liblapack=3.9.0=22_linux64_openblas - libnghttp2=1.58.0=h47da74e_1 - libnsl=2.0.1=hd590300_0 @@ -129,285 +121,286 @@ dependencies: - libssh2=1.11.0=h0841786_0 - libstdcxx-devel_linux-64=13.2.0=hceb6213_107 - libstdcxx-ng=13.2.0=hc0a3c3a_7 - - libtiff=4.5.0=h6adf6a1_2 + - libtiff=4.6.0=h1dd3fc0_3 - libuuid=2.38.1=h0b41bf4_0 - libwebp-base=1.4.0=hd590300_0 - - libxcb=1.13=h7f98852_1004 + - libxcb=1.15=h0b41bf4_0 - libxcrypt=4.4.36=hd590300_1 - - libxml2=2.10.3=hca2bb57_4 + - libxml2=2.12.6=h232c23b_2 - libzlib=1.2.13=hd590300_5 - make=4.3=hd18ef5c_1 - ncurses=6.4.20240210=h59595ed_0 - oniguruma=6.9.9=hd590300_0 - 
openssl=3.3.0=hd590300_0 - pandoc=3.1.13=ha770c72_0 - - pango=1.50.14=hd33c08f_0 - - pcre2=10.40=hc3806b6_0 + - pango=1.52.2=ha41ecd1_0 + - pcre2=10.43=hcad00b1_0 - pip=24.0=pyhd8ed1ab_0 - pixman=0.43.2=h59595ed_0 - pthread-stubs=0.4=h36c2ea0_1001 - python=3.12.3=hab00c5b_0_cpython - python_abi=3.12=4_cp312 - - r-anytime=0.3.9=r42ha503ecb_2 - - r-ape=5.8=r42h08d816e_0 - - r-aplot=0.2.2=r42hc72bb7e_0 - - r-ashr=2.2_63=r42ha503ecb_0 - - r-askpass=1.2.0=r42h57805ef_0 - - r-assertthat=0.2.1=r42hc72bb7e_4 - - r-babelgene=22.9=r42hc72bb7e_2 - - r-backbone=2.1.3=r42ha503ecb_0 - - r-backports=1.4.1=r42h57805ef_2 - - r-base=4.2.2=ha7d60f8_3 - - r-base64enc=0.1_3=r42h57805ef_1006 - - r-bbmle=1.0.25.1=r42hc72bb7e_0 - - r-bdsmatrix=1.3_7=r42h57805ef_0 - - r-bh=1.84.0_0=r42hc72bb7e_0 - - r-biocmanager=1.30.23=r42hc72bb7e_0 - - r-bit=4.0.5=r42h57805ef_1 - - r-bit64=4.0.5=r42h57805ef_2 - - r-bitops=1.0_7=r42h57805ef_2 - - r-blob=1.2.4=r42hc72bb7e_1 - - r-brew=1.0_10=r42hc72bb7e_0 - - r-brio=1.1.5=r42hb1dbf0f_0 - - r-broom=1.0.5=r42hc72bb7e_1 - - r-broom.helpers=1.15.0=r42hc72bb7e_0 - - r-bs4dash=2.3.3=r42hc72bb7e_0 - - r-bslib=0.7.0=r42hc72bb7e_0 - - r-ca=0.71.1=r42hc72bb7e_3 - - r-cachem=1.0.8=r42h57805ef_1 - - r-callr=3.7.6=r42hc72bb7e_0 - - r-circlize=0.4.16=r42hc72bb7e_0 - - r-cli=3.6.2=r42ha503ecb_0 - - r-clipr=0.8.0=r42hc72bb7e_2 - - r-clue=0.3_65=r42h57805ef_0 - - r-cluster=2.1.6=r42h61816a4_0 - - r-coda=0.19_4.1=r42hc72bb7e_0 - - r-codetools=0.2_20=r42hc72bb7e_0 - - r-colorspace=2.1_0=r42h57805ef_1 - - r-colourpicker=1.3.0=r42hc72bb7e_0 - - r-commonmark=1.9.1=r42h57805ef_0 - - r-complexupset=1.3.3=r42hc72bb7e_2 - - r-cowplot=1.1.3=r42hc72bb7e_0 - - r-cpp11=0.4.7=r42hc72bb7e_0 - - r-crayon=1.5.2=r42hc72bb7e_2 - - r-credentials=2.0.1=r42hc72bb7e_0 - - r-crosstalk=1.2.1=r42hc72bb7e_0 - - r-curl=5.0.1=r42hf9611b0_0 - - r-data.table=1.15.2=r42h029312a_0 - - r-dbi=1.2.2=r42hc72bb7e_0 - - r-dbplyr=2.5.0=r42hc72bb7e_0 - - r-dendextend=1.17.1=r42hc72bb7e_1 - - 
r-desc=1.4.3=r42hc72bb7e_0 - - r-devtools=2.4.5=r42hc72bb7e_2 - - r-diffobj=0.3.5=r42h57805ef_2 - - r-digest=0.6.35=r42ha503ecb_0 - - r-doparallel=1.0.17=r42hc72bb7e_2 - - r-downlit=0.4.3=r42hc72bb7e_0 - - r-downloader=0.4=r42hc72bb7e_1005 - - r-dplyr=1.1.4=r42ha503ecb_0 - - r-dt=0.33=r42hc72bb7e_0 - - r-dynamictreecut=1.63_1=r42hc72bb7e_1006 - - r-egg=0.4.5=r42hc72bb7e_4 - - r-ellipsis=0.3.2=r42h57805ef_2 - - r-emdbook=1.3.13=r42hc72bb7e_0 - - r-etrunct=0.1=r42hc72bb7e_1005 - - r-evaluate=0.23=r42hc72bb7e_0 - - r-expm=0.999_9=r42hd9ac46e_0 - - r-fansi=1.0.6=r42h57805ef_0 - - r-farver=2.1.1=r42ha503ecb_2 - - r-fastmap=1.1.1=r42ha503ecb_1 - - r-fastmatch=1.1_4=r42h57805ef_0 - - r-fdrtool=1.2.17=r42h57805ef_2 - - r-filelock=1.0.3=r42h57805ef_0 - - r-fontawesome=0.5.2=r42hc72bb7e_0 - - r-forcats=1.0.0=r42hc72bb7e_1 - - r-foreach=1.5.2=r42hc72bb7e_2 - - r-formatr=1.14=r42hc72bb7e_1 - - r-fresh=0.2.0=r42hc72bb7e_2 - - r-fs=1.6.4=r42ha18555a_0 - - r-futile.logger=1.4.3=r42hc72bb7e_1005 - - r-futile.options=1.0.1=r42hc72bb7e_1004 - - r-gclus=1.3.2=r42hc72bb7e_4 - - r-generics=0.1.3=r42hc72bb7e_2 - - r-gert=2.0.1=r42hc25a090_0 - - r-getoptlong=1.0.5=r42hc72bb7e_2 - - r-ggally=2.2.1=r42hc72bb7e_0 - - r-ggdendro=0.2.0=r42hc72bb7e_0 - - r-ggforce=0.4.2=r42ha503ecb_0 - - r-ggfun=0.1.4=r42hc72bb7e_0 - - r-ggnewscale=0.4.10=r42hc72bb7e_0 - - r-ggplot2=3.5.1=r42hc72bb7e_0 - - r-ggplotify=0.1.2=r42hc72bb7e_0 - - r-ggraph=2.1.0=r42ha503ecb_2 - - r-ggrepel=0.9.5=r42ha503ecb_0 - - r-ggstats=0.6.0=r42hc72bb7e_0 - - r-gh=1.4.1=r42hc72bb7e_0 - - r-gitcreds=0.1.2=r42hc72bb7e_2 - - r-globaloptions=0.1.2=r42ha770c72_2 - - r-glue=1.7.0=r42h57805ef_0 - - r-graphlayouts=1.1.0=r42ha503ecb_0 - - r-gridextra=2.3=r42hc72bb7e_1005 - - r-gridgraphics=0.5_1=r42hc72bb7e_2 - - r-gson=0.1.0=r42hc72bb7e_1 - - r-gtable=0.3.5=r42hc72bb7e_0 - - r-haven=2.5.4=r42ha503ecb_0 - - r-heatmaply=1.5.0=r42hc72bb7e_0 - - r-hexbin=1.28.3=r42h61816a4_1 - - r-highr=0.10=r42hc72bb7e_1 - - r-hms=1.1.3=r42hc72bb7e_1 - - 
r-htmltools=0.5.8.1=r42ha503ecb_0 - - r-htmlwidgets=1.6.4=r42hc72bb7e_1 - - r-httpuv=1.6.15=r42ha503ecb_0 - - r-httr=1.4.7=r42hc72bb7e_0 - - r-httr2=0.2.3=r42hc72bb7e_1 - - r-igraph=1.4.2=r42h65ed38e_0 - - r-ini=0.3.1=r42hc72bb7e_1005 - - r-invgamma=1.1=r42hc72bb7e_3 - - r-irlba=2.3.5.1=r42h316c678_1 - - r-isoband=0.2.7=r42ha503ecb_2 - - r-iterators=1.0.14=r42hc72bb7e_2 - - r-jquerylib=0.1.4=r42hc72bb7e_2 - - r-jsonlite=1.8.8=r42h57805ef_0 - - r-knitr=1.46=r42hc72bb7e_0 - - r-labeling=0.4.3=r42hc72bb7e_0 - - r-labelled=2.13.0=r42hc72bb7e_0 - - r-lambda.r=1.2.4=r42hc72bb7e_3 - - r-later=1.3.2=r42ha503ecb_0 - - r-lattice=0.22_6=r42h57805ef_0 - - r-lazyeval=0.2.2=r42h57805ef_4 - - r-lifecycle=1.0.4=r42hc72bb7e_0 - - r-locfit=1.5_9.9=r42h57805ef_0 - - r-logging=0.10_108=r42ha770c72_4 - - r-magrittr=2.0.3=r42h57805ef_2 - - r-mass=7.3_60.0.1=r42h57805ef_0 - - r-matrix=1.6_5=r42h316c678_0 - - r-matrixstats=1.3.0=r42h57805ef_0 - - r-memoise=2.0.1=r42hc72bb7e_2 - - r-mgcv=1.9_1=r42h316c678_0 - - r-mime=0.12=r42h57805ef_2 - - r-miniui=0.1.1.1=r42hc72bb7e_1004 - - r-mixsqp=0.3_54=r42h08d816e_0 - - r-mnormt=2.1.1=r42h61816a4_1 - - r-msigdbr=7.5.1=r42hc72bb7e_2 - - r-munsell=0.5.1=r42hc72bb7e_0 - - r-mvtnorm=1.2_4=r42hd9ac46e_0 - - r-network=1.18.2=r42h57805ef_0 - - r-nlme=3.1_164=r42h61816a4_0 - - r-numderiv=2016.8_1.1=r42hc72bb7e_5 - - r-openssl=2.1.2=r42hd7d0243_0 - - r-openxlsx=4.2.5.2=r42ha503ecb_1 - - r-patchwork=1.2.0=r42hc72bb7e_0 - - r-permute=0.9_7=r42hc72bb7e_2 - - r-pheatmap=1.0.12=r42hc72bb7e_4 - - r-pillar=1.9.0=r42hc72bb7e_1 - - r-pkgbuild=1.4.4=r42hc72bb7e_0 - - r-pkgconfig=2.0.3=r42hc72bb7e_3 - - r-pkgdown=2.0.9=r42hc72bb7e_0 - - r-pkgload=1.3.4=r42hc72bb7e_0 - - r-plogr=0.2.0=r42hc72bb7e_1005 - - r-plotly=4.10.4=r42hc72bb7e_0 - - r-plyr=1.8.9=r42ha503ecb_0 - - r-png=0.1_8=r42h81d01c5_1 - - r-poissonbinomial=1.2.6=r42h182e94e_0 - - r-polyclip=1.10_6=r42ha503ecb_0 - - r-praise=1.0.0=r42hc72bb7e_1007 - - r-prettyunits=1.2.0=r42hc72bb7e_0 - - 
r-processx=3.8.4=r42h57805ef_0 - - r-profvis=0.3.8=r42h57805ef_3 - - r-progress=1.2.3=r42hc72bb7e_0 - - r-promises=1.3.0=r42ha503ecb_0 - - r-ps=1.7.6=r42h57805ef_0 - - r-psych=2.4.3=r42hc72bb7e_0 - - r-purrr=1.0.2=r42h57805ef_0 - - r-qap=0.1_2=r42h61816a4_2 - - r-r6=2.5.1=r42hc72bb7e_2 - - r-ragg=1.2.5=r42hd65d3ba_0 - - r-rappdirs=0.3.3=r42h57805ef_2 - - r-rcmdcheck=1.4.0=r42h785f33e_2 - - r-rcolorbrewer=1.1_3=r42h785f33e_2 - - r-rcpp=1.0.12=r42h7df8631_0 - - r-rcpparmadillo=0.12.8.2.1=r42h08d816e_0 - - r-rcppeigen=0.3.4.0.0=r42h08d816e_0 - - r-rcppnumerical=0.6_0=r42ha503ecb_0 - - r-rcurl=1.98_1.12=r42hf9611b0_2 - - r-readr=2.1.5=r42ha503ecb_0 - - r-registry=0.5_1=r42hc72bb7e_4 - - r-rematch2=2.1.2=r42hc72bb7e_3 - - r-remotes=2.5.0=r42hc72bb7e_0 - - r-reshape=0.8.9=r42hc72bb7e_2 - - r-reshape2=1.4.4=r42ha503ecb_3 - - r-restfulr=0.0.15=r42h56115f1_2 - - r-rintrojs=0.3.4=r42hc72bb7e_0 - - r-rjson=0.2.21=r42ha503ecb_3 - - r-rlang=1.1.3=r42ha503ecb_0 - - r-rle=0.9.2=r42h57805ef_2 - - r-rmarkdown=2.25=r42hc72bb7e_0 - - r-roxygen2=7.3.1=r42ha503ecb_0 - - r-rprojroot=2.0.4=r42hc72bb7e_0 - - r-rsqlite=2.3.4=r42ha503ecb_0 - - r-rstudioapi=0.16.0=r42hc72bb7e_0 - - r-rvcheck=0.2.1=r42hc72bb7e_2 - - r-rversions=2.1.2=r42hc72bb7e_2 - - r-sass=0.4.9=r42ha503ecb_0 - - r-scales=1.3.0=r42hc72bb7e_0 - - r-scatterpie=0.2.2=r42hc72bb7e_0 - - r-seriation=1.5.5=r42h61816a4_0 - - r-sessioninfo=1.2.2=r42hc72bb7e_2 - - r-shadowtext=0.1.3=r42hc72bb7e_0 - - r-shape=1.4.6.1=r42ha770c72_0 - - r-shiny=1.8.1.1=r42h785f33e_0 - - r-shinyace=0.4.2=r42hc72bb7e_2 - - r-shinycssloaders=1.0.0=r42hc72bb7e_2 - - r-shinyjs=2.1.0=r42hc72bb7e_2 - - r-shinywidgets=0.8.6=r42hc72bb7e_0 - - r-slam=0.1_50=r42h1df0287_3 - - r-snow=0.4_4=r42hc72bb7e_2 - - r-sourcetools=0.1.7_1=r42ha503ecb_1 - - r-sparsem=1.81=r42h61816a4_2 - - r-squarem=2021.1=r42hc72bb7e_2 - - r-statnet.common=4.9.0=r42h57805ef_1 - - r-stringi=1.7.12=r42h1ae9187_0 - - r-stringr=1.5.1=r42h785f33e_0 - - r-survival=3.6_4=r42hb1dbf0f_0 - - 
r-sys=3.4.2=r42h57805ef_1 - - r-systemfonts=1.0.5=r42haf97adc_0 - - r-testthat=3.2.1.1=r42ha503ecb_0 - - r-textshaping=0.3.6=r42hbb20487_4 - - r-tibble=3.2.1=r42h57805ef_2 - - r-tidygraph=1.3.0=r42ha503ecb_0 - - r-tidyr=1.3.1=r42ha503ecb_0 - - r-tidyselect=1.2.1=r42hc72bb7e_0 - - r-tidytree=0.4.6=r42hc72bb7e_0 - - r-tinytex=0.51=r42hc72bb7e_0 - - r-tippy=0.1.0=r42hc72bb7e_2 - - r-tmvnsim=1.0_2=r42h61816a4_5 - - r-truncnorm=1.0_9=r42h57805ef_1 - - r-tsp=1.2_4=r42h57805ef_1 - - r-tweenr=2.0.3=r42ha503ecb_0 - - r-tzdb=0.4.0=r42ha503ecb_1 - - r-upsetr=1.4.0=r42hc72bb7e_4 - - r-urlchecker=1.0.1=r42hc72bb7e_2 - - r-usethis=2.2.3=r42hc72bb7e_0 - - r-utf8=1.2.4=r42h57805ef_0 - - r-vctrs=0.6.5=r42ha503ecb_0 - - r-vegan=2.6_4=r42hd9ac46e_1 - - r-viridis=0.6.5=r42hc72bb7e_0 - - r-viridislite=0.4.2=r42hc72bb7e_1 - - r-visnetwork=2.1.2=r42hc72bb7e_2 - - r-vroom=1.6.5=r42ha503ecb_0 - - r-waiter=0.2.5=r42hc72bb7e_2 - - r-waldo=0.5.2=r42hc72bb7e_0 - - r-webshot=0.5.5=r42hc72bb7e_0 - - r-whisker=0.4.1=r42hc72bb7e_1 - - r-withr=3.0.0=r42hc72bb7e_0 - - r-xfun=0.43=r42ha503ecb_0 - - r-xml=3.99_0.14=r42hb43fdd4_0 - - r-xml2=1.3.3=r42h044e5c7_2 - - r-xopen=1.0.1=r42hc72bb7e_0 - - r-xtable=1.8_4=r42hc72bb7e_5 - - r-yaml=2.3.8=r42h57805ef_0 - - r-yulab.utils=0.1.4=r42hc72bb7e_0 - - r-zip=2.3.1=r42h57805ef_0 + - r-anytime=0.3.9=r43ha503ecb_2 + - r-ape=5.8=r43h08d816e_0 + - r-aplot=0.2.2=r43hc72bb7e_0 + - r-ashr=2.2_63=r43ha503ecb_0 + - r-askpass=1.2.0=r43h57805ef_0 + - r-assertthat=0.2.1=r43hc72bb7e_4 + - r-babelgene=22.9=r43hc72bb7e_2 + - r-backbone=2.1.3=r43ha503ecb_0 + - r-backports=1.4.1=r43h57805ef_2 + - r-base=4.3.3=hf0d99cb_1 + - r-base64enc=0.1_3=r43h57805ef_1006 + - r-bbmle=1.0.25.1=r43hc72bb7e_0 + - r-bdsmatrix=1.3_7=r43h57805ef_0 + - r-bh=1.84.0_0=r43hc72bb7e_0 + - r-biocmanager=1.30.23=r43hc72bb7e_0 + - r-bit=4.0.5=r43h57805ef_1 + - r-bit64=4.0.5=r43h57805ef_2 + - r-bitops=1.0_7=r43h57805ef_2 + - r-blob=1.2.4=r43hc72bb7e_1 + - r-brew=1.0_10=r43hc72bb7e_0 + - 
r-brio=1.1.5=r43hb1dbf0f_0 + - r-broom=1.0.5=r43hc72bb7e_1 + - r-broom.helpers=1.15.0=r43hc72bb7e_0 + - r-bs4dash=2.3.3=r43hc72bb7e_0 + - r-bslib=0.7.0=r43hc72bb7e_0 + - r-ca=0.71.1=r43hc72bb7e_3 + - r-cachem=1.0.8=r43h57805ef_1 + - r-callr=3.7.6=r43hc72bb7e_0 + - r-circlize=0.4.16=r43hc72bb7e_0 + - r-cli=3.6.2=r43ha503ecb_0 + - r-clipr=0.8.0=r43hc72bb7e_2 + - r-clue=0.3_65=r43h57805ef_0 + - r-cluster=2.1.6=r43h61816a4_0 + - r-coda=0.19_4.1=r43hc72bb7e_0 + - r-codetools=0.2_20=r43hc72bb7e_0 + - r-colorspace=2.1_0=r43h57805ef_1 + - r-colourpicker=1.3.0=r43hc72bb7e_0 + - r-commonmark=1.9.1=r43h57805ef_0 + - r-complexupset=1.3.3=r43hc72bb7e_2 + - r-cowplot=1.1.3=r43hc72bb7e_0 + - r-cpp11=0.4.7=r43hc72bb7e_0 + - r-crayon=1.5.2=r43hc72bb7e_2 + - r-credentials=2.0.1=r43hc72bb7e_0 + - r-crosstalk=1.2.1=r43hc72bb7e_0 + - r-curl=5.1.0=r43hf9611b0_0 + - r-data.table=1.15.2=r43h029312a_0 + - r-dbi=1.2.2=r43hc72bb7e_0 + - r-dbplyr=2.5.0=r43hc72bb7e_0 + - r-dendextend=1.17.1=r43hc72bb7e_1 + - r-desc=1.4.3=r43hc72bb7e_0 + - r-devtools=2.4.5=r43hc72bb7e_2 + - r-diffobj=0.3.5=r43h57805ef_2 + - r-digest=0.6.35=r43ha503ecb_0 + - r-doparallel=1.0.17=r43hc72bb7e_2 + - r-downlit=0.4.3=r43hc72bb7e_0 + - r-downloader=0.4=r43hc72bb7e_1005 + - r-dplyr=1.1.4=r43ha503ecb_0 + - r-dt=0.33=r43hc72bb7e_0 + - r-dynamictreecut=1.63_1=r43hc72bb7e_1006 + - r-egg=0.4.5=r43hc72bb7e_4 + - r-ellipsis=0.3.2=r43h57805ef_2 + - r-emdbook=1.3.13=r43hc72bb7e_0 + - r-etrunct=0.1=r43hc72bb7e_1005 + - r-evaluate=0.23=r43hc72bb7e_0 + - r-expm=0.999_9=r43hd9ac46e_0 + - r-fansi=1.0.6=r43h57805ef_0 + - r-farver=2.1.1=r43ha503ecb_2 + - r-fastmap=1.1.1=r43ha503ecb_1 + - r-fastmatch=1.1_4=r43h57805ef_0 + - r-fdrtool=1.2.17=r43h57805ef_2 + - r-filelock=1.0.3=r43h57805ef_0 + - r-fontawesome=0.5.2=r43hc72bb7e_0 + - r-forcats=1.0.0=r43hc72bb7e_1 + - r-foreach=1.5.2=r43hc72bb7e_2 + - r-formatr=1.14=r43hc72bb7e_1 + - r-fresh=0.2.0=r43hc72bb7e_2 + - r-fs=1.6.4=r43ha18555a_0 + - r-futile.logger=1.4.3=r43hc72bb7e_1005 + - 
r-futile.options=1.0.1=r43hc72bb7e_1004 + - r-gclus=1.3.2=r43hc72bb7e_4 + - r-generics=0.1.3=r43hc72bb7e_2 + - r-gert=2.0.1=r43h9b95477_1 + - r-getoptlong=1.0.5=r43hc72bb7e_2 + - r-ggally=2.2.1=r43hc72bb7e_0 + - r-ggdendro=0.2.0=r43hc72bb7e_0 + - r-ggforce=0.4.2=r43ha503ecb_0 + - r-ggfun=0.1.4=r43hc72bb7e_0 + - r-ggnewscale=0.4.10=r43hc72bb7e_0 + - r-ggplot2=3.5.1=r43hc72bb7e_0 + - r-ggplotify=0.1.2=r43hc72bb7e_0 + - r-ggraph=2.1.0=r43ha503ecb_2 + - r-ggrepel=0.9.5=r43ha503ecb_0 + - r-ggridges=0.5.6=r43hc72bb7e_0 + - r-ggstats=0.6.0=r43hc72bb7e_0 + - r-gh=1.4.1=r43hc72bb7e_0 + - r-gitcreds=0.1.2=r43hc72bb7e_2 + - r-globaloptions=0.1.2=r43ha770c72_2 + - r-glue=1.7.0=r43h57805ef_0 + - r-graphlayouts=1.1.0=r43ha503ecb_0 + - r-gridextra=2.3=r43hc72bb7e_1005 + - r-gridgraphics=0.5_1=r43hc72bb7e_2 + - r-gson=0.1.0=r43hc72bb7e_1 + - r-gtable=0.3.5=r43hc72bb7e_0 + - r-haven=2.5.4=r43ha503ecb_0 + - r-heatmaply=1.5.0=r43hc72bb7e_0 + - r-hexbin=1.28.3=r43h61816a4_1 + - r-highr=0.10=r43hc72bb7e_1 + - r-hms=1.1.3=r43hc72bb7e_1 + - r-htmltools=0.5.8.1=r43ha503ecb_0 + - r-htmlwidgets=1.6.4=r43hc72bb7e_1 + - r-httpuv=1.6.15=r43ha503ecb_0 + - r-httr=1.4.7=r43hc72bb7e_0 + - r-httr2=1.0.1=r43hc72bb7e_0 + - r-igraph=2.0.3=r43hbec7d4a_0 + - r-ini=0.3.1=r43hc72bb7e_1005 + - r-invgamma=1.1=r43hc72bb7e_3 + - r-irlba=2.3.5.1=r43h316c678_1 + - r-isoband=0.2.7=r43ha503ecb_2 + - r-iterators=1.0.14=r43hc72bb7e_2 + - r-jquerylib=0.1.4=r43hc72bb7e_2 + - r-jsonlite=1.8.8=r43h57805ef_0 + - r-knitr=1.46=r43hc72bb7e_0 + - r-labeling=0.4.3=r43hc72bb7e_0 + - r-labelled=2.13.0=r43hc72bb7e_0 + - r-lambda.r=1.2.4=r43hc72bb7e_3 + - r-later=1.3.2=r43ha503ecb_0 + - r-lattice=0.22_6=r43h57805ef_0 + - r-lazyeval=0.2.2=r43h57805ef_4 + - r-lifecycle=1.0.4=r43hc72bb7e_0 + - r-locfit=1.5_9.9=r43h57805ef_0 + - r-logging=0.10_108=r43ha770c72_4 + - r-magrittr=2.0.3=r43h57805ef_2 + - r-mass=7.3_60=r43h57805ef_1 + - r-matrix=1.6_5=r43h316c678_0 + - r-matrixstats=1.3.0=r43h57805ef_0 + - r-memoise=2.0.1=r43hc72bb7e_2 + 
- r-mgcv=1.9_1=r43h316c678_0 + - r-mime=0.12=r43h57805ef_2 + - r-miniui=0.1.1.1=r43hc72bb7e_1004 + - r-mixsqp=0.3_54=r43h08d816e_0 + - r-mnormt=2.1.1=r43h61816a4_1 + - r-msigdbr=7.5.1=r43hc72bb7e_2 + - r-munsell=0.5.1=r43hc72bb7e_0 + - r-mvtnorm=1.2_4=r43hd9ac46e_0 + - r-network=1.18.2=r43h57805ef_0 + - r-nlme=3.1_164=r43h61816a4_0 + - r-numderiv=2016.8_1.1=r43hc72bb7e_5 + - r-openssl=2.1.2=r43hd7d0243_0 + - r-openxlsx=4.2.5.2=r43ha503ecb_1 + - r-patchwork=1.2.0=r43hc72bb7e_0 + - r-permute=0.9_7=r43hc72bb7e_2 + - r-pheatmap=1.0.12=r43hc72bb7e_4 + - r-pillar=1.9.0=r43hc72bb7e_1 + - r-pkgbuild=1.4.4=r43hc72bb7e_0 + - r-pkgconfig=2.0.3=r43hc72bb7e_3 + - r-pkgdown=2.0.9=r43hc72bb7e_0 + - r-pkgload=1.3.4=r43hc72bb7e_0 + - r-plogr=0.2.0=r43hc72bb7e_1005 + - r-plotly=4.10.4=r43hc72bb7e_0 + - r-plyr=1.8.9=r43ha503ecb_0 + - r-png=0.1_8=r43h81d01c5_1 + - r-poissonbinomial=1.2.6=r43h182e94e_0 + - r-polyclip=1.10_6=r43ha503ecb_0 + - r-praise=1.0.0=r43hc72bb7e_1007 + - r-prettyunits=1.2.0=r43hc72bb7e_0 + - r-processx=3.8.4=r43h57805ef_0 + - r-profvis=0.3.8=r43h57805ef_3 + - r-progress=1.2.3=r43hc72bb7e_0 + - r-promises=1.3.0=r43ha503ecb_0 + - r-ps=1.7.6=r43h57805ef_0 + - r-psych=2.4.3=r43hc72bb7e_0 + - r-purrr=1.0.2=r43h57805ef_0 + - r-qap=0.1_2=r43h61816a4_2 + - r-r6=2.5.1=r43hc72bb7e_2 + - r-ragg=1.3.1=r43hffa04bc_0 + - r-rappdirs=0.3.3=r43h57805ef_2 + - r-rcmdcheck=1.4.0=r43h785f33e_2 + - r-rcolorbrewer=1.1_3=r43h785f33e_2 + - r-rcpp=1.0.12=r43h7df8631_0 + - r-rcpparmadillo=0.12.8.2.1=r43h08d816e_0 + - r-rcppeigen=0.3.4.0.0=r43h08d816e_0 + - r-rcppnumerical=0.6_0=r43ha503ecb_0 + - r-rcurl=1.98_1.14=r43hf9611b0_0 + - r-readr=2.1.5=r43ha503ecb_0 + - r-registry=0.5_1=r43hc72bb7e_4 + - r-rematch2=2.1.2=r43hc72bb7e_3 + - r-remotes=2.5.0=r43hc72bb7e_0 + - r-reshape=0.8.9=r43hc72bb7e_2 + - r-reshape2=1.4.4=r43ha503ecb_3 + - r-restfulr=0.0.15=r43h56115f1_3 + - r-rintrojs=0.3.4=r43hc72bb7e_0 + - r-rjson=0.2.21=r43ha503ecb_3 + - r-rlang=1.1.3=r43ha503ecb_0 + - 
r-rle=0.9.2=r43h57805ef_2 + - r-rmarkdown=2.26=r43hc72bb7e_0 + - r-roxygen2=7.3.1=r43ha503ecb_0 + - r-rprojroot=2.0.4=r43hc72bb7e_0 + - r-rsqlite=2.3.4=r43ha503ecb_0 + - r-rstudioapi=0.16.0=r43hc72bb7e_0 + - r-rvcheck=0.2.1=r43hc72bb7e_2 + - r-rversions=2.1.2=r43hc72bb7e_2 + - r-sass=0.4.9=r43ha503ecb_0 + - r-scales=1.3.0=r43hc72bb7e_0 + - r-scatterpie=0.2.2=r43hc72bb7e_0 + - r-seriation=1.5.5=r43h61816a4_0 + - r-sessioninfo=1.2.2=r43hc72bb7e_2 + - r-shadowtext=0.1.3=r43hc72bb7e_0 + - r-shape=1.4.6.1=r43ha770c72_0 + - r-shiny=1.8.1.1=r43h785f33e_0 + - r-shinyace=0.4.2=r43hc72bb7e_2 + - r-shinycssloaders=1.0.0=r43hc72bb7e_2 + - r-shinyjs=2.1.0=r43hc72bb7e_2 + - r-shinywidgets=0.8.6=r43hc72bb7e_0 + - r-slam=0.1_50=r43h1df0287_3 + - r-snow=0.4_4=r43hc72bb7e_2 + - r-sourcetools=0.1.7_1=r43ha503ecb_1 + - r-sparsem=1.81=r43h61816a4_2 + - r-squarem=2021.1=r43hc72bb7e_2 + - r-statnet.common=4.9.0=r43h57805ef_1 + - r-stringi=1.8.4=r43hbd1cc82_0 + - r-stringr=1.5.1=r43h785f33e_0 + - r-survival=3.6_4=r43hb1dbf0f_0 + - r-sys=3.4.2=r43h57805ef_1 + - r-systemfonts=1.0.5=r43haf97adc_0 + - r-testthat=3.2.1.1=r43ha503ecb_0 + - r-textshaping=0.3.7=r43hd87b9d6_0 + - r-tibble=3.2.1=r43h57805ef_2 + - r-tidygraph=1.3.0=r43ha503ecb_0 + - r-tidyr=1.3.1=r43ha503ecb_0 + - r-tidyselect=1.2.1=r43hc72bb7e_0 + - r-tidytree=0.4.6=r43hc72bb7e_0 + - r-tinytex=0.51=r43hc72bb7e_0 + - r-tippy=0.1.0=r43hc72bb7e_2 + - r-tmvnsim=1.0_2=r43h61816a4_5 + - r-truncnorm=1.0_9=r43h57805ef_1 + - r-tsp=1.2_4=r43h57805ef_1 + - r-tweenr=2.0.3=r43ha503ecb_0 + - r-tzdb=0.4.0=r43ha503ecb_1 + - r-upsetr=1.4.0=r43hc72bb7e_4 + - r-urlchecker=1.0.1=r43hc72bb7e_2 + - r-usethis=2.2.3=r43hc72bb7e_0 + - r-utf8=1.2.4=r43h57805ef_0 + - r-vctrs=0.6.5=r43ha503ecb_0 + - r-vegan=2.6_4=r43hd9ac46e_1 + - r-viridis=0.6.5=r43hc72bb7e_0 + - r-viridislite=0.4.2=r43hc72bb7e_1 + - r-visnetwork=2.1.2=r43hc72bb7e_2 + - r-vroom=1.6.5=r43ha503ecb_0 + - r-waiter=0.2.5=r43hc72bb7e_2 + - r-waldo=0.5.2=r43hc72bb7e_0 + - 
r-webshot=0.5.5=r43hc72bb7e_0 + - r-whisker=0.4.1=r43hc72bb7e_1 + - r-withr=3.0.0=r43hc72bb7e_0 + - r-xfun=0.43=r43ha503ecb_0 + - r-xml=3.99_0.16.1=r43hc6530ce_0 + - r-xml2=1.3.6=r43hbfba7a4_1 + - r-xopen=1.0.1=r43hc72bb7e_0 + - r-xtable=1.8_4=r43hc72bb7e_5 + - r-yaml=2.3.8=r43h57805ef_0 + - r-yulab.utils=0.1.4=r43hc72bb7e_0 + - r-zip=2.3.1=r43h57805ef_0 - readline=8.2=h8228510_1 - sed=4.8=he412f7d_0 - setuptools=69.5.1=pyhd8ed1ab_0 @@ -420,14 +413,14 @@ dependencies: - wheel=0.43.0=pyhd8ed1ab_1 - xmltodict=0.13.0=pyhd8ed1ab_0 - xorg-kbproto=1.0.7=h14c3975_1002 - - xorg-libice=1.0.10=h516909a_0 - - xorg-libsm=1.2.3=hd9c2040_1000 - - xorg-libx11=1.8.4=h0b41bf4_0 + - xorg-libice=1.1.1=hd590300_0 + - xorg-libsm=1.2.4=h7391055_0 + - xorg-libx11=1.8.9=h8ee46fc_0 - xorg-libxau=1.0.11=hd590300_0 - xorg-libxdmcp=1.1.3=h516909a_0 - xorg-libxext=1.3.4=h0b41bf4_2 - - xorg-libxrender=0.9.10=h7f98852_1003 - - xorg-libxt=1.3.0=hd590300_0 + - xorg-libxrender=0.9.11=hd590300_0 + - xorg-libxt=1.3.0=hd590300_1 - xorg-renderproto=0.11.1=h14c3975_1002 - xorg-xextproto=7.3.0=h0b41bf4_1003 - xorg-xproto=7.0.31=h14c3975_1007 From 419db1af9644ba1867acc2ffca1544da8088a4f7 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 09:46:00 -0400 Subject: [PATCH 12/93] tweak title since not creating RDS file --- lib/lcdbwf/R/helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index a5aa8de9..76be3f25 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -440,7 +440,7 @@ nested.lapply <- function(x, subfunc, ...){ } -#' Compose an RDS file to be used in downstream tools. +#' Compose an object to be used in downstream tools. #' #' @param res_list List of results objects and associated metadata. See details #' for format. 
From 76ae7553176a5ef478754acad512bfa6b858170b Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 09:46:27 -0400 Subject: [PATCH 13/93] updt help to reflect current behavior --- lib/lcdbwf/R/helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index 76be3f25..3906fa84 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -457,7 +457,7 @@ nested.lapply <- function(x, subfunc, ...){ #' #' @details #' -#' res_list and dds_list are required. `res_list` has the following format. +#' res_list and dds_list *or* rds_file are required. `res_list` has the following format. #' #' list( #' ko.vs.wt=list( # names of the list are short keys From 23496569ed4cf2e4f4dcd8dbec50560a310d147e Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 09:46:45 -0400 Subject: [PATCH 14/93] clean up help --- lib/lcdbwf/R/helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index 3906fa84..e3e772cf 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -479,7 +479,7 @@ nested.lapply <- function(x, subfunc, ...){ #' ... #' ) #' -#' `enrich_list` is optional. Note that `enrich_list`, if provided, these are usually +#' `enrich_list` is optional. If provided, has names corresponding to #' results names available in `res_list`. Alternatively, can have a 'res' key at the #' second-level containing a result name available in `res_list`. 
#' From 9d0e902f9dfbdbeba8e8dbae59aa12d29092a9b5 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 09:47:29 -0400 Subject: [PATCH 15/93] only use elements frm RDS file if not specified --- lib/lcdbwf/R/helpers.R | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index e3e772cf..ee479ccf 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -452,7 +452,9 @@ nested.lapply <- function(x, subfunc, ...){ #' @param all_rld Single normalized dds object containing all samples #' @param rds_file RDS file containing lcdb-wf object. Can be used to incrementally #' add elements to a pre-existing run or 'sanitize' an object from a previous run. -#' Ignored if res_list & dds_list are specified. +#' - Ignored if res_list & dds_list are specified. +#' - If rld_list, enrich_list or degpatterns_list are also provided, these will +#' be used to replace corresponding elements in the RDS file. #' @param workers Number of cores to run GeneTonic conversion on #' #' @details @@ -542,11 +544,12 @@ compose_results <- function(res_list=NULL, res_list <- tmp$res_list dds_list <- tmp$dds_list - if('rld_list' %in% names(tmp)) rld_list <- tmp$rld_list - if('enrich_list' %in% names(tmp)) enrich_list <- tmp$enrich_list - if('degpatterns_list' %in% names(tmp)) degpatterns_list <- tmp$degpatterns_list - if('all_dds' %in% names(tmp)) all_dds <- tmp$all_dds - if('all_rld' %in% names(tmp)) all_rld <- tmp$all_rld + # plug in optional slots unless specified already + if('rld_list' %in% names(tmp) & !is.null(rld_list)) rld_list <- tmp$rld_list + if('enrich_list' %in% names(tmp) & !is.null(enrich_list)) enrich_list <- tmp$enrich_list + if('degpatterns_list' %in% names(tmp) & !is.null(degpatterns_list)) degpatterns_list <- tmp$degpatterns_list + if('all_dds' %in% names(tmp) & !is.null(all_dds)) all_dds <- tmp$all_dds + if('all_rld' %in% names(tmp) & !is.null(all_rld)) all_rld <- tmp$all_rld } 
message('\n1. Processing res_list & dds_list') From 52360e3bfc1112fe87832e8af0ae3570876f0615 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 09:49:59 -0400 Subject: [PATCH 16/93] clean up comment --- lib/lcdbwf/R/helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index ee479ccf..2d2f4ae7 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -564,7 +564,7 @@ compose_results <- function(res_list=NULL, paste(res_not_dds, collapse=', '), '\n')) } - # NOTE: drop unused dds_list & rld_list objects + # Drop unused dds_list & rld_list objects if (length(dds_not_res) > 0){ message("\t- The following dds names are in dds_list but not in res_list. These will be skipped:") message(paste0('\t\t', paste(dds_not_res, collapse='\n\t\t'))) From 0440033a096f2f05ecbc0dbe51bee64dba5d0bc7 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 09:50:11 -0400 Subject: [PATCH 17/93] add desc of returned list --- lib/lcdbwf/R/helpers.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index 2d2f4ae7..865c227d 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -815,6 +815,8 @@ enrich_to_genetonic <- function(enrich, res){ #' @param reserved_cols Column names reserved for internal use. 
colData #' of dds_list or rld_list objects cannot contain these columns #' +#' @return A list with the names 'res', 'dds', 'rld', 'labels', 'dds_mapping' +#' sanitize_res_dds <- function(res_list, dds_list, rld_list, reserved_cols=c('gene', 'symbol')){ if(is.null(names(res_list))){ From a3f721ae76b8cf3a91c1c599b3d3e45fdf729307 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 09:50:24 -0400 Subject: [PATCH 18/93] show msg if recalc all_rld --- lib/lcdbwf/R/helpers.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index 865c227d..b56c1d5a 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -594,6 +594,9 @@ compose_results <- function(res_list=NULL, all_dds <- obj$dds[[ 1 ]] # if specifying all_dds, compute all_rld even if specified + if(!is.null(all_rld)){ + message('\t- Generating all_rld from new all_dds') + } all_rld <- varianceStabilizingTransformation(all_dds, blind=TRUE) } From 92e8d0894b6b8b060245377bfaa4e0094c474369 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 09:57:06 -0400 Subject: [PATCH 19/93] rm blds frm env-r.yaml --- env-r.yml | 854 +++++++++++++++++++++++++++--------------------------- 1 file changed, 427 insertions(+), 427 deletions(-) diff --git a/env-r.yml b/env-r.yml index 3f3de8f5..c871259b 100644 --- a/env-r.yml +++ b/env-r.yml @@ -2,432 +2,432 @@ channels: - conda-forge - bioconda dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_gnu - - _r-mutex=1.0.1=anacondar_1 - - argcomplete=3.3.0=pyhd8ed1ab_0 - - binutils_impl_linux-64=2.40=hf600244_0 - - bioconductor-all=1.42.0=r43hdfd78af_0 - - bioconductor-annotate=1.78.0=r43hdfd78af_0 - - bioconductor-annotationdbi=1.62.2=r43hdfd78af_0 - - bioconductor-annotationhub=3.8.0=r43hdfd78af_0 - - bioconductor-apeglm=1.22.1=r43hf17093f_0 - - bioconductor-biobase=2.60.0=r43ha9d7317_0 - - bioconductor-biocfilecache=2.8.0=r43hdfd78af_0 - - 
bioconductor-biocgenerics=0.46.0=r43hdfd78af_0 - - bioconductor-biocio=1.10.0=r43hdfd78af_0 - - bioconductor-biocparallel=1.34.2=r43hf17093f_0 - - bioconductor-biocversion=3.17.1=r43hdfd78af_0 - - bioconductor-biomart=2.56.1=r43hdfd78af_0 - - bioconductor-biostrings=2.68.1=r43ha9d7317_0 - - bioconductor-clusterprofiler=4.8.1=r43hdfd78af_0 - - bioconductor-complexheatmap=2.16.0=r43hdfd78af_0 - - bioconductor-consensusclusterplus=1.64.0=r43hdfd78af_0 - - bioconductor-data-packages=20231203=hdfd78af_0 - - bioconductor-degreport=1.36.0=r43hdfd78af_0 - - bioconductor-delayedarray=0.26.6=r43ha9d7317_0 - - bioconductor-deseq2=1.40.2=r43hf17093f_0 - - bioconductor-dose=3.26.1=r43hdfd78af_0 - - bioconductor-edger=3.42.4=r43hf17093f_0 - - bioconductor-enrichplot=1.20.0=r43hdfd78af_0 - - bioconductor-fgsea=1.26.0=r43hf17093f_0 - - bioconductor-genefilter=1.82.1=r43ha1e849b_0 - - bioconductor-genetonic=2.4.0=r43hdfd78af_0 - - bioconductor-genomeinfodb=1.36.1=r43hdfd78af_0 - - bioconductor-genomeinfodbdata=1.2.11=r43hdfd78af_1 - - bioconductor-genomicalignments=1.36.0=r43ha9d7317_0 - - bioconductor-genomicfeatures=1.52.1=r43hdfd78af_0 - - bioconductor-genomicranges=1.52.0=r43ha9d7317_0 - - bioconductor-ggtree=3.8.0=r43hdfd78af_0 - - bioconductor-go.db=3.17.0=r43hdfd78af_0 - - bioconductor-gosemsim=2.26.0=r43hf17093f_0 - - bioconductor-hdo.db=0.99.1=r43hdfd78af_1 - - bioconductor-ihw=1.28.0=r43hdfd78af_0 - - bioconductor-interactivedisplaybase=1.38.0=r43hdfd78af_0 - - bioconductor-iranges=2.34.1=r43ha9d7317_0 - - bioconductor-keggrest=1.40.0=r43hdfd78af_0 - - bioconductor-limma=3.56.2=r43ha9d7317_0 - - bioconductor-lpsymphony=1.28.1=r43hf17093f_0 - - bioconductor-matrixgenerics=1.12.2=r43hdfd78af_0 - - bioconductor-qvalue=2.32.0=r43hdfd78af_0 - - bioconductor-rhdf5=2.44.0=r43hf17093f_1 - - bioconductor-rhdf5filters=1.12.1=r43hf17093f_1 - - bioconductor-rhdf5lib=1.22.0=r43h217d67c_1 - - bioconductor-rhtslib=2.2.0=r43ha9d7317_0 - - bioconductor-rsamtools=2.16.0=r43hf17093f_0 - - 
bioconductor-rtracklayer=1.60.0=r43ha9d7317_0 - - bioconductor-s4arrays=1.0.4=r43ha9d7317_0 - - bioconductor-s4vectors=0.38.1=r43ha9d7317_0 - - bioconductor-summarizedexperiment=1.30.2=r43hdfd78af_0 - - bioconductor-sva=3.48.0=r43ha9d7317_0 - - bioconductor-treeio=1.24.1=r43hdfd78af_0 - - bioconductor-tximport=1.30.0=r43hdfd78af_1 - - bioconductor-xvector=0.40.0=r43ha9d7317_0 - - bioconductor-zlibbioc=1.46.0=r43ha9d7317_0 - - bwidget=1.9.14=ha770c72_1 - - bzip2=1.0.8=hd590300_5 - - c-ares=1.28.1=hd590300_0 - - ca-certificates=2024.2.2=hbcca054_0 - - cairo=1.18.0=h3faef2a_0 - - curl=8.7.1=hca28451_0 - - expat=2.6.2=h59595ed_0 - - fftw=3.3.10=nompi_hc118613_108 - - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 - - font-ttf-inconsolata=3.000=h77eed37_0 - - font-ttf-source-code-pro=2.038=h77eed37_0 - - font-ttf-ubuntu=0.83=h77eed37_2 - - fontconfig=2.14.2=h14ed4e7_0 - - fonts-conda-ecosystem=1=0 - - fonts-conda-forge=1=0 - - freetype=2.12.1=h267a509_2 - - fribidi=1.0.10=h516909a_0 - - gcc_impl_linux-64=13.2.0=h9eb54c0_7 - - gfortran_impl_linux-64=13.2.0=h738fc78_7 - - glpk=5.0=h445213a_0 - - gmp=6.3.0=h59595ed_1 - - graphite2=1.3.13=h59595ed_1003 - - gxx_impl_linux-64=13.2.0=h2a599c4_7 - - harfbuzz=8.4.0=h3d44ed6_0 - - icu=73.2=h59595ed_0 - - jq=1.7.1=hd590300_0 - - kernel-headers_linux-64=2.6.32=he073ed8_17 - - keyutils=1.6.1=h166bdaf_0 - - krb5=1.21.2=h659d440_0 - - ld_impl_linux-64=2.40=h41732ed_0 - - lerc=4.0.0=h27087fc_0 - - libblas=3.9.0=22_linux64_openblas - - libcurl=8.7.1=hca28451_0 - - libdeflate=1.20=hd590300_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=hd590300_2 - - libexpat=2.6.2=h59595ed_0 - - libffi=3.4.2=h7f98852_5 - - libgcc-devel_linux-64=13.2.0=hceb6213_107 - - libgcc-ng=13.2.0=h77fa898_7 - - libgfortran-ng=13.2.0=h69a702a_7 - - libgfortran5=13.2.0=hca663fb_7 - - libgit2=1.8.0=h65212e3_0 - - libglib=2.80.0=hf2295e7_6 - - libgomp=13.2.0=h77fa898_7 - - libiconv=1.17=hd590300_2 - - libjpeg-turbo=3.0.0=hd590300_1 - - 
liblapack=3.9.0=22_linux64_openblas - - libnghttp2=1.58.0=h47da74e_1 - - libnsl=2.0.1=hd590300_0 - - libopenblas=0.3.27=pthreads_h413a1c8_0 - - libpng=1.6.43=h2797004_0 - - libsanitizer=13.2.0=h6ddb7a1_7 - - libsqlite=3.45.3=h2797004_0 - - libssh2=1.11.0=h0841786_0 - - libstdcxx-devel_linux-64=13.2.0=hceb6213_107 - - libstdcxx-ng=13.2.0=hc0a3c3a_7 - - libtiff=4.6.0=h1dd3fc0_3 - - libuuid=2.38.1=h0b41bf4_0 - - libwebp-base=1.4.0=hd590300_0 - - libxcb=1.15=h0b41bf4_0 - - libxcrypt=4.4.36=hd590300_1 - - libxml2=2.12.6=h232c23b_2 - - libzlib=1.2.13=hd590300_5 - - make=4.3=hd18ef5c_1 - - ncurses=6.4.20240210=h59595ed_0 - - oniguruma=6.9.9=hd590300_0 - - openssl=3.3.0=hd590300_0 - - pandoc=3.1.13=ha770c72_0 - - pango=1.52.2=ha41ecd1_0 - - pcre2=10.43=hcad00b1_0 - - pip=24.0=pyhd8ed1ab_0 - - pixman=0.43.2=h59595ed_0 - - pthread-stubs=0.4=h36c2ea0_1001 - - python=3.12.3=hab00c5b_0_cpython - - python_abi=3.12=4_cp312 - - r-anytime=0.3.9=r43ha503ecb_2 - - r-ape=5.8=r43h08d816e_0 - - r-aplot=0.2.2=r43hc72bb7e_0 - - r-ashr=2.2_63=r43ha503ecb_0 - - r-askpass=1.2.0=r43h57805ef_0 - - r-assertthat=0.2.1=r43hc72bb7e_4 - - r-babelgene=22.9=r43hc72bb7e_2 - - r-backbone=2.1.3=r43ha503ecb_0 - - r-backports=1.4.1=r43h57805ef_2 - - r-base=4.3.3=hf0d99cb_1 - - r-base64enc=0.1_3=r43h57805ef_1006 - - r-bbmle=1.0.25.1=r43hc72bb7e_0 - - r-bdsmatrix=1.3_7=r43h57805ef_0 - - r-bh=1.84.0_0=r43hc72bb7e_0 - - r-biocmanager=1.30.23=r43hc72bb7e_0 - - r-bit=4.0.5=r43h57805ef_1 - - r-bit64=4.0.5=r43h57805ef_2 - - r-bitops=1.0_7=r43h57805ef_2 - - r-blob=1.2.4=r43hc72bb7e_1 - - r-brew=1.0_10=r43hc72bb7e_0 - - r-brio=1.1.5=r43hb1dbf0f_0 - - r-broom=1.0.5=r43hc72bb7e_1 - - r-broom.helpers=1.15.0=r43hc72bb7e_0 - - r-bs4dash=2.3.3=r43hc72bb7e_0 - - r-bslib=0.7.0=r43hc72bb7e_0 - - r-ca=0.71.1=r43hc72bb7e_3 - - r-cachem=1.0.8=r43h57805ef_1 - - r-callr=3.7.6=r43hc72bb7e_0 - - r-circlize=0.4.16=r43hc72bb7e_0 - - r-cli=3.6.2=r43ha503ecb_0 - - r-clipr=0.8.0=r43hc72bb7e_2 - - r-clue=0.3_65=r43h57805ef_0 - - 
r-cluster=2.1.6=r43h61816a4_0 - - r-coda=0.19_4.1=r43hc72bb7e_0 - - r-codetools=0.2_20=r43hc72bb7e_0 - - r-colorspace=2.1_0=r43h57805ef_1 - - r-colourpicker=1.3.0=r43hc72bb7e_0 - - r-commonmark=1.9.1=r43h57805ef_0 - - r-complexupset=1.3.3=r43hc72bb7e_2 - - r-cowplot=1.1.3=r43hc72bb7e_0 - - r-cpp11=0.4.7=r43hc72bb7e_0 - - r-crayon=1.5.2=r43hc72bb7e_2 - - r-credentials=2.0.1=r43hc72bb7e_0 - - r-crosstalk=1.2.1=r43hc72bb7e_0 - - r-curl=5.1.0=r43hf9611b0_0 - - r-data.table=1.15.2=r43h029312a_0 - - r-dbi=1.2.2=r43hc72bb7e_0 - - r-dbplyr=2.5.0=r43hc72bb7e_0 - - r-dendextend=1.17.1=r43hc72bb7e_1 - - r-desc=1.4.3=r43hc72bb7e_0 - - r-devtools=2.4.5=r43hc72bb7e_2 - - r-diffobj=0.3.5=r43h57805ef_2 - - r-digest=0.6.35=r43ha503ecb_0 - - r-doparallel=1.0.17=r43hc72bb7e_2 - - r-downlit=0.4.3=r43hc72bb7e_0 - - r-downloader=0.4=r43hc72bb7e_1005 - - r-dplyr=1.1.4=r43ha503ecb_0 - - r-dt=0.33=r43hc72bb7e_0 - - r-dynamictreecut=1.63_1=r43hc72bb7e_1006 - - r-egg=0.4.5=r43hc72bb7e_4 - - r-ellipsis=0.3.2=r43h57805ef_2 - - r-emdbook=1.3.13=r43hc72bb7e_0 - - r-etrunct=0.1=r43hc72bb7e_1005 - - r-evaluate=0.23=r43hc72bb7e_0 - - r-expm=0.999_9=r43hd9ac46e_0 - - r-fansi=1.0.6=r43h57805ef_0 - - r-farver=2.1.1=r43ha503ecb_2 - - r-fastmap=1.1.1=r43ha503ecb_1 - - r-fastmatch=1.1_4=r43h57805ef_0 - - r-fdrtool=1.2.17=r43h57805ef_2 - - r-filelock=1.0.3=r43h57805ef_0 - - r-fontawesome=0.5.2=r43hc72bb7e_0 - - r-forcats=1.0.0=r43hc72bb7e_1 - - r-foreach=1.5.2=r43hc72bb7e_2 - - r-formatr=1.14=r43hc72bb7e_1 - - r-fresh=0.2.0=r43hc72bb7e_2 - - r-fs=1.6.4=r43ha18555a_0 - - r-futile.logger=1.4.3=r43hc72bb7e_1005 - - r-futile.options=1.0.1=r43hc72bb7e_1004 - - r-gclus=1.3.2=r43hc72bb7e_4 - - r-generics=0.1.3=r43hc72bb7e_2 - - r-gert=2.0.1=r43h9b95477_1 - - r-getoptlong=1.0.5=r43hc72bb7e_2 - - r-ggally=2.2.1=r43hc72bb7e_0 - - r-ggdendro=0.2.0=r43hc72bb7e_0 - - r-ggforce=0.4.2=r43ha503ecb_0 - - r-ggfun=0.1.4=r43hc72bb7e_0 - - r-ggnewscale=0.4.10=r43hc72bb7e_0 - - r-ggplot2=3.5.1=r43hc72bb7e_0 - - 
r-ggplotify=0.1.2=r43hc72bb7e_0 - - r-ggraph=2.1.0=r43ha503ecb_2 - - r-ggrepel=0.9.5=r43ha503ecb_0 - - r-ggridges=0.5.6=r43hc72bb7e_0 - - r-ggstats=0.6.0=r43hc72bb7e_0 - - r-gh=1.4.1=r43hc72bb7e_0 - - r-gitcreds=0.1.2=r43hc72bb7e_2 - - r-globaloptions=0.1.2=r43ha770c72_2 - - r-glue=1.7.0=r43h57805ef_0 - - r-graphlayouts=1.1.0=r43ha503ecb_0 - - r-gridextra=2.3=r43hc72bb7e_1005 - - r-gridgraphics=0.5_1=r43hc72bb7e_2 - - r-gson=0.1.0=r43hc72bb7e_1 - - r-gtable=0.3.5=r43hc72bb7e_0 - - r-haven=2.5.4=r43ha503ecb_0 - - r-heatmaply=1.5.0=r43hc72bb7e_0 - - r-hexbin=1.28.3=r43h61816a4_1 - - r-highr=0.10=r43hc72bb7e_1 - - r-hms=1.1.3=r43hc72bb7e_1 - - r-htmltools=0.5.8.1=r43ha503ecb_0 - - r-htmlwidgets=1.6.4=r43hc72bb7e_1 - - r-httpuv=1.6.15=r43ha503ecb_0 - - r-httr=1.4.7=r43hc72bb7e_0 - - r-httr2=1.0.1=r43hc72bb7e_0 - - r-igraph=2.0.3=r43hbec7d4a_0 - - r-ini=0.3.1=r43hc72bb7e_1005 - - r-invgamma=1.1=r43hc72bb7e_3 - - r-irlba=2.3.5.1=r43h316c678_1 - - r-isoband=0.2.7=r43ha503ecb_2 - - r-iterators=1.0.14=r43hc72bb7e_2 - - r-jquerylib=0.1.4=r43hc72bb7e_2 - - r-jsonlite=1.8.8=r43h57805ef_0 - - r-knitr=1.46=r43hc72bb7e_0 - - r-labeling=0.4.3=r43hc72bb7e_0 - - r-labelled=2.13.0=r43hc72bb7e_0 - - r-lambda.r=1.2.4=r43hc72bb7e_3 - - r-later=1.3.2=r43ha503ecb_0 - - r-lattice=0.22_6=r43h57805ef_0 - - r-lazyeval=0.2.2=r43h57805ef_4 - - r-lifecycle=1.0.4=r43hc72bb7e_0 - - r-locfit=1.5_9.9=r43h57805ef_0 - - r-logging=0.10_108=r43ha770c72_4 - - r-magrittr=2.0.3=r43h57805ef_2 - - r-mass=7.3_60=r43h57805ef_1 - - r-matrix=1.6_5=r43h316c678_0 - - r-matrixstats=1.3.0=r43h57805ef_0 - - r-memoise=2.0.1=r43hc72bb7e_2 - - r-mgcv=1.9_1=r43h316c678_0 - - r-mime=0.12=r43h57805ef_2 - - r-miniui=0.1.1.1=r43hc72bb7e_1004 - - r-mixsqp=0.3_54=r43h08d816e_0 - - r-mnormt=2.1.1=r43h61816a4_1 - - r-msigdbr=7.5.1=r43hc72bb7e_2 - - r-munsell=0.5.1=r43hc72bb7e_0 - - r-mvtnorm=1.2_4=r43hd9ac46e_0 - - r-network=1.18.2=r43h57805ef_0 - - r-nlme=3.1_164=r43h61816a4_0 - - r-numderiv=2016.8_1.1=r43hc72bb7e_5 - - 
r-openssl=2.1.2=r43hd7d0243_0 - - r-openxlsx=4.2.5.2=r43ha503ecb_1 - - r-patchwork=1.2.0=r43hc72bb7e_0 - - r-permute=0.9_7=r43hc72bb7e_2 - - r-pheatmap=1.0.12=r43hc72bb7e_4 - - r-pillar=1.9.0=r43hc72bb7e_1 - - r-pkgbuild=1.4.4=r43hc72bb7e_0 - - r-pkgconfig=2.0.3=r43hc72bb7e_3 - - r-pkgdown=2.0.9=r43hc72bb7e_0 - - r-pkgload=1.3.4=r43hc72bb7e_0 - - r-plogr=0.2.0=r43hc72bb7e_1005 - - r-plotly=4.10.4=r43hc72bb7e_0 - - r-plyr=1.8.9=r43ha503ecb_0 - - r-png=0.1_8=r43h81d01c5_1 - - r-poissonbinomial=1.2.6=r43h182e94e_0 - - r-polyclip=1.10_6=r43ha503ecb_0 - - r-praise=1.0.0=r43hc72bb7e_1007 - - r-prettyunits=1.2.0=r43hc72bb7e_0 - - r-processx=3.8.4=r43h57805ef_0 - - r-profvis=0.3.8=r43h57805ef_3 - - r-progress=1.2.3=r43hc72bb7e_0 - - r-promises=1.3.0=r43ha503ecb_0 - - r-ps=1.7.6=r43h57805ef_0 - - r-psych=2.4.3=r43hc72bb7e_0 - - r-purrr=1.0.2=r43h57805ef_0 - - r-qap=0.1_2=r43h61816a4_2 - - r-r6=2.5.1=r43hc72bb7e_2 - - r-ragg=1.3.1=r43hffa04bc_0 - - r-rappdirs=0.3.3=r43h57805ef_2 - - r-rcmdcheck=1.4.0=r43h785f33e_2 - - r-rcolorbrewer=1.1_3=r43h785f33e_2 - - r-rcpp=1.0.12=r43h7df8631_0 - - r-rcpparmadillo=0.12.8.2.1=r43h08d816e_0 - - r-rcppeigen=0.3.4.0.0=r43h08d816e_0 - - r-rcppnumerical=0.6_0=r43ha503ecb_0 - - r-rcurl=1.98_1.14=r43hf9611b0_0 - - r-readr=2.1.5=r43ha503ecb_0 - - r-registry=0.5_1=r43hc72bb7e_4 - - r-rematch2=2.1.2=r43hc72bb7e_3 - - r-remotes=2.5.0=r43hc72bb7e_0 - - r-reshape=0.8.9=r43hc72bb7e_2 - - r-reshape2=1.4.4=r43ha503ecb_3 - - r-restfulr=0.0.15=r43h56115f1_3 - - r-rintrojs=0.3.4=r43hc72bb7e_0 - - r-rjson=0.2.21=r43ha503ecb_3 - - r-rlang=1.1.3=r43ha503ecb_0 - - r-rle=0.9.2=r43h57805ef_2 - - r-rmarkdown=2.26=r43hc72bb7e_0 - - r-roxygen2=7.3.1=r43ha503ecb_0 - - r-rprojroot=2.0.4=r43hc72bb7e_0 - - r-rsqlite=2.3.4=r43ha503ecb_0 - - r-rstudioapi=0.16.0=r43hc72bb7e_0 - - r-rvcheck=0.2.1=r43hc72bb7e_2 - - r-rversions=2.1.2=r43hc72bb7e_2 - - r-sass=0.4.9=r43ha503ecb_0 - - r-scales=1.3.0=r43hc72bb7e_0 - - r-scatterpie=0.2.2=r43hc72bb7e_0 - - 
r-seriation=1.5.5=r43h61816a4_0 - - r-sessioninfo=1.2.2=r43hc72bb7e_2 - - r-shadowtext=0.1.3=r43hc72bb7e_0 - - r-shape=1.4.6.1=r43ha770c72_0 - - r-shiny=1.8.1.1=r43h785f33e_0 - - r-shinyace=0.4.2=r43hc72bb7e_2 - - r-shinycssloaders=1.0.0=r43hc72bb7e_2 - - r-shinyjs=2.1.0=r43hc72bb7e_2 - - r-shinywidgets=0.8.6=r43hc72bb7e_0 - - r-slam=0.1_50=r43h1df0287_3 - - r-snow=0.4_4=r43hc72bb7e_2 - - r-sourcetools=0.1.7_1=r43ha503ecb_1 - - r-sparsem=1.81=r43h61816a4_2 - - r-squarem=2021.1=r43hc72bb7e_2 - - r-statnet.common=4.9.0=r43h57805ef_1 - - r-stringi=1.8.4=r43hbd1cc82_0 - - r-stringr=1.5.1=r43h785f33e_0 - - r-survival=3.6_4=r43hb1dbf0f_0 - - r-sys=3.4.2=r43h57805ef_1 - - r-systemfonts=1.0.5=r43haf97adc_0 - - r-testthat=3.2.1.1=r43ha503ecb_0 - - r-textshaping=0.3.7=r43hd87b9d6_0 - - r-tibble=3.2.1=r43h57805ef_2 - - r-tidygraph=1.3.0=r43ha503ecb_0 - - r-tidyr=1.3.1=r43ha503ecb_0 - - r-tidyselect=1.2.1=r43hc72bb7e_0 - - r-tidytree=0.4.6=r43hc72bb7e_0 - - r-tinytex=0.51=r43hc72bb7e_0 - - r-tippy=0.1.0=r43hc72bb7e_2 - - r-tmvnsim=1.0_2=r43h61816a4_5 - - r-truncnorm=1.0_9=r43h57805ef_1 - - r-tsp=1.2_4=r43h57805ef_1 - - r-tweenr=2.0.3=r43ha503ecb_0 - - r-tzdb=0.4.0=r43ha503ecb_1 - - r-upsetr=1.4.0=r43hc72bb7e_4 - - r-urlchecker=1.0.1=r43hc72bb7e_2 - - r-usethis=2.2.3=r43hc72bb7e_0 - - r-utf8=1.2.4=r43h57805ef_0 - - r-vctrs=0.6.5=r43ha503ecb_0 - - r-vegan=2.6_4=r43hd9ac46e_1 - - r-viridis=0.6.5=r43hc72bb7e_0 - - r-viridislite=0.4.2=r43hc72bb7e_1 - - r-visnetwork=2.1.2=r43hc72bb7e_2 - - r-vroom=1.6.5=r43ha503ecb_0 - - r-waiter=0.2.5=r43hc72bb7e_2 - - r-waldo=0.5.2=r43hc72bb7e_0 - - r-webshot=0.5.5=r43hc72bb7e_0 - - r-whisker=0.4.1=r43hc72bb7e_1 - - r-withr=3.0.0=r43hc72bb7e_0 - - r-xfun=0.43=r43ha503ecb_0 - - r-xml=3.99_0.16.1=r43hc6530ce_0 - - r-xml2=1.3.6=r43hbfba7a4_1 - - r-xopen=1.0.1=r43hc72bb7e_0 - - r-xtable=1.8_4=r43hc72bb7e_5 - - r-yaml=2.3.8=r43h57805ef_0 - - r-yulab.utils=0.1.4=r43hc72bb7e_0 - - r-zip=2.3.1=r43h57805ef_0 - - readline=8.2=h8228510_1 - - 
sed=4.8=he412f7d_0 - - setuptools=69.5.1=pyhd8ed1ab_0 - - sysroot_linux-64=2.12=he073ed8_17 - - tk=8.6.13=noxft_h4845f30_101 - - tktable=2.10=h0c5db8f_5 - - toml=0.10.2=pyhd8ed1ab_0 - - tomlkit=0.12.4=pyha770c72_0 - - tzdata=2024a=h0c530f3_0 - - wheel=0.43.0=pyhd8ed1ab_1 - - xmltodict=0.13.0=pyhd8ed1ab_0 - - xorg-kbproto=1.0.7=h14c3975_1002 - - xorg-libice=1.1.1=hd590300_0 - - xorg-libsm=1.2.4=h7391055_0 - - xorg-libx11=1.8.9=h8ee46fc_0 - - xorg-libxau=1.0.11=hd590300_0 - - xorg-libxdmcp=1.1.3=h516909a_0 - - xorg-libxext=1.3.4=h0b41bf4_2 - - xorg-libxrender=0.9.11=hd590300_0 - - xorg-libxt=1.3.0=hd590300_1 - - xorg-renderproto=0.11.1=h14c3975_1002 - - xorg-xextproto=7.3.0=h0b41bf4_1003 - - xorg-xproto=7.0.31=h14c3975_1007 - - xz=5.2.6=h166bdaf_0 - - yaml=0.2.5=h7f98852_2 - - yq=3.4.3=pyhd8ed1ab_0 - - zlib=1.2.13=hd590300_5 - - zstd=1.5.6=ha6fb4c9_0 + - _libgcc_mutex=0.1 + - _openmp_mutex=4.5 + - _r-mutex=1.0.1 + - argcomplete=3.3.0 + - binutils_impl_linux-64=2.40 + - bioconductor-all=1.42.0 + - bioconductor-annotate=1.78.0 + - bioconductor-annotationdbi=1.62.2 + - bioconductor-annotationhub=3.8.0 + - bioconductor-apeglm=1.22.1 + - bioconductor-biobase=2.60.0 + - bioconductor-biocfilecache=2.8.0 + - bioconductor-biocgenerics=0.46.0 + - bioconductor-biocio=1.10.0 + - bioconductor-biocparallel=1.34.2 + - bioconductor-biocversion=3.17.1 + - bioconductor-biomart=2.56.1 + - bioconductor-biostrings=2.68.1 + - bioconductor-clusterprofiler=4.8.1 + - bioconductor-complexheatmap=2.16.0 + - bioconductor-consensusclusterplus=1.64.0 + - bioconductor-data-packages=20231203 + - bioconductor-degreport=1.36.0 + - bioconductor-delayedarray=0.26.6 + - bioconductor-deseq2=1.40.2 + - bioconductor-dose=3.26.1 + - bioconductor-edger=3.42.4 + - bioconductor-enrichplot=1.20.0 + - bioconductor-fgsea=1.26.0 + - bioconductor-genefilter=1.82.1 + - bioconductor-genetonic=2.4.0 + - bioconductor-genomeinfodb=1.36.1 + - bioconductor-genomeinfodbdata=1.2.11 + - bioconductor-genomicalignments=1.36.0 
+ - bioconductor-genomicfeatures=1.52.1 + - bioconductor-genomicranges=1.52.0 + - bioconductor-ggtree=3.8.0 + - bioconductor-go.db=3.17.0 + - bioconductor-gosemsim=2.26.0 + - bioconductor-hdo.db=0.99.1 + - bioconductor-ihw=1.28.0 + - bioconductor-interactivedisplaybase=1.38.0 + - bioconductor-iranges=2.34.1 + - bioconductor-keggrest=1.40.0 + - bioconductor-limma=3.56.2 + - bioconductor-lpsymphony=1.28.1 + - bioconductor-matrixgenerics=1.12.2 + - bioconductor-qvalue=2.32.0 + - bioconductor-rhdf5=2.44.0 + - bioconductor-rhdf5filters=1.12.1 + - bioconductor-rhdf5lib=1.22.0 + - bioconductor-rhtslib=2.2.0 + - bioconductor-rsamtools=2.16.0 + - bioconductor-rtracklayer=1.60.0 + - bioconductor-s4arrays=1.0.4 + - bioconductor-s4vectors=0.38.1 + - bioconductor-summarizedexperiment=1.30.2 + - bioconductor-sva=3.48.0 + - bioconductor-treeio=1.24.1 + - bioconductor-tximport=1.30.0 + - bioconductor-xvector=0.40.0 + - bioconductor-zlibbioc=1.46.0 + - bwidget=1.9.14 + - bzip2=1.0.8 + - c-ares=1.28.1 + - ca-certificates=2024.2.2 + - cairo=1.18.0 + - curl=8.7.1 + - expat=2.6.2 + - fftw=3.3.10 + - font-ttf-dejavu-sans-mono=2.37 + - font-ttf-inconsolata=3.000 + - font-ttf-source-code-pro=2.038 + - font-ttf-ubuntu=0.83 + - fontconfig=2.14.2 + - fonts-conda-ecosystem=1 + - fonts-conda-forge=1 + - freetype=2.12.1 + - fribidi=1.0.10 + - gcc_impl_linux-64=13.2.0 + - gfortran_impl_linux-64=13.2.0 + - glpk=5.0 + - gmp=6.3.0 + - graphite2=1.3.13 + - gxx_impl_linux-64=13.2.0 + - harfbuzz=8.4.0 + - icu=73.2 + - jq=1.7.1 + - kernel-headers_linux-64=2.6.32 + - keyutils=1.6.1 + - krb5=1.21.2 + - ld_impl_linux-64=2.40 + - lerc=4.0.0 + - libblas=3.9.0 + - libcurl=8.7.1 + - libdeflate=1.20 + - libedit=3.1.20191231 + - libev=4.33 + - libexpat=2.6.2 + - libffi=3.4.2 + - libgcc-devel_linux-64=13.2.0 + - libgcc-ng=13.2.0 + - libgfortran-ng=13.2.0 + - libgfortran5=13.2.0 + - libgit2=1.8.0 + - libglib=2.80.0 + - libgomp=13.2.0 + - libiconv=1.17 + - libjpeg-turbo=3.0.0 + - liblapack=3.9.0 + - 
libnghttp2=1.58.0 + - libnsl=2.0.1 + - libopenblas=0.3.27 + - libpng=1.6.43 + - libsanitizer=13.2.0 + - libsqlite=3.45.3 + - libssh2=1.11.0 + - libstdcxx-devel_linux-64=13.2.0 + - libstdcxx-ng=13.2.0 + - libtiff=4.6.0 + - libuuid=2.38.1 + - libwebp-base=1.4.0 + - libxcb=1.15 + - libxcrypt=4.4.36 + - libxml2=2.12.6 + - libzlib=1.2.13 + - make=4.3 + - ncurses=6.4.20240210 + - oniguruma=6.9.9 + - openssl=3.3.0 + - pandoc=3.1.13 + - pango=1.52.2 + - pcre2=10.43 + - pip=24.0 + - pixman=0.43.2 + - pthread-stubs=0.4 + - python=3.12.3 + - python_abi=3.12 + - r-anytime=0.3.9 + - r-ape=5.8 + - r-aplot=0.2.2 + - r-ashr=2.2_63 + - r-askpass=1.2.0 + - r-assertthat=0.2.1 + - r-babelgene=22.9 + - r-backbone=2.1.3 + - r-backports=1.4.1 + - r-base=4.3.3 + - r-base64enc=0.1_3 + - r-bbmle=1.0.25.1 + - r-bdsmatrix=1.3_7 + - r-bh=1.84.0_0 + - r-biocmanager=1.30.23 + - r-bit=4.0.5 + - r-bit64=4.0.5 + - r-bitops=1.0_7 + - r-blob=1.2.4 + - r-brew=1.0_10 + - r-brio=1.1.5 + - r-broom=1.0.5 + - r-broom.helpers=1.15.0 + - r-bs4dash=2.3.3 + - r-bslib=0.7.0 + - r-ca=0.71.1 + - r-cachem=1.0.8 + - r-callr=3.7.6 + - r-circlize=0.4.16 + - r-cli=3.6.2 + - r-clipr=0.8.0 + - r-clue=0.3_65 + - r-cluster=2.1.6 + - r-coda=0.19_4.1 + - r-codetools=0.2_20 + - r-colorspace=2.1_0 + - r-colourpicker=1.3.0 + - r-commonmark=1.9.1 + - r-complexupset=1.3.3 + - r-cowplot=1.1.3 + - r-cpp11=0.4.7 + - r-crayon=1.5.2 + - r-credentials=2.0.1 + - r-crosstalk=1.2.1 + - r-curl=5.1.0 + - r-data.table=1.15.2 + - r-dbi=1.2.2 + - r-dbplyr=2.5.0 + - r-dendextend=1.17.1 + - r-desc=1.4.3 + - r-devtools=2.4.5 + - r-diffobj=0.3.5 + - r-digest=0.6.35 + - r-doparallel=1.0.17 + - r-downlit=0.4.3 + - r-downloader=0.4 + - r-dplyr=1.1.4 + - r-dt=0.33 + - r-dynamictreecut=1.63_1 + - r-egg=0.4.5 + - r-ellipsis=0.3.2 + - r-emdbook=1.3.13 + - r-etrunct=0.1 + - r-evaluate=0.23 + - r-expm=0.999_9 + - r-fansi=1.0.6 + - r-farver=2.1.1 + - r-fastmap=1.1.1 + - r-fastmatch=1.1_4 + - r-fdrtool=1.2.17 + - r-filelock=1.0.3 + - r-fontawesome=0.5.2 + - 
r-forcats=1.0.0 + - r-foreach=1.5.2 + - r-formatr=1.14 + - r-fresh=0.2.0 + - r-fs=1.6.4 + - r-futile.logger=1.4.3 + - r-futile.options=1.0.1 + - r-gclus=1.3.2 + - r-generics=0.1.3 + - r-gert=2.0.1 + - r-getoptlong=1.0.5 + - r-ggally=2.2.1 + - r-ggdendro=0.2.0 + - r-ggforce=0.4.2 + - r-ggfun=0.1.4 + - r-ggnewscale=0.4.10 + - r-ggplot2=3.5.1 + - r-ggplotify=0.1.2 + - r-ggraph=2.1.0 + - r-ggrepel=0.9.5 + - r-ggridges=0.5.6 + - r-ggstats=0.6.0 + - r-gh=1.4.1 + - r-gitcreds=0.1.2 + - r-globaloptions=0.1.2 + - r-glue=1.7.0 + - r-graphlayouts=1.1.0 + - r-gridextra=2.3 + - r-gridgraphics=0.5_1 + - r-gson=0.1.0 + - r-gtable=0.3.5 + - r-haven=2.5.4 + - r-heatmaply=1.5.0 + - r-hexbin=1.28.3 + - r-highr=0.10 + - r-hms=1.1.3 + - r-htmltools=0.5.8.1 + - r-htmlwidgets=1.6.4 + - r-httpuv=1.6.15 + - r-httr=1.4.7 + - r-httr2=1.0.1 + - r-igraph=2.0.3 + - r-ini=0.3.1 + - r-invgamma=1.1 + - r-irlba=2.3.5.1 + - r-isoband=0.2.7 + - r-iterators=1.0.14 + - r-jquerylib=0.1.4 + - r-jsonlite=1.8.8 + - r-knitr=1.46 + - r-labeling=0.4.3 + - r-labelled=2.13.0 + - r-lambda.r=1.2.4 + - r-later=1.3.2 + - r-lattice=0.22_6 + - r-lazyeval=0.2.2 + - r-lifecycle=1.0.4 + - r-locfit=1.5_9.9 + - r-logging=0.10_108 + - r-magrittr=2.0.3 + - r-mass=7.3_60 + - r-matrix=1.6_5 + - r-matrixstats=1.3.0 + - r-memoise=2.0.1 + - r-mgcv=1.9_1 + - r-mime=0.12 + - r-miniui=0.1.1.1 + - r-mixsqp=0.3_54 + - r-mnormt=2.1.1 + - r-msigdbr=7.5.1 + - r-munsell=0.5.1 + - r-mvtnorm=1.2_4 + - r-network=1.18.2 + - r-nlme=3.1_164 + - r-numderiv=2016.8_1.1 + - r-openssl=2.1.2 + - r-openxlsx=4.2.5.2 + - r-patchwork=1.2.0 + - r-permute=0.9_7 + - r-pheatmap=1.0.12 + - r-pillar=1.9.0 + - r-pkgbuild=1.4.4 + - r-pkgconfig=2.0.3 + - r-pkgdown=2.0.9 + - r-pkgload=1.3.4 + - r-plogr=0.2.0 + - r-plotly=4.10.4 + - r-plyr=1.8.9 + - r-png=0.1_8 + - r-poissonbinomial=1.2.6 + - r-polyclip=1.10_6 + - r-praise=1.0.0 + - r-prettyunits=1.2.0 + - r-processx=3.8.4 + - r-profvis=0.3.8 + - r-progress=1.2.3 + - r-promises=1.3.0 + - r-ps=1.7.6 + - 
r-psych=2.4.3 + - r-purrr=1.0.2 + - r-qap=0.1_2 + - r-r6=2.5.1 + - r-ragg=1.3.1 + - r-rappdirs=0.3.3 + - r-rcmdcheck=1.4.0 + - r-rcolorbrewer=1.1_3 + - r-rcpp=1.0.12 + - r-rcpparmadillo=0.12.8.2.1 + - r-rcppeigen=0.3.4.0.0 + - r-rcppnumerical=0.6_0 + - r-rcurl=1.98_1.14 + - r-readr=2.1.5 + - r-registry=0.5_1 + - r-rematch2=2.1.2 + - r-remotes=2.5.0 + - r-reshape=0.8.9 + - r-reshape2=1.4.4 + - r-restfulr=0.0.15 + - r-rintrojs=0.3.4 + - r-rjson=0.2.21 + - r-rlang=1.1.3 + - r-rle=0.9.2 + - r-rmarkdown=2.26 + - r-roxygen2=7.3.1 + - r-rprojroot=2.0.4 + - r-rsqlite=2.3.4 + - r-rstudioapi=0.16.0 + - r-rvcheck=0.2.1 + - r-rversions=2.1.2 + - r-sass=0.4.9 + - r-scales=1.3.0 + - r-scatterpie=0.2.2 + - r-seriation=1.5.5 + - r-sessioninfo=1.2.2 + - r-shadowtext=0.1.3 + - r-shape=1.4.6.1 + - r-shiny=1.8.1.1 + - r-shinyace=0.4.2 + - r-shinycssloaders=1.0.0 + - r-shinyjs=2.1.0 + - r-shinywidgets=0.8.6 + - r-slam=0.1_50 + - r-snow=0.4_4 + - r-sourcetools=0.1.7_1 + - r-sparsem=1.81 + - r-squarem=2021.1 + - r-statnet.common=4.9.0 + - r-stringi=1.8.4 + - r-stringr=1.5.1 + - r-survival=3.6_4 + - r-sys=3.4.2 + - r-systemfonts=1.0.5 + - r-testthat=3.2.1.1 + - r-textshaping=0.3.7 + - r-tibble=3.2.1 + - r-tidygraph=1.3.0 + - r-tidyr=1.3.1 + - r-tidyselect=1.2.1 + - r-tidytree=0.4.6 + - r-tinytex=0.51 + - r-tippy=0.1.0 + - r-tmvnsim=1.0_2 + - r-truncnorm=1.0_9 + - r-tsp=1.2_4 + - r-tweenr=2.0.3 + - r-tzdb=0.4.0 + - r-upsetr=1.4.0 + - r-urlchecker=1.0.1 + - r-usethis=2.2.3 + - r-utf8=1.2.4 + - r-vctrs=0.6.5 + - r-vegan=2.6_4 + - r-viridis=0.6.5 + - r-viridislite=0.4.2 + - r-visnetwork=2.1.2 + - r-vroom=1.6.5 + - r-waiter=0.2.5 + - r-waldo=0.5.2 + - r-webshot=0.5.5 + - r-whisker=0.4.1 + - r-withr=3.0.0 + - r-xfun=0.43 + - r-xml=3.99_0.16.1 + - r-xml2=1.3.6 + - r-xopen=1.0.1 + - r-xtable=1.8_4 + - r-yaml=2.3.8 + - r-yulab.utils=0.1.4 + - r-zip=2.3.1 + - readline=8.2 + - sed=4.8 + - setuptools=69.5.1 + - sysroot_linux-64=2.12 + - tk=8.6.13 + - tktable=2.10 + - toml=0.10.2 + - tomlkit=0.12.4 + 
- tzdata=2024a + - wheel=0.43.0 + - xmltodict=0.13.0 + - xorg-kbproto=1.0.7 + - xorg-libice=1.1.1 + - xorg-libsm=1.2.4 + - xorg-libx11=1.8.9 + - xorg-libxau=1.0.11 + - xorg-libxdmcp=1.1.3 + - xorg-libxext=1.3.4 + - xorg-libxrender=0.9.11 + - xorg-libxt=1.3.0 + - xorg-renderproto=0.11.1 + - xorg-xextproto=7.3.0 + - xorg-xproto=7.0.31 + - xz=5.2.6 + - yaml=0.2.5 + - yq=3.4.3 + - zlib=1.2.13 + - zstd=1.5.6 - pip: - pyyaml==6.0.1 From 2e018d89ac4aaf7833fed1ae1bf12f19cbe600ca Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 15:57:17 -0400 Subject: [PATCH 20/93] add pinnings to help env solve --- include/requirements-r.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/requirements-r.txt b/include/requirements-r.txt index 21603eaf..66c8435f 100644 --- a/include/requirements-r.txt +++ b/include/requirements-r.txt @@ -1,3 +1,4 @@ +bioconductor-all <1.42.0 bioconductor-annotationhub bioconductor-apeglm bioconductor-biocparallel @@ -12,14 +13,14 @@ bioconductor-rhdf5 bioconductor-sva bioconductor-tximport r-ashr -r-base +r-base >4.1 +r-dbplyr <2.4 r-devtools r-dt r-ggally r-ggnewscale r-ggrepel r-heatmaply -r-igraph r-knitr r-msigdbr r-openxlsx From 2b5627b7d441eef8928990e0a00ba8ecbcfe801d Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 15:57:30 -0400 Subject: [PATCH 21/93] updt env-r.yml --- env-r.yml | 127 +++++++++++++++++++++++++++--------------------------- 1 file changed, 63 insertions(+), 64 deletions(-) diff --git a/env-r.yml b/env-r.yml index c871259b..d1d366b3 100644 --- a/env-r.yml +++ b/env-r.yml @@ -7,63 +7,63 @@ dependencies: - _r-mutex=1.0.1 - argcomplete=3.3.0 - binutils_impl_linux-64=2.40 - - bioconductor-all=1.42.0 - - bioconductor-annotate=1.78.0 - - bioconductor-annotationdbi=1.62.2 - - bioconductor-annotationhub=3.8.0 - - bioconductor-apeglm=1.22.1 - - bioconductor-biobase=2.60.0 - - bioconductor-biocfilecache=2.8.0 - - bioconductor-biocgenerics=0.46.0 - - bioconductor-biocio=1.10.0 - - 
bioconductor-biocparallel=1.34.2 - - bioconductor-biocversion=3.17.1 - - bioconductor-biomart=2.56.1 - - bioconductor-biostrings=2.68.1 - - bioconductor-clusterprofiler=4.8.1 - - bioconductor-complexheatmap=2.16.0 - - bioconductor-consensusclusterplus=1.64.0 + - bioconductor-all=1.40.0 + - bioconductor-annotate=1.76.0 + - bioconductor-annotationdbi=1.60.0 + - bioconductor-annotationhub=3.6.0 + - bioconductor-apeglm=1.20.0 + - bioconductor-biobase=2.58.0 + - bioconductor-biocfilecache=2.6.0 + - bioconductor-biocgenerics=0.44.0 + - bioconductor-biocio=1.8.0 + - bioconductor-biocparallel=1.32.5 + - bioconductor-biocversion=3.16.0 + - bioconductor-biomart=2.54.0 + - bioconductor-biostrings=2.66.0 + - bioconductor-clusterprofiler=4.6.0 + - bioconductor-complexheatmap=2.14.0 + - bioconductor-consensusclusterplus=1.62.0 - bioconductor-data-packages=20231203 - - bioconductor-degreport=1.36.0 - - bioconductor-delayedarray=0.26.6 - - bioconductor-deseq2=1.40.2 - - bioconductor-dose=3.26.1 - - bioconductor-edger=3.42.4 - - bioconductor-enrichplot=1.20.0 - - bioconductor-fgsea=1.26.0 - - bioconductor-genefilter=1.82.1 - - bioconductor-genetonic=2.4.0 - - bioconductor-genomeinfodb=1.36.1 - - bioconductor-genomeinfodbdata=1.2.11 - - bioconductor-genomicalignments=1.36.0 - - bioconductor-genomicfeatures=1.52.1 - - bioconductor-genomicranges=1.52.0 - - bioconductor-ggtree=3.8.0 - - bioconductor-go.db=3.17.0 - - bioconductor-gosemsim=2.26.0 + - bioconductor-degreport=1.34.0 + - bioconductor-delayedarray=0.24.0 + - bioconductor-deseq2=1.38.0 + - bioconductor-dose=3.24.0 + - bioconductor-edger=3.40.0 + - bioconductor-enrichplot=1.18.0 + - bioconductor-fgsea=1.24.0 + - bioconductor-genefilter=1.80.0 + - bioconductor-geneplotter=1.76.0 + - bioconductor-genetonic=2.2.0 + - bioconductor-genomeinfodb=1.34.9 + - bioconductor-genomeinfodbdata=1.2.9 + - bioconductor-genomicalignments=1.34.0 + - bioconductor-genomicfeatures=1.50.2 + - bioconductor-genomicranges=1.50.0 + - 
bioconductor-ggtree=3.6.0 + - bioconductor-go.db=3.16.0 + - bioconductor-gosemsim=2.24.0 - bioconductor-hdo.db=0.99.1 - - bioconductor-ihw=1.28.0 - - bioconductor-interactivedisplaybase=1.38.0 - - bioconductor-iranges=2.34.1 - - bioconductor-keggrest=1.40.0 - - bioconductor-limma=3.56.2 - - bioconductor-lpsymphony=1.28.1 - - bioconductor-matrixgenerics=1.12.2 - - bioconductor-qvalue=2.32.0 - - bioconductor-rhdf5=2.44.0 - - bioconductor-rhdf5filters=1.12.1 - - bioconductor-rhdf5lib=1.22.0 - - bioconductor-rhtslib=2.2.0 - - bioconductor-rsamtools=2.16.0 - - bioconductor-rtracklayer=1.60.0 - - bioconductor-s4arrays=1.0.4 - - bioconductor-s4vectors=0.38.1 - - bioconductor-summarizedexperiment=1.30.2 - - bioconductor-sva=3.48.0 - - bioconductor-treeio=1.24.1 - - bioconductor-tximport=1.30.0 - - bioconductor-xvector=0.40.0 - - bioconductor-zlibbioc=1.46.0 + - bioconductor-ihw=1.26.0 + - bioconductor-interactivedisplaybase=1.36.0 + - bioconductor-iranges=2.32.0 + - bioconductor-keggrest=1.38.0 + - bioconductor-limma=3.54.0 + - bioconductor-lpsymphony=1.26.0 + - bioconductor-matrixgenerics=1.10.0 + - bioconductor-qvalue=2.30.0 + - bioconductor-rhdf5=2.42.0 + - bioconductor-rhdf5filters=1.10.0 + - bioconductor-rhdf5lib=1.20.0 + - bioconductor-rhtslib=2.0.0 + - bioconductor-rsamtools=2.14.0 + - bioconductor-rtracklayer=1.58.0 + - bioconductor-s4vectors=0.36.0 + - bioconductor-summarizedexperiment=1.28.0 + - bioconductor-sva=3.46.0 + - bioconductor-treeio=1.22.0 + - bioconductor-tximport=1.26.0 + - bioconductor-xvector=0.38.0 + - bioconductor-zlibbioc=1.44.0 - bwidget=1.9.14 - bzip2=1.0.8 - c-ares=1.28.1 @@ -107,7 +107,7 @@ dependencies: - libgfortran-ng=13.2.0 - libgfortran5=13.2.0 - libgit2=1.8.0 - - libglib=2.80.0 + - libglib=2.80.2 - libgomp=13.2.0 - libiconv=1.17 - libjpeg-turbo=3.0.0 @@ -129,7 +129,7 @@ dependencies: - libxml2=2.12.6 - libzlib=1.2.13 - make=4.3 - - ncurses=6.4.20240210 + - ncurses=6.5 - oniguruma=6.9.9 - openssl=3.3.0 - pandoc=3.1.13 @@ -149,7 +149,7 @@ 
dependencies: - r-babelgene=22.9 - r-backbone=2.1.3 - r-backports=1.4.1 - - r-base=4.3.3 + - r-base=4.2.3 - r-base64enc=0.1_3 - r-bbmle=1.0.25.1 - r-bdsmatrix=1.3_7 @@ -187,7 +187,7 @@ dependencies: - r-curl=5.1.0 - r-data.table=1.15.2 - r-dbi=1.2.2 - - r-dbplyr=2.5.0 + - r-dbplyr=2.3.4 - r-dendextend=1.17.1 - r-desc=1.4.3 - r-devtools=2.4.5 @@ -232,7 +232,6 @@ dependencies: - r-ggplotify=0.1.2 - r-ggraph=2.1.0 - r-ggrepel=0.9.5 - - r-ggridges=0.5.6 - r-ggstats=0.6.0 - r-gh=1.4.1 - r-gitcreds=0.1.2 @@ -272,7 +271,7 @@ dependencies: - r-locfit=1.5_9.9 - r-logging=0.10_108 - r-magrittr=2.0.3 - - r-mass=7.3_60 + - r-mass=7.3_60.0.1 - r-matrix=1.6_5 - r-matrixstats=1.3.0 - r-memoise=2.0.1 @@ -319,7 +318,7 @@ dependencies: - r-rcmdcheck=1.4.0 - r-rcolorbrewer=1.1_3 - r-rcpp=1.0.12 - - r-rcpparmadillo=0.12.8.2.1 + - r-rcpparmadillo=0.12.8.3.0 - r-rcppeigen=0.3.4.0.0 - r-rcppnumerical=0.6_0 - r-rcurl=1.98_1.14 @@ -334,7 +333,7 @@ dependencies: - r-rjson=0.2.21 - r-rlang=1.1.3 - r-rle=0.9.2 - - r-rmarkdown=2.26 + - r-rmarkdown=2.25 - r-roxygen2=7.3.1 - r-rprojroot=2.0.4 - r-rsqlite=2.3.4 @@ -408,7 +407,7 @@ dependencies: - tk=8.6.13 - tktable=2.10 - toml=0.10.2 - - tomlkit=0.12.4 + - tomlkit=0.12.5 - tzdata=2024a - wheel=0.43.0 - xmltodict=0.13.0 From 4cd47dd31fd5cbdf1da6da14fc3ae8620d354154 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 16:01:38 -0400 Subject: [PATCH 22/93] updt NAMESPACE --- lib/lcdbwf/NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/lcdbwf/NAMESPACE b/lib/lcdbwf/NAMESPACE index 6ae92683..0fdcde84 100644 --- a/lib/lcdbwf/NAMESPACE +++ b/lib/lcdbwf/NAMESPACE @@ -1,2 +1,3 @@ # Generated by roxygen2: do not edit by hand +export(enrich_to_genetonic) From 50dcc8c811f5d080c5ecb67d2704a8a7f6cce4f7 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 16:01:54 -0400 Subject: [PATCH 23/93] updt DESCRIPTION --- lib/lcdbwf/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/lib/lcdbwf/DESCRIPTION b/lib/lcdbwf/DESCRIPTION index 97dc6126..f83ccdbb 100644 --- a/lib/lcdbwf/DESCRIPTION +++ b/lib/lcdbwf/DESCRIPTION @@ -23,4 +23,4 @@ License: MIT Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1 +RoxygenNote: 7.3.1 From f49203dbe74383186c49c8f4ca6ca9fdd2d7fada Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 21:20:02 -0400 Subject: [PATCH 24/93] save raw & carnation-ready objects --- workflows/rnaseq/downstream/rnaseq.Rmd | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/workflows/rnaseq/downstream/rnaseq.Rmd b/workflows/rnaseq/downstream/rnaseq.Rmd index 1dcf7d91..29d3f1e5 100644 --- a/workflows/rnaseq/downstream/rnaseq.Rmd +++ b/workflows/rnaseq/downstream/rnaseq.Rmd @@ -312,8 +312,13 @@ knitr::kable(tbl, row.names=FALSE) ``` ```{r combined_rds, cache=TRUE, dependson='assemble_variables'} -obj <- lcdbwf:::compose_results(res_list, dds_list) -saveRDS(obj, file='combined.Rds', compress=FALSE) +obj <- list(res_list=res_list, dds_list=dds_list) +saveRDS(obj, file='combined-raw.Rds', compress=FALSE) + +app_obj <- lcdbwf:::compose_results(res_list=res_list, + dds_list=dds_list) + +saveRDS(app_obj, file='combined.Rds', compress=FALSE) ``` From 06efdf9e50d729afced36ed049384a44e0251a6d Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 21:20:22 -0400 Subject: [PATCH 25/93] use raw obj as starting point --- workflows/rnaseq/downstream/gene-patterns.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/rnaseq/downstream/gene-patterns.Rmd b/workflows/rnaseq/downstream/gene-patterns.Rmd index 5608ae25..6b3d785c 100644 --- a/workflows/rnaseq/downstream/gene-patterns.Rmd +++ b/workflows/rnaseq/downstream/gene-patterns.Rmd @@ -34,7 +34,7 @@ config <- lcdbwf:::load_config('config.yaml') ``` ```{r load, cache=TRUE, cache.extra=file.info('combined.Rds')$mtime} -obj <- readRDS('combined.Rds') +obj <- readRDS('combined-raw.Rds') res_list <- 
obj$res_list dds_list <- obj$dds_list From de4937cf5444ad3482db98bb22e523e6b96eee6e Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 21:20:40 -0400 Subject: [PATCH 26/93] add GeneTonic lib call --- workflows/rnaseq/downstream/gene-patterns.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git a/workflows/rnaseq/downstream/gene-patterns.Rmd b/workflows/rnaseq/downstream/gene-patterns.Rmd index 6b3d785c..b21aae59 100644 --- a/workflows/rnaseq/downstream/gene-patterns.Rmd +++ b/workflows/rnaseq/downstream/gene-patterns.Rmd @@ -22,6 +22,7 @@ library(tidyr) library(clusterProfiler) library(DESeq2) library(DEGreport) +library(GeneTonic) ``` ```{r load_helpers} From 6e7faab10d0e5d414636ca1e14eed3d6922cb78b Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 21:22:07 -0400 Subject: [PATCH 27/93] start w raw obj --- .../rnaseq/downstream/functional-enrichment.Rmd | 2 +- workflows/rnaseq/downstream/gene-patterns.Rmd | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/workflows/rnaseq/downstream/functional-enrichment.Rmd b/workflows/rnaseq/downstream/functional-enrichment.Rmd index d1ecf952..db8a311f 100644 --- a/workflows/rnaseq/downstream/functional-enrichment.Rmd +++ b/workflows/rnaseq/downstream/functional-enrichment.Rmd @@ -45,7 +45,7 @@ if (config$parallel$parallel){ ``` ```{r load, cache=TRUE, cache.extra=file.info('combined.Rds')$mtime} -obj <- readRDS('combined.Rds') +obj <- readRDS('combined-raw.Rds') res_list <- obj$res_list dds_list <- obj$dds_list diff --git a/workflows/rnaseq/downstream/gene-patterns.Rmd b/workflows/rnaseq/downstream/gene-patterns.Rmd index b21aae59..53f1ccba 100644 --- a/workflows/rnaseq/downstream/gene-patterns.Rmd +++ b/workflows/rnaseq/downstream/gene-patterns.Rmd @@ -253,11 +253,17 @@ for (name in names(res_list)) { ``` ```{r combined_rds, cache=TRUE, dependson='finalclusters'} +obj <- list(res_list=res_list, + dds_list=dds_list, + enrich_list=enrich_list, + 
degpatterns_list=degpatterns_list) +saveRDS(obj, file='combined-raw.Rds', compress=FALSE) -obj <- lcdbwf:::compose_results(rds_file='combined.Rds', - degpatterns_list=degpatterns_list) +app_obj <- lcdbwf:::compose_results(rds_file='combined-raw.Rds', + enrich_list=enrich_list, + degpatterns_list=degpatterns_list) +saveRDS(app_obj, file='combined.Rds', compress=FALSE) -saveRDS(obj, file='combined.Rds', compress=FALSE) ``` # Exported results From 3db360c35134e36c5d85dcd35d4c2c48d7eee7e9 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 21:23:37 -0400 Subject: [PATCH 28/93] load GeneTonic & DESeq2 --- workflows/rnaseq/downstream/functional-enrichment.Rmd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/workflows/rnaseq/downstream/functional-enrichment.Rmd b/workflows/rnaseq/downstream/functional-enrichment.Rmd index db8a311f..57c92b45 100644 --- a/workflows/rnaseq/downstream/functional-enrichment.Rmd +++ b/workflows/rnaseq/downstream/functional-enrichment.Rmd @@ -20,6 +20,8 @@ knitr::opts_chunk$set( library(AnnotationHub) library(dplyr) library(BiocParallel) +library(GeneTonic) +library(DESeq2) ``` # Functional enrichment analysis From fbe7834d33892d82dd6988fdfae8245fa180dcd6 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 21:23:50 -0400 Subject: [PATCH 29/93] load degpatterns_list if present --- workflows/rnaseq/downstream/functional-enrichment.Rmd | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/workflows/rnaseq/downstream/functional-enrichment.Rmd b/workflows/rnaseq/downstream/functional-enrichment.Rmd index 57c92b45..1454252e 100644 --- a/workflows/rnaseq/downstream/functional-enrichment.Rmd +++ b/workflows/rnaseq/downstream/functional-enrichment.Rmd @@ -57,6 +57,13 @@ if(!'rld_list' %in% names(obj)){ } else { rld_list <- obj$rld_list } + +# check if 'degpatterns_list' exists in the object, if not sets to NULL +if(!'degpatterns_list' %in% names(obj)){ + degpatterns_list <- NULL +} else { + degpatterns_list <- 
obj$degpatterns_list +} ``` ```{r functional_enrichment_prep, cache=TRUE, config=config$annotation$keytype, eval=config$toggle$functional_enrichment, dependson='load'} From f666f3cb62e7aa4950b5eb40dde269977ed419b4 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Thu, 9 May 2024 21:24:04 -0400 Subject: [PATCH 30/93] save raw & carnation-ready obj --- .../rnaseq/downstream/functional-enrichment.Rmd | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/workflows/rnaseq/downstream/functional-enrichment.Rmd b/workflows/rnaseq/downstream/functional-enrichment.Rmd index 1454252e..f98ec99a 100644 --- a/workflows/rnaseq/downstream/functional-enrichment.Rmd +++ b/workflows/rnaseq/downstream/functional-enrichment.Rmd @@ -82,11 +82,17 @@ enrich_list <- lcdbwf:::run_enricher(res_list=res_list, ``` ```{r combined_rds, cache=TRUE, dependson='enrich'} +obj <- list(res_list=res_list, + dds_list=dds_list, + enrich_list=enrich_list, + degpatterns_list=degpatterns_list) +saveRDS(obj, file='combined-raw.Rds', compress=FALSE) -obj <- lcdbwf:::compose_results(rds_file='combined.Rds', - enrich_list=enrich_list) +app_obj <- lcdbwf:::compose_results(rds_file='combined-raw.Rds', + enrich_list=enrich_list, + degpatterns_list=degpatterns_list) +saveRDS(app_obj, file='combined.Rds', compress=FALSE) -saveRDS(obj, file='combined.Rds', compress=FALSE) ``` ```{r functional_enrichment_plots} From e561fb18e97cd84d97170fb6f2307bb3130dae32 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Fri, 10 May 2024 09:52:37 -0400 Subject: [PATCH 31/93] switch !is.null to missing to correctly check fr missing args --- lib/lcdbwf/R/helpers.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index b56c1d5a..de90d696 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -545,11 +545,11 @@ compose_results <- function(res_list=NULL, dds_list <- tmp$dds_list # plug in optional slots unless specified 
already - if('rld_list' %in% names(tmp) & !is.null(rld_list)) rld_list <- tmp$rld_list - if('enrich_list' %in% names(tmp) & !is.null(enrich_list)) enrich_list <- tmp$enrich_list - if('degpatterns_list' %in% names(tmp) & !is.null(degpatterns_list)) degpatterns_list <- tmp$degpatterns_list - if('all_dds' %in% names(tmp) & !is.null(all_dds)) all_dds <- tmp$all_dds - if('all_rld' %in% names(tmp) & !is.null(all_rld)) all_rld <- tmp$all_rld + if('rld_list' %in% names(tmp) & missing(rld_list)) rld_list <- tmp$rld_list + if('enrich_list' %in% names(tmp) & missing(enrich_list)) enrich_list <- tmp$enrich_list + if('degpatterns_list' %in% names(tmp) & missing(degpatterns_list)) degpatterns_list <- tmp$degpatterns_list + if('all_dds' %in% names(tmp) & missing(all_dds)) all_dds <- tmp$all_dds + if('all_rld' %in% names(tmp) & missing(all_rld)) all_rld <- tmp$all_rld } message('\n1. Processing res_list & dds_list') From ae731a28722427adb677eacef95d52b6db7b95b4 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Fri, 10 May 2024 16:37:38 -0400 Subject: [PATCH 32/93] use exact matching fr 'gene' or 'symbol' columns --- lib/lcdbwf/R/helpers.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index de90d696..d0d5521f 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -839,18 +839,18 @@ sanitize_res_dds <- function(res_list, dds_list, rld_list, } # NOTE: check that a single 'gene' column exists. - gene_idx <- grep('gene', tolower(colnames(res))) + gene_idx <- which(tolower(colnames(res)) == 'gene') if(length(gene_idx) > 1){ stop(paste('res_list elements can only have 1 "gene" column:', name)) } else if(length(gene_idx) == 0){ # If 'gene' column not present, replace with rownames message(paste('res_list element is missing a "gene" column. 
"rownames" will be used instead:', name)) res$gene <- rownames(res) - gene_idx <- grep('gene', tolower(colnames(res))) + gene_idx <- which(tolower(colnames(res)) == 'gene') } # NOTE: check that a single 'symbol' column exists. - symbol_idx <- grep('symbol', tolower(colnames(res))) + symbol_idx <- which(tolower(colnames(res)) == 'symbol') if(length(symbol_idx) > 1){ stop(paste('res_list elements can only have 1 "symbol" column:', name)) } else if(length(symbol_idx) == 0){ From 7ab73578eb0e02c87af06a62dbe480b9536e76d1 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Fri, 10 May 2024 16:38:03 -0400 Subject: [PATCH 33/93] make search fr reserved cols case-insensitive --- lib/lcdbwf/R/helpers.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index d0d5521f..172762f9 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -884,13 +884,13 @@ sanitize_res_dds <- function(res_list, dds_list, rld_list, rld <- rld_list[[ name ]] # NOTE: colData cannot contain reserved column names - if(any(reserved_cols %in% names(colData(dds)))){ + if(any(reserved_cols %in% tolower(names(colData(dds))))){ dds_reserved <- intersect(reserved_cols, names(colData(dds))) stop(paste('colData of dds_list object contains reserved column names -', paste0(dds_reserved, collapse=', '), ':', name)) } - if(any(reserved_cols %in% names(colData(rld)))){ + if(any(reserved_cols %in% tolower(names(colData(rld))))){ rld_reserved <- intersect(reserved_cols, names(colData(rld))) stop(paste('colData of res_list element contains reserved column names -', paste0(rld_reserved, collapse=', '), ':', name)) From 762f1b4aea8f96d72472926e4c102373484b9428 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Fri, 10 May 2024 16:38:20 -0400 Subject: [PATCH 34/93] fix message - should be 'rld_list' --- lib/lcdbwf/R/helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index 
172762f9..011a4d14 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -892,7 +892,7 @@ sanitize_res_dds <- function(res_list, dds_list, rld_list, if(any(reserved_cols %in% tolower(names(colData(rld))))){ rld_reserved <- intersect(reserved_cols, names(colData(rld))) - stop(paste('colData of res_list element contains reserved column names -', + stop(paste('colData of rld_list element contains reserved column names -', paste0(rld_reserved, collapse=', '), ':', name)) } From 7e81c3d581fd8b4117b9c444c7bbb11bdf7633a8 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Mon, 13 May 2024 16:20:16 -0400 Subject: [PATCH 35/93] uncache load; only depend on res_list --- workflows/rnaseq/downstream/functional-enrichment.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/rnaseq/downstream/functional-enrichment.Rmd b/workflows/rnaseq/downstream/functional-enrichment.Rmd index f98ec99a..ff84613d 100644 --- a/workflows/rnaseq/downstream/functional-enrichment.Rmd +++ b/workflows/rnaseq/downstream/functional-enrichment.Rmd @@ -46,7 +46,7 @@ if (config$parallel$parallel){ } ``` -```{r load, cache=TRUE, cache.extra=file.info('combined.Rds')$mtime} +```{r load} obj <- readRDS('combined-raw.Rds') res_list <- obj$res_list dds_list <- obj$dds_list @@ -66,7 +66,7 @@ if(!'degpatterns_list' %in% names(obj)){ } ``` -```{r functional_enrichment_prep, cache=TRUE, config=config$annotation$keytype, eval=config$toggle$functional_enrichment, dependson='load'} +```{r functional_enrichment_prep, cache=TRUE, config=config$annotation$keytype, eval=config$toggle$functional_enrichment, cache.extra=res_list} # obtain ontology information for all ontologies ontology_list <- lcdbwf:::get_ontology_list(config) From fcc6cd8e3786fc728262ea8c1576ec6f66cbf90a Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Mon, 13 May 2024 16:20:32 -0400 Subject: [PATCH 36/93] rm unused dependson --- workflows/rnaseq/downstream/functional-enrichment.Rmd | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/workflows/rnaseq/downstream/functional-enrichment.Rmd b/workflows/rnaseq/downstream/functional-enrichment.Rmd index ff84613d..40bb4c2f 100644 --- a/workflows/rnaseq/downstream/functional-enrichment.Rmd +++ b/workflows/rnaseq/downstream/functional-enrichment.Rmd @@ -73,7 +73,7 @@ ontology_list <- lcdbwf:::get_ontology_list(config) ``` -```{r enrich, cache=TRUE, eval=config$toggle$functional_enrichment, config=c(config$main, config$functional_enrichment), dependson=c('functional_enrichment_prep', 'assemble_variables')} +```{r enrich, cache=TRUE, eval=config$toggle$functional_enrichment, config=c(config$main, config$functional_enrichment), dependson='functional_enrichment_prep'} enrich_list <- lcdbwf:::run_enricher(res_list=res_list, ontology_list=ontology_list, config=config, From 80375bcad22c2c39ccb5eec02c3c52e46322c254 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Mon, 13 May 2024 16:21:06 -0400 Subject: [PATCH 37/93] uncache combined_rds chunk --- workflows/rnaseq/downstream/functional-enrichment.Rmd | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/workflows/rnaseq/downstream/functional-enrichment.Rmd b/workflows/rnaseq/downstream/functional-enrichment.Rmd index 40bb4c2f..4efc3bc5 100644 --- a/workflows/rnaseq/downstream/functional-enrichment.Rmd +++ b/workflows/rnaseq/downstream/functional-enrichment.Rmd @@ -81,11 +81,14 @@ enrich_list <- lcdbwf:::run_enricher(res_list=res_list, ``` -```{r combined_rds, cache=TRUE, dependson='enrich'} +```{r combined_rds} obj <- list(res_list=res_list, dds_list=dds_list, - enrich_list=enrich_list, - degpatterns_list=degpatterns_list) + rld_list=rld_list, + enrich_list=enrich_list) + +if(!is.null(degpatterns_list)) obj$degpatterns_list <- degpatterns_list + saveRDS(obj, file='combined-raw.Rds', compress=FALSE) app_obj <- lcdbwf:::compose_results(rds_file='combined-raw.Rds', From 00b404fe7bbb8803725a3b2a1b90ccec7d3ac85e Mon Sep 17 00:00:00 2001 From: Apratim 
Mitra Date: Mon, 13 May 2024 16:21:30 -0400 Subject: [PATCH 38/93] uncache load chunk; only depend on res_list rld_list --- workflows/rnaseq/downstream/gene-patterns.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/rnaseq/downstream/gene-patterns.Rmd b/workflows/rnaseq/downstream/gene-patterns.Rmd index 53f1ccba..da516768 100644 --- a/workflows/rnaseq/downstream/gene-patterns.Rmd +++ b/workflows/rnaseq/downstream/gene-patterns.Rmd @@ -34,7 +34,7 @@ devtools::load_all('../../../lib/lcdbwf') config <- lcdbwf:::load_config('config.yaml') ``` -```{r load, cache=TRUE, cache.extra=file.info('combined.Rds')$mtime} +```{r load} obj <- readRDS('combined-raw.Rds') res_list <- obj$res_list dds_list <- obj$dds_list @@ -116,7 +116,7 @@ low.minc <- 1 Gene pattern for changed genes in individual contrasts are also indicated below. -```{r finalclusters, fig.width=12, results='asis', cache=TRUE, dependson='load'} +```{r finalclusters, fig.width=12, results='asis', cache=TRUE, cache.extra=c(res_list, rld_list)} # Run the clustering, identify patterns, and generate plots. 
# Docs: https://lcdb.github.io/lcdb-wf/rnaseq-rmd.html#finalclusters # NOTE: which genes to cluster?------------------------------------------------ From 1e585c855f25dac556c356aa8f4ae84bc4bbb713 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Mon, 13 May 2024 16:21:52 -0400 Subject: [PATCH 39/93] make rld_list if missing or NULL --- workflows/rnaseq/downstream/gene-patterns.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/rnaseq/downstream/gene-patterns.Rmd b/workflows/rnaseq/downstream/gene-patterns.Rmd index da516768..2db71b48 100644 --- a/workflows/rnaseq/downstream/gene-patterns.Rmd +++ b/workflows/rnaseq/downstream/gene-patterns.Rmd @@ -40,7 +40,7 @@ res_list <- obj$res_list dds_list <- obj$dds_list # make rld_list if not present in obj -if(!'rld_list' %in% names(obj)){ +if(!'rld_list' %in% names(obj) | is.null(obj$rld_list)){ rld_list <- lapply(dds_list, function(x) varianceStabilizingTransformation(x, blind=TRUE) ) From 4961cfafe7bafb8d4d439b5c08afe1a989e10e18 Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Mon, 13 May 2024 16:22:17 -0400 Subject: [PATCH 40/93] uncache combined_rds --- workflows/rnaseq/downstream/gene-patterns.Rmd | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/workflows/rnaseq/downstream/gene-patterns.Rmd b/workflows/rnaseq/downstream/gene-patterns.Rmd index 2db71b48..b75dedd0 100644 --- a/workflows/rnaseq/downstream/gene-patterns.Rmd +++ b/workflows/rnaseq/downstream/gene-patterns.Rmd @@ -252,11 +252,13 @@ for (name in names(res_list)) { } ``` -```{r combined_rds, cache=TRUE, dependson='finalclusters'} +```{r combined_rds} obj <- list(res_list=res_list, dds_list=dds_list, - enrich_list=enrich_list, + rld_list=rld_list, degpatterns_list=degpatterns_list) +if(!is.null(enrich_list)) obj$enrich_list <- enrich_list + saveRDS(obj, file='combined-raw.Rds', compress=FALSE) app_obj <- lcdbwf:::compose_results(rds_file='combined-raw.Rds', From 10423cd9b2efcd89715039006a82711f62315381 Mon 
Sep 17 00:00:00 2001 From: Apratim Mitra Date: Mon, 13 May 2024 16:22:26 -0400 Subject: [PATCH 41/93] cache reportresults chunk --- workflows/rnaseq/downstream/gene-patterns.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/rnaseq/downstream/gene-patterns.Rmd b/workflows/rnaseq/downstream/gene-patterns.Rmd index b75dedd0..4c425102 100644 --- a/workflows/rnaseq/downstream/gene-patterns.Rmd +++ b/workflows/rnaseq/downstream/gene-patterns.Rmd @@ -270,7 +270,7 @@ saveRDS(app_obj, file='combined.Rds', compress=FALSE) # Exported results -```{r excel, results='asis'} +```{r excel, results='asis', cache=TRUE, dependson='finalclusters'} lcdbwf:::exported_excel(res_list, dds_list , file='final_clusters/consolidated_results.xlsx') ``` From afde45fddeaecfc1168f5628beac361c0d67355e Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Mon, 13 May 2024 16:22:55 -0400 Subject: [PATCH 42/93] add tests for downstream --- workflows/rnaseq/run_downstream_test.sh | 137 ++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 8 deletions(-) diff --git a/workflows/rnaseq/run_downstream_test.sh b/workflows/rnaseq/run_downstream_test.sh index 613a4118..ce6847c4 100755 --- a/workflows/rnaseq/run_downstream_test.sh +++ b/workflows/rnaseq/run_downstream_test.sh @@ -8,16 +8,137 @@ set -e # See the docstring of that file for details on how it works. # # Here, we run the preprocessor on all the Rmd files in downstream/, and store -# the newly-converted ones in dowstream-test/. Then we run rmarkdown::render on +# the newly-converted ones in downstream-test/. Then we run rmarkdown::render on # those new files. 
-mkdir -p downstream-test +TESTDIR=downstream-test +mkdir -p $TESTDIR for i in downstream/*.Rmd; do - python ../../ci/preprocessor.py $i > downstream-test/$(basename $i) + python ../../ci/preprocessor.py $i > $TESTDIR/$(basename $i) done +echo +echo "Standard workflow: rnaseq.Rmd -> functional-enrichment.Rmd -> gene-patterns.Rmd" + # Make sure we move the config file there too -cp downstream/config.yaml downstream-test/config.yaml -cp downstream/text.yaml downstream-test/text.yaml -Rscript -e "rmarkdown::render('downstream-test/rnaseq.Rmd')" -Rscript -e "rmarkdown::render('downstream-test/functional-enrichment.Rmd')" -Rscript -e "rmarkdown::render('downstream-test/gene-patterns.Rmd')" +cp downstream/config.yaml $TESTDIR/config.yaml +cp downstream/text.yaml $TESTDIR/text.yaml + +# run rnaseq.Rmd +Rscript -e "rmarkdown::render('$TESTDIR/rnaseq.Rmd'); \ + sink('$TESTDIR/obj-names.txt'); cat(names(obj)); sink(); \ + sink('$TESTDIR/app-obj-names.txt'); cat(names(app_obj)); sink();" + +OBS=$(cat $TESTDIR/obj-names.txt) +echo +echo "- check raw object slots: $OBS" +EXP="res_list dds_list" +[ "$OBS" != "$EXP" ] && echo "Object does not have expected slots" && exit 1 + +OBS=$(cat $TESTDIR/app-obj-names.txt) +echo "- check app object slots: $OBS" +EXP="res dds rld labels dds_mapping" +[ "$OBS" != "$EXP" ] && echo "Object does not have expected slots" && exit 1 + +# run functional-enrichment.Rmd +Rscript -e "rmarkdown::render('$TESTDIR/functional-enrichment.Rmd'); \ + sink('$TESTDIR/obj-names.txt'); cat(names(obj)); sink(); \ + sink('$TESTDIR/app-obj-names.txt'); cat(names(app_obj)); sink();" + +OBS=$(cat $TESTDIR/obj-names.txt) +echo +echo "- check raw object slots: $OBS" +EXP="res_list dds_list rld_list enrich_list" +[ "$OBS" != "$EXP" ] && echo "Object does not have expected slots" && exit 1 + +OBS=$(cat $TESTDIR/app-obj-names.txt) +echo "- check app object slots: $OBS" +EXP="res dds rld labels dds_mapping enrich genetonic" +[ "$OBS" != "$EXP" ] && echo "Object does not 
have expected slots" && exit 1 + +# run gene-patterns.Rmd +Rscript -e "rmarkdown::render('$TESTDIR/gene-patterns.Rmd'); \ + sink('$TESTDIR/obj-names.txt'); cat(names(obj)); sink(); \ + sink('$TESTDIR/app-obj-names.txt'); cat(names(app_obj)); sink();" + +OBS=$(cat $TESTDIR/obj-names.txt) +echo +echo "- check raw object slots: $OBS" +EXP="res_list dds_list rld_list degpatterns_list enrich_list" +[ "$OBS" != "$EXP" ] && echo "Object does not have expected slots" && exit 1 + +OBS=$(cat $TESTDIR/app-obj-names.txt) +echo "- check app object slots: $OBS" +EXP="res dds rld labels dds_mapping enrich genetonic degpatterns" +[ "$OBS" != "$EXP" ] && echo "Object does not have expected slots" && exit 1 + + +echo +echo "Variation 1: rnaseq.Rmd -> gene-patterns.Rmd -> functional-enrichment.Rmd" + +# now remove functional enrichment and gene patterns slots from combined-raw.Rds +# and rerun with those Rmds swapped +BEFORE=$(date -r $TESTDIR/final_clusters/consolidated_results.xlsx) + +echo +echo "- Remove enrich_list & degpatterns_list slots from raw obj" +Rscript -e "obj <- readRDS('$TESTDIR/combined-raw.Rds'); \ + obj <- obj[ c('res_list', 'dds_list', 'rld_list') ]; \ + saveRDS(obj, '$TESTDIR/combined-raw.Rds', compress=FALSE)" + +echo "- Remove enrich_list & degpatterns_list slots from app obj" +Rscript -e "obj <- readRDS('$TESTDIR/combined.Rds'); \ + obj <- obj[ c('res', 'dds', 'rld', 'labels', 'dds_mapping') ]; \ + saveRDS(obj, '$TESTDIR/combined.Rds', compress=FALSE)" + +echo "- Empty functional enrichment and gene patterns cache" +rm -rf $TESTDIR/functional-enrichment_cache +rm -rf $TESTDIR/functional-enrichment_files +rm -rf $TESTDIR/gene-patterns_cache +rm -rf $TESTDIR/gene-patterns_files + +Rscript -e "rmarkdown::render('$TESTDIR/gene-patterns.Rmd'); \ + sink('$TESTDIR/obj-names.txt'); cat(names(obj)); sink(); \ + sink('$TESTDIR/app-obj-names.txt'); cat(names(app_obj)); sink();" +OBS=$(cat $TESTDIR/obj-names.txt) +echo +echo "- check raw object slots: $OBS" 
+EXP="res_list dds_list rld_list degpatterns_list" +[ "$OBS" != "$EXP" ] && echo "Object does not have expected slots" && exit 1 + +OBS=$(cat $TESTDIR/app-obj-names.txt) +echo "- check app object slots: $OBS" +EXP="res dds rld labels dds_mapping degpatterns" +[ "$OBS" != "$EXP" ] && echo "Object does not have expected slots" && exit 1 + + +Rscript -e "rmarkdown::render('$TESTDIR/functional-enrichment.Rmd'); \ + sink('$TESTDIR/obj-names.txt'); cat(names(obj)); sink(); \ + sink('$TESTDIR/app-obj-names.txt'); cat(names(app_obj)); sink();" +OBS=$(cat $TESTDIR/obj-names.txt) +echo +echo "- check raw object slots: $OBS" +EXP="res_list dds_list rld_list enrich_list degpatterns_list" +[ "$OBS" != "$EXP" ] && echo "Object does not have expected slots" && exit 1 + +OBS=$(cat $TESTDIR/app-obj-names.txt) +echo "- check app object slots: $OBS" +EXP="res dds rld labels dds_mapping enrich genetonic degpatterns" +[ "$OBS" != "$EXP" ] && echo "Object does not have expected slots" && exit 1 + +AFTER=$(date -r $TESTDIR/final_clusters/consolidated_results.xlsx) + +echo +echo "- make sure results were updated" +[[ $AFTER < $BEFORE ]] && echo "- Results were not updated" && exit 1 + +# rerun after just touching raw RDS file +echo +echo "touch raw RDS file. Shouldn't update gene patterns" + +BEFORE=$(date -r $TESTDIR/final_clusters/consolidated_results.xlsx) +touch $TESTDIR/combined-raw.Rds +Rscript -e "rmarkdown::render('$TESTDIR/gene-patterns.Rmd')" +AFTER=$(date -r $TESTDIR/final_clusters/consolidated_results.xlsx) +[[ $AFTER > $BEFORE ]] && echo "- Gene patterns was updated" && exit 1 + +echo "Done!" 
From 1fdcde8525e0c6cdd9484e23d7a6b0cf3a7ade5b Mon Sep 17 00:00:00 2001 From: Apratim Mitra Date: Tue, 14 May 2024 10:08:48 -0400 Subject: [PATCH 43/93] correctly handle 'res' key --- lib/lcdbwf/R/helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index 011a4d14..f8a34dc0 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -656,7 +656,7 @@ compose_results <- function(res_list=NULL, for(name in enrich_not_res){ if(!'res' %in% names(enrich_list[[ name ]])){ no_res_key <- c(no_res_key, name) - } else if(!enrich_list[['res']] %in% res_names){ + } else if(!enrich_list[[ name ]][['res']] %in% res_names){ no_res_key <- c(no_res_key, name) } } From aeabb3d0272173e5552ff715c53905c3eb27f4f7 Mon Sep 17 00:00:00 2001 From: Mira Sohn Date: Thu, 16 May 2024 20:00:14 +0000 Subject: [PATCH 44/93] argument added to and functions in lib/lcdbwf/functional_enrichment.R --- lib/lcdbwf/R/functional_enrichment.R | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/lib/lcdbwf/R/functional_enrichment.R b/lib/lcdbwf/R/functional_enrichment.R index 3698e8a5..678684b2 100644 --- a/lib/lcdbwf/R/functional_enrichment.R +++ b/lib/lcdbwf/R/functional_enrichment.R @@ -4,10 +4,11 @@ #' @param config Config object #' @param cores Number of cores to run it on #' @param sep Character to separate res_list names +#' @param universe logical, TRUE limits background genes and FALSE doesn't #' #' @return nested list of enrichResult objects run_enricher <- function(res_list, ontology_list, config, - cores=1, sep='*'){ + cores=1, sep='*', universe=TRUE){ # This function supports running in parallel which works best with a flat # list; however for organizational purposese we want a nested structure. 
So # we convert between the two by collapsing nested keys for flat list, and @@ -42,7 +43,8 @@ run_enricher <- function(res_list, ontology_list, config, direction=direction, TERM2GENE=ontology_list[['term2gene']][[ont]], TERM2NAME=ontology_list[['term2name']][[ont]], - config=config + config=config, + universe=universe, ) enrich_res }, BPPARAM=BiocParallel::MulticoreParam(cores)) @@ -101,11 +103,14 @@ collapse_names <- function(res_list, config, sep='*'){ #' lfc_thresh from the config. #' @param kind One of "OR" for overrepresentation or "GSEA" for gene set #' enrichment analysis. +#' @param universe background genes. If missing, all genes listed in the database +#' (e.g. TERM2GENE table) will be used as background. See more info in +#' https://github.com/YuLab-SMU/clusterProfiler/blob/devel/R/enricher.R#L8 #' @param ... Additional arguments are passed on to enricher() for kind="OR" or #' GSEA() for kind="GSEA". #' #' @return An enrichResults object from -enrich_test <- function(res, TERM2GENE, TERM2NAME, config, direction, kind='OR', ...){ +enrich_test <- function(res, TERM2GENE, TERM2NAME, config, direction, kind='OR', universe, ...){ if (is.null(config$main$lfc_thresh)){ lfc_thresh <- 0 @@ -121,13 +126,19 @@ enrich_test <- function(res, TERM2GENE, TERM2NAME, config, direction, kind='OR', direction=direction, return_type="rownames" ) - + # Update the `universe` variable to a vector of background or all genes + if (universe) { + universe <- rownames(res$res) # uses only detected genes in the current dataset + } else { + universe <- NULL # uses all genes in the given DB + } e <- clusterProfiler::enricher( genes, TERM2GENE=TERM2GENE, TERM2NAME=TERM2NAME, pvalueCutoff=config$functional_enrichment$pvalueCutoff, qvalueCutoff=config$functional_enrichment$qvalueCutoff, + universe=universe, ... 
) } else if (kind == "GSEA"){ From 455a05746211b35aad2be5f60dc71eb4a42d21a6 Mon Sep 17 00:00:00 2001 From: Mira Sohn Date: Thu, 16 May 2024 20:01:17 +0000 Subject: [PATCH 45/93] added to workflows/rnaseq/downstream/config.yaml in order to provide an option limiting background genes in functional enrichment analysis --- workflows/rnaseq/downstream/config.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/workflows/rnaseq/downstream/config.yaml b/workflows/rnaseq/downstream/config.yaml index 974a2d09..b7be354f 100644 --- a/workflows/rnaseq/downstream/config.yaml +++ b/workflows/rnaseq/downstream/config.yaml @@ -236,3 +236,7 @@ functional_enrichment: # fine getting plots even with no statistically significant terms. pvalueCutoff: 1 qvalueCutoff: 1 + + # This sets the `universe` argument of the `enricher` function to only detected genes + # See https://github.com/YuLab-SMU/clusterProfiler/blob/devel/R/enricher.R#L8C21-L8C136 + limit_background_genes: TRUE From 94fb878801a9b8b79aabd4066fd3fbc1ac697fa1 Mon Sep 17 00:00:00 2001 From: Mira Sohn Date: Thu, 16 May 2024 20:02:39 +0000 Subject: [PATCH 46/93] argument set in function in workflows/rnaseq/downstream/functional-enrichment.Rmd. 
This argument will limit background genes to detected genes in the given dataset in functional enrichment analysis --- workflows/rnaseq/downstream/functional-enrichment.Rmd | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/workflows/rnaseq/downstream/functional-enrichment.Rmd b/workflows/rnaseq/downstream/functional-enrichment.Rmd index 7eaea3fa..9e108f5f 100644 --- a/workflows/rnaseq/downstream/functional-enrichment.Rmd +++ b/workflows/rnaseq/downstream/functional-enrichment.Rmd @@ -65,10 +65,13 @@ ontology_list <- lcdbwf:::get_ontology_list(config) ``` ```{r enrich, cache=TRUE, eval=config$toggle$functional_enrichment, config=c(config$main, config$functional_enrichment), dependson=c('functional_enrichment_prep', 'assemble_variables')} -enrich_list <- lcdbwf:::run_enricher(res_list=res_list, - ontology_list=ontology_list, - config=config, - cores=cores, sep='*') +enrich_list <- lcdbwf:::run_enricher( + res_list=res_list, + ontology_list=ontology_list, + config=config, + universe=config$functional_enrichment$limit_background_genes, + cores=cores, + sep='*') ``` From 3fd0118477e91d4d526ed47539c6d2677b209119 Mon Sep 17 00:00:00 2001 From: Mira Sohn Date: Fri, 17 May 2024 19:41:45 +0000 Subject: [PATCH 47/93] argument added to function to limit background genes --- lib/lcdbwf/R/functional_enrichment.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/lcdbwf/R/functional_enrichment.R b/lib/lcdbwf/R/functional_enrichment.R index 678684b2..97a74612 100644 --- a/lib/lcdbwf/R/functional_enrichment.R +++ b/lib/lcdbwf/R/functional_enrichment.R @@ -44,7 +44,7 @@ run_enricher <- function(res_list, ontology_list, config, TERM2GENE=ontology_list[['term2gene']][[ont]], TERM2NAME=ontology_list[['term2name']][[ont]], config=config, - universe=universe, + universe=universe ) enrich_res }, BPPARAM=BiocParallel::MulticoreParam(cores)) @@ -152,6 +152,7 @@ enrich_test <- function(res, TERM2GENE, TERM2NAME, config, direction, kind='OR', 
TERM2GENE=TERM2GENE, TERM2NAME=TERM2NAME, pvalueCutoff=config$functional_enrichment$pvalueCutoff, + universe=universe, ... ) From c5ab6040dc8ee69744421e75e8c0cae0e11ebe8c Mon Sep 17 00:00:00 2001 From: Mira Sohn Date: Tue, 21 May 2024 03:22:56 +0000 Subject: [PATCH 48/93] run_enricher() updated, nonzero_genes() added to limit background genes --- lib/lcdbwf/R/functional_enrichment.R | 31 ++++++++++++++++++---------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/lib/lcdbwf/R/functional_enrichment.R b/lib/lcdbwf/R/functional_enrichment.R index 97a74612..f84037d6 100644 --- a/lib/lcdbwf/R/functional_enrichment.R +++ b/lib/lcdbwf/R/functional_enrichment.R @@ -4,11 +4,11 @@ #' @param config Config object #' @param cores Number of cores to run it on #' @param sep Character to separate res_list names -#' @param universe logical, TRUE limits background genes and FALSE doesn't +#' @param universe_list List of vectors for background genes #' #' @return nested list of enrichResult objects run_enricher <- function(res_list, ontology_list, config, - cores=1, sep='*', universe=TRUE){ + cores=1, sep='*', universe_list=universe_list){ # This function supports running in parallel which works best with a flat # list; however for organizational purposese we want a nested structure. 
So # we convert between the two by collapsing nested keys for flat list, and @@ -44,7 +44,7 @@ run_enricher <- function(res_list, ontology_list, config, TERM2GENE=ontology_list[['term2gene']][[ont]], TERM2NAME=ontology_list[['term2name']][[ont]], config=config, - universe=universe + universe=universe_list[[name]] ) enrich_res }, BPPARAM=BiocParallel::MulticoreParam(cores)) @@ -88,6 +88,21 @@ collapse_names <- function(res_list, config, sep='*'){ return(names) } + +#' Function to retrieve genes with non-zero raw counts in one or more samples +#' +#' @param dds DESeqDataSet object +#' +#' @return a vector containing gene IDs with raw counts are greater than 0 in one or more samples +nonzero_genes <- function(dds) { + # Extract raw count matrix + counts <- DESeq2::counts(dds, normalized=FALSE) + # Subset rows where rowsums are greater than zero + counts <- counts[rowSums(counts) > 0,] + # Return gene IDs + return(rownames(counts)) +} + #' All-in-one enrichment function. #' #' Designed to not require an orgdb, and instead requires dataframes of @@ -104,8 +119,7 @@ collapse_names <- function(res_list, config, sep='*'){ #' @param kind One of "OR" for overrepresentation or "GSEA" for gene set #' enrichment analysis. #' @param universe background genes. If missing, all genes listed in the database -#' (e.g. TERM2GENE table) will be used as background. See more info in -#' https://github.com/YuLab-SMU/clusterProfiler/blob/devel/R/enricher.R#L8 +#' (e.g. TERM2GENE table) will be used as background. #' @param ... Additional arguments are passed on to enricher() for kind="OR" or #' GSEA() for kind="GSEA". 
#' @@ -126,12 +140,7 @@ enrich_test <- function(res, TERM2GENE, TERM2NAME, config, direction, kind='OR', direction=direction, return_type="rownames" ) - # Update the `universe` variable to a vector of background or all genes - if (universe) { - universe <- rownames(res$res) # uses only detected genes in the current dataset - } else { - universe <- NULL # uses all genes in the given DB - } + e <- clusterProfiler::enricher( genes, TERM2GENE=TERM2GENE, From f97c0ae24f632e2c01b29303ca93229f86cfd5a8 Mon Sep 17 00:00:00 2001 From: Mira Sohn Date: Tue, 21 May 2024 03:27:51 +0000 Subject: [PATCH 49/93] limit_background_genes updated to choose an option for universe --- workflows/rnaseq/downstream/config.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/workflows/rnaseq/downstream/config.yaml b/workflows/rnaseq/downstream/config.yaml index b7be354f..3176f3ae 100644 --- a/workflows/rnaseq/downstream/config.yaml +++ b/workflows/rnaseq/downstream/config.yaml @@ -237,6 +237,11 @@ functional_enrichment: pvalueCutoff: 1 qvalueCutoff: 1 - # This sets the `universe` argument of the `enricher` function to only detected genes - # See https://github.com/YuLab-SMU/clusterProfiler/blob/devel/R/enricher.R#L8C21-L8C136 - limit_background_genes: TRUE + # This sets the `universe` argument of the `enricher` function to choose an option how to limit + # background genes. Choose one from the following options: + # - exclude_zero_in_all: excludes genes with zero raw count in all samples + # - no_filter: disable any filtering. This option will use all available genes in + # each annotation database. You can always use this option and then manually + # adjust the results objects going into the `enricher` function if you need + # more customization. 
+ limit_background_genes: "exclude_zero_in_all" From b013bfadf0af57e4d57c5452b62492d829c57333 Mon Sep 17 00:00:00 2001 From: Mira Sohn Date: Tue, 21 May 2024 03:32:59 +0000 Subject: [PATCH 50/93] universe_list to limit background genes --- .../downstream/functional-enrichment.Rmd | 31 ++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/workflows/rnaseq/downstream/functional-enrichment.Rmd b/workflows/rnaseq/downstream/functional-enrichment.Rmd index 0cb7a5ea..5af490e7 100644 --- a/workflows/rnaseq/downstream/functional-enrichment.Rmd +++ b/workflows/rnaseq/downstream/functional-enrichment.Rmd @@ -71,16 +71,33 @@ if(!'degpatterns_list' %in% names(obj)){ # obtain ontology information for all ontologies ontology_list <- lcdbwf:::get_ontology_list(config) +# Define background gene pool for overrepresentation analysis +# This option will limit genes to the following criteria: +# - option 1 ("exclude_zero_in_all"): genes with non-zero raw counts in one or more samples +# - option 2 ("no_filter"): all available genes in each annotation database (e.g. 
GO, KEGG) +univ_option <- config$functional_enrichment$limit_background_genes + +# Print the current universe option +if (univ_option == "exclude_zero_in_all") { + # Retrieve all genes detected in one or more samples per contrast + universe_list <- lapply(names(dds_list), function(name) nonzero_genes(dds_list[[name]])) + names(universe_list) <- names(dds_list) +} else if (univ_option == "no_filter") { + # Set universe to NULL + universe_list <- NULL +} else { + stop("Correct your background option in the config.yaml file.") +} ``` + ```{r enrich, cache=TRUE, eval=config$toggle$functional_enrichment, config=c(config$main, config$functional_enrichment), dependson='functional_enrichment_prep'} -enrich_list <- lcdbwf:::run_enricher( - res_list=res_list, - ontology_list=ontology_list, - config=config, - universe=config$functional_enrichment$limit_background_genes, - cores=cores, - sep='*') +enrich_list <- lcdbwf:::run_enricher(res_list=res_list, + ontology_list=ontology_list, + config=config, + universe_list=universe_list, + cores=cores, sep='*') + ``` ```{r combined_rds} From a62c342ade860db777c7d1ce72fc78e821e117bf Mon Sep 17 00:00:00 2001 From: Mira Sohn Date: Tue, 21 May 2024 18:02:45 +0000 Subject: [PATCH 51/93] rnaseq downstream configuration updated to better describe about limit_background_genes --- workflows/rnaseq/downstream/config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workflows/rnaseq/downstream/config.yaml b/workflows/rnaseq/downstream/config.yaml index 3176f3ae..2ec85070 100644 --- a/workflows/rnaseq/downstream/config.yaml +++ b/workflows/rnaseq/downstream/config.yaml @@ -241,7 +241,7 @@ functional_enrichment: # background genes. Choose one from the following options: # - exclude_zero_in_all: excludes genes with zero raw count in all samples # - no_filter: disable any filtering. This option will use all available genes in - # each annotation database. 
You can always use this option and then manually - # adjust the results objects going into the `enricher` function if you need - # more customization. + # each annotation database. If a user wishes to use custom background genes, + # use this option with `universe_list` set to a named list of background genes, + # where the list names are identical to those of the result list. limit_background_genes: "exclude_zero_in_all" From 8f4afce5cf5eaad4c874d09c6e1e5041214b9c95 Mon Sep 17 00:00:00 2001 From: Mira Sohn Date: Tue, 21 May 2024 18:04:17 +0000 Subject: [PATCH 52/93] downstream functional enrichment analysis updated with an error for unmatched list names between universe_list and res_list & indentation changed --- .../downstream/functional-enrichment.Rmd | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/workflows/rnaseq/downstream/functional-enrichment.Rmd b/workflows/rnaseq/downstream/functional-enrichment.Rmd index 5af490e7..7731d9d3 100644 --- a/workflows/rnaseq/downstream/functional-enrichment.Rmd +++ b/workflows/rnaseq/downstream/functional-enrichment.Rmd @@ -72,21 +72,26 @@ if(!'degpatterns_list' %in% names(obj)){ ontology_list <- lcdbwf:::get_ontology_list(config) # Define background gene pool for overrepresentation analysis -# This option will limit genes to the following criteria: -# - option 1 ("exclude_zero_in_all"): genes with non-zero raw counts in one or more samples -# - option 2 ("no_filter"): all available genes in each annotation database (e.g. 
GO, KEGG) univ_option <- config$functional_enrichment$limit_background_genes -# Print the current universe option +# Set `universe_list` based on configuration or custom background if (univ_option == "exclude_zero_in_all") { - # Retrieve all genes detected in one or more samples per contrast - universe_list <- lapply(names(dds_list), function(name) nonzero_genes(dds_list[[name]])) - names(universe_list) <- names(dds_list) + # Retrieve all genes detected in one or more samples per contrast + universe_list <- lapply(names(dds_list), function(name) nonzero_genes(dds_list[[name]])) + names(universe_list) <- names(dds_list) } else if (univ_option == "no_filter") { - # Set universe to NULL - universe_list <- NULL + ######################################################################################## + # Here, a user can manually set the `universe_list` to a named list of vectors for # + # custom background genes. Ensure to have names matched between `universe_list` and # + # `res_list`. # + ######################################################################################## + universe_list <- NULL + # Raise an error if names are unmatched + if (!identical(names(universe_list), names(res_list)) & !is.null(universe_list)) { + stop("universe_list has different names from res_list.") + } } else { - stop("Correct your background option in the config.yaml file.") + stop("Correct your background option in the config.yaml file.") } ``` From 6bf41a15d8c62fdca44ae9e6906fa1401a4fcfa5 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Wed, 22 May 2024 13:07:31 -0400 Subject: [PATCH 53/93] added tests for expected input to make_dds. 
Need to break things more next --- lib/lcdbwf/tests/testthat/test-dds.R | 88 ++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/lib/lcdbwf/tests/testthat/test-dds.R b/lib/lcdbwf/tests/testthat/test-dds.R index 64f2da7f..cea30d03 100644 --- a/lib/lcdbwf/tests/testthat/test-dds.R +++ b/lib/lcdbwf/tests/testthat/test-dds.R @@ -1,3 +1,10 @@ +library(DESeq2) +devtools::load_all('../../../../lib/lcdbwf') +config <- lcdbwf:::load_config('config.yaml') +library(testthat) +source('test-functions.R') + +# --------- Test strip_dotted_version_from_dds() ---------- # test_that("stripping gene versions", { dds <- DESeq2::makeExampleDESeqDataSet() # genes are labeled like "gene1", "gene2", etc; change them to "gene1.1", @@ -15,3 +22,84 @@ test_that("stripping gene versions", { expect_equal(rownames(fixed)[1], "ENSgene1") }) +# --------------------------------------------------------- # + +# ---------- Test make_dds(test = 'LRT' || 'Wald') -------- # +test_that("make_dds handles minimum required design data of sampletale, + and design formula, using example featurecounts table", { + design_data <- make_design_data() + make_featurecounts_file() # featurecounts.txt will be written to this directory + dds <- make_dds(design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel) + # Check that the dds object is a DESeqDataSet (not NULL) + expect_true(inherits(dds, "DESeqDataSet")) + # Check that the WaldStatistic has been generated as Wald is the default test that + # is run with minimal input to make_dds() + expect_true(any(grepl('Wald', colnames(mcols(dds))))) + expect_false(any(grepl('LRT', colnames(mcols(dds))))) +}) + +test_that("make_dds handles design data where Wald test is specified explicitly", { + design_data <- make_design_data() + design_data$test <- 'Wald' + dds <- make_dds(design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel) + # Check that the dds object 
is a DESeqDataSet (not NULL) + expect_true(inherits(dds, "DESeqDataSet")) + # Check that the WaldStatistic has been generated when Wald is passed to make_dds() + expect_true(any(grepl('Wald', colnames(mcols(dds))))) + expect_false(any(grepl('LRT', colnames(mcols(dds))))) +}) + +test_that("make_dds handles required design data for LRT", { + design_data <- make_design_data() + design_data$test <- 'LRT' + design_data$reduced_design <- ~1 + dds <- make_dds(design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel) + # Check that the dds object is a DESeqDataSet (not NULL) + expect_true(inherits(dds, "DESeqDataSet")) + # Check that the LRT statistic has been generated + expect_true(any(grepl('LRT', colnames(mcols(dds))))) + expect_false(any(grepl('Wald', colnames(mcols(dds))))) +}) + +test_that("make_dds errors on invalid 'test' option", { + design_data <- make_design_data() + design_data$test <- "invalid_test_option" + + expect_error(make_dds(design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel), + "Valid options for test are \\'Wald\\' \\(default\\) or \\'LRT\\'") +}) +# --------------------------------------------------- # + +# -------------- collapseReplicates2 ---------------- # +test_that("collapseReplicates2 collapses the three control replicates and three treatment replicates to + a single control row and a single treatment row in colData.
Row names should still match + column 1.", { + # Setup a DESeqDataSet with replicates + design_data <- make_design_data() + # Add bio_rep to colData + dds <- make_dds(design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel) + + # Perform replicate collapsing + dds_collapsed <- collapseReplicates2(dds, dds$condition) + colData_collapsed <- as.data.frame(colData(dds_collapsed)) + expect_equal(length(as.character(colData_collapsed$condition[colData_collapsed$condition == 'control'])), 1) + expect_equal(length(as.character(colData_collapsed$condition[colData_collapsed$condition == 'treatment'])), 1) + # Check if the first column of colData matches rownames + expect_equal(rownames(colData_collapsed), colData_collapsed[,1]) +}) +# --------------------------------------------------- # From f8beb2c16a688d0d455799d567d1adb58719bc0c Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Wed, 22 May 2024 13:09:11 -0400 Subject: [PATCH 54/93] copied a recently used mouse config and text yaml required for make_dds --- lib/lcdbwf/tests/testthat/config.yaml | 247 ++++++++++++++++++++++++++ lib/lcdbwf/tests/testthat/text.yaml | 244 +++++++++++++++++++++++++ 2 files changed, 491 insertions(+) create mode 100644 lib/lcdbwf/tests/testthat/config.yaml create mode 100644 lib/lcdbwf/tests/testthat/text.yaml diff --git a/lib/lcdbwf/tests/testthat/config.yaml b/lib/lcdbwf/tests/testthat/config.yaml new file mode 100644 index 00000000..8d9833e5 --- /dev/null +++ b/lib/lcdbwf/tests/testthat/config.yaml @@ -0,0 +1,247 @@ +# ============================================================================ +# RNA-seq configuration file +# ============================================================================ +# +# The rnaseq.Rmd RMarkdown file reads in the values from this YAML file. +# +# This file is split into sections, indicated by the top-level keys.
+# +# In rnaseq.Rmd, this config file is loaded early on, is not cached, and +# thereafter is available as the `config` object. This is a nested list data +# structure. For example, in rnaseq.Rmd the currently-configured sample table +# can be accessed by ``config$main$sampletable``. +# +# Note that RMarkdown chunks may depend on one or more of these sections. +# Changing a value in a section will therefore appropriately cause all chunks +# depending on that section to be re-run. + + +# MAIN ------------------------------------------------------------------------- +# This section configures global options +main: + + # Path to sample table. It can be modified in the colData chunk if needed. + sampletable: "../config/sampletable.tsv" + + + # If you are using Ensembl annotations with gene IDs like ENSG000001.1 (with + # the trailing dotted version number), then these genes will not be found in + # the OrgDb. Set this to TRUE to strip off the dotted version to fix this. + strip_dotted_version: TRUE + + # If you have technical replicates, you can automatically collapse them. + # Provide a column in colData here, and for each unique value in the provided + # column, the counts across samples sharing that value will be summed. + # Typically this would be set to the column representing biological + # replicate, assuming technical replicates share the same biological + # replicate ID. + collapse_by: NULL + + # By default DESeq2 assumes alpha of 0.1 when filtering low-count genes. This + # is also used to select significant genes in the various helper functions. + alpha: 0.1 + + # If you plan on creating dds objects with featureCounts that will be + # compared with Salmon or Kallisto, there may be different numbers of genes + # in the respective dds objects. This can happen if the GTF used by + # featureCounts and the FASTA used by Salmon or Kallisto do not exactly + # match. By default, this mismatch will stop with an error. 
If instead you + # want to use only the intersection of genes found in all contrasts, set this + # to TRUE. + force_intersect: FALSE + + # AnnotationHub uses a safe permissions approach, setting the AnnotationHub + # lock file to be only visible by the creating user and the cache database to + # be read-only for the group. If this setting is TRUE, then the permissions + # will be set on BiocFileCache.sqlite and BiocFileCache.sqlite.LOCK to be + # read/write for both user and group. + group_permissions: TRUE + + +# PLOTTING --------------------------------------------------------------------- +# This section configures plotting options for PCA, clustered heatmap, and +# functional enrichment plots. +plotting: + + # For each item in this list, you will get a column of colors along the right + # side in the clustered heatmap and a PCA plot tab. + covariates_for_plots: + - Age + - Genotype + - Exercise + - Group + - Mass + - Litter + - AgeDays + - Genotype_Exercise + - Age_Genotype + - Exercise_Genotype + + # Arguments for dotplot_args, emapplot_args, and cnetplot_args are passed to + # the respective clusterProfiler plotting functions dotplot, emapplot, and + # cnetplot. Any arguments accepted by those functions can be added here. + dotplot_args: + showCategory: 20 + color: p.adjust + size: NULL + split: NULL + font.size: 12 + x: GeneRatio + + emapplot_args: + showCategory: 35 + color: p.adjust + cex_label_category: .7 + + cnetplot_args: + showCategory: 5 + foldChange: NULL + colorEdge: FALSE + circular: FALSE + node_label: all + + # If diagnostics_results_names is empty or missing, and + # toggle$results_diagnostics is TRUE (see the "toggle" section), then + # diagnostics will be run for all results objects. This can be time consuming + # if you have many contrasts. + # + # If toggle$results_diagnostics is FALSE, no diagnostics will be run for + # results and the diagnostics_results_names entry is ignored. 
+ # + # Otherwise if toggle$results_diagnostics is TRUE and this list contains + # items, then only run diagnostics for the following names in res_list. This + # can be useful in cases where you ran many contrasts but only want to run + # diagnostics on a subset of them. + diagnostics_results_names: [] + + +# PARALLEL --------------------------------------------------------------------- +# Configure parallelization here. +parallel: + + parallel: TRUE + + # If "cores" is an integer, use that many cores. + # + # If "cores" is a string, assume it's the name of an environment variable + # that stores the number of cores (e.g., "SLURM_CPUS_PER_TASK"). If that env + # var is missing or empty then default to 1 core. + # + # NOTE: on some clusters, you may want to also set the environment variable + # `OMP_NUM_THREADS=1` to prevent some R parallelization code from trying to + # grab all cores on the node, even though you were only allocated a subset by + # the batch scheduler. + cores: 8 + +# TOGGLE ----------------------------------------------------------------------- +# This section provides an easy way to turn on or off entire parts of the +# analysis. +toggle: + + # Import Salmon results instead of featureCounts? This is global; if you want + # to compare featureCounts and Salmon then leave this to FALSE and use the + # `salmon=TRUE` argument to lcdbwf::make_dds. + salmon: FALSE + + # Import Kallisto results instead of featureCounts? See similar notes above + # for Salmon. + kallisto: FALSE + + # Create diagnostic plots for all dds objects? + dds_diagnostics: TRUE + + # Create diagnostic plots for results objects? If TRUE, will check the + # config$plotting$diagnostics_results_names list. + results_diagnostics: TRUE + +# ANNOTATION ------------------------------------------------------------------- +# Configuration specific to annotations and databases +annotation: + + # Specify the genus and species. This will be used to search AnnotationHub.
+ genus_species: "Mus musculus" + + # You can optionally manually search AnnotationHub to find different + # annotation keys for OrgDb and/or TxDb and use them here to override the + # default latest versions. + orgdb_key_override: NULL + txdb_key_override: NULL + + # The OrgDb will be downloaded here and subsequently read directly from this + # directory + hub_cache: "../../../include/AnnotationHubCache" + localHub: FALSE + force: FALSE + + # What is the format of the gene IDs in rownames(dds)? + keytype: 'ENSEMBL' + + # What is the format of the gene IDs in msigdbr? + msigdbr_keytype: "ensembl_gene" + + # For each column here, the OrgDb will be queried using the gene IDs in the + # rownames of the results object (type is specified by "keytype" above) and + # the resulting values will be attached to the results object. + # + # Use columns(orgdb) to figure out what the options are for your particular + # OrgDb. + orgdb_columns: + - SYMBOL + - ALIAS + - UNIPROT + - ENTREZID + + # For each column listed here, if a value is NA when searching the + # AnnotationHub (as described above) then it will be filled in with the + # corresponding value from rownames(results_object). Useful for when you want + # to use symbol where possible but fill in with the Ensembl ID when symbol is + # not available. + fill: + - SYMBOL + + # Which column to use when labeling plots? E.g., MA and volcano plots. Must + # be one of "orgdb_columns". + label_column: SYMBOL + + +# FUNCTIONAL ENRICHMENT -------------------------------------------------------- +# Configuration specific to functional enrichment as performed in +# functional-enrichment.Rmd +functional_enrichment: + + ontologies: + # Standard Gene Ontology + "BP": "GO Biological Process" + "CC": "GO Cellular Component" + "MF": "GO Molecular Function" + + # KEGG pathway analysis + # "KEGG": "KEGG Pathways" + + ## MSigDB (compiled for human, but orthologs are available which may be of + ## questionable utility for other organisms).
To see what's available, use + ## lcdbwf::available_msigdb_keys() and cross-reference with + ## https://www.gsea-msigdb.org/gsea/msigdb/collections.jsp + #"C2_CGP": "MSigDB chemical and genetic perturbations" + #"C2_CP": "MSigDb Canonical pathways" + #"C5_GO:BP": "MSigDB's version of BP" + #"C5_GO:CC": "MSigDB's version of CC" + #"C5_GO:MF": "MSigDB's version of MF" + #"C8": "MSigDB cell-type-specific gene sets" + #"H": "MSigDB hallmark gene sets" + + # Type can be "OR" for overrepresentation analysis, or "GSEA" for gene set + # enrichment analysis. + kind: + - "OR" + + # Options here are "up", "down" or "changed". + directions: + - "up" + - "down" + - "changed" + + # Any GO terms above these cutoffs will not be reported. Set to 1 if you're + # fine getting plots even with no statistically significant terms. + pvalueCutoff: 1 + qvalueCutoff: 1 diff --git a/lib/lcdbwf/tests/testthat/text.yaml b/lib/lcdbwf/tests/testthat/text.yaml new file mode 100644 index 00000000..348a0325 --- /dev/null +++ b/lib/lcdbwf/tests/testthat/text.yaml @@ -0,0 +1,244 @@ +# Text to be included into RMarkdown files. +# +# This keeps the RMarkdown uncluttered and avoids cumbersome lcdbwf::mdcat() +# calls within for-loops, while still retaining the ability to easily edit. +# +# When adding new entries, note that the "|" is important -- see +# https://yaml-multiline.info/ for more details. + + +# SAMPLE SIMILARITY AND QC +qc: + clustered_heatmap: | + The following heatmap shows a hierarchical clustering of pairwise distances + between samples. Darker blue means less distant (i.e. more similar). In general + we expect to see replicates clustering together and separation of treatments. + + pca: | + Another way of looking at sample clustering is principal components + analysis (PCA). The x- and y-axes do not have units, rather, they represent + the dimensions along which the samples vary the most. The amount of + variance explained by each principal component is indicated in the axes + label. 
+ +# SIZE FACTORS ----------------------------------------------------------------- +sizefactors: | + ## Size factors {.tabset} + + Ideally, all libraries were sequenced to identical depth, in which case all + size factors would be 1.0. In practice, this is almost never the case due to + the difficulties of accurately measuring low concentrations of cDNA. DESeq2 uses size + factor estimates to normalize for sequencing depth across + libraries. If some size factors are much higher or lower than + 1 then those libraries had dramatically different coverage and we should be + careful about interpreting results. + + Simply taking the total number of reads has been shown to be too sensitive to + a small number of highly-expressed genes. DESeq2's size factors are + calculated according to the median-ratio method (equation 5 of [Anders + & Huber + 2010](http://dx.doi.org/10.1186/gb-2010-11-10-r106)). + + These diagnostic plots show the size factors (as a ranked bar plot) and the + relationship between the size factors and the total read count (as + a scatterplot). Samples whose total read count differs from size factor may + indicate that the sample has a small number of highly expressed genes. + +# DDS DIAGNOSTICS -------------------------------------------------------------- +dds_diagnostics: + dispersion: | + See the [dispersion + estimates](http://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#dispersion-plot-and-fitting-alternatives) + section of the DESeq2 vignette for details. + + colData: | + This table lists the samples included in the dds object. + + outliers: | + Are there any samples with many outliers? See the [approach to count + outliers](http://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#approach-to-count-outliers) + section of the DESeq2 vignette for details.
+ + sparsity: | + This plot can be used to see if the majority of genes with high counts + are coming from individual (or a small number of) samples. + + design_matrix: | + This table shows the design matrix of the dds object. + + +# RESULTS TABLE --------------------------------------------------------------- +# Help for the columns in the results summary table. +results_table: | + - The **row names** of the table are the long-format human-readable labels of + each contrast + - **name** column is the short name used to generate filenames. Upset plots, + output TSVs and worksheets in the Excel spreadsheet will be labeled with + this short name. + - **up** and **down** are the numbers of up- and downregulated genes respectively. + - **nonzero.vs.total:** the number of genes with nonzero read counts + and the total number of annotated genes. + - **alpha:** genes with an adjusted pvalue lower than this are considered + significantly changed. + - **lfcThreshold:** by default, the null hypothesis is that the log2 fold + change of genes is not different from zero. In some circumstances, it is + useful to use a different threshold, which will be reported here. + - **outliers:** Cook's distance is used as a measure of how much a single + sample is influencing the fitted coefficients for a gene. If that value + is too high, the gene is marked as an outlier and the pvalue and adjusted + pvalue will be set to NA. If there are many (hundreds to thousands) of + outliers, this is an indication that a sample may be problematic. In this + case, the dds diagnostics plots may help identify the culprit. + - **low.counts:** How many genes were not even tested for differential + expression because they had too low counts. + - **dds:** The name of the DESeqDataSet object used for the contrast (see + "other diagnostics" section above for details). + - **design:** the design, using R syntax. 
Originally described by + [Wilkinson + 1973](https://www.jstor.org/stable/2346786?seq=2#metadata_info_tab_contents), + but see the [section in R for Data + Science](https://r4ds.had.co.nz/model-basics.html#formulas-and-model-families) + as well. + - **test:** The contrast performed using the design. Words in this column + indicate the contrast was specified to DESeq2 using a 3-item character + vector. Numbers (like `0,+1`) indicate a numeric contrast. For more details + on the design, check the design matrix tab in the diagnostics section for + the corresponding dds object matching the name in the *dds* column. + + +# RESULTS PLOTS ---------------------------------------------------------------- +# Help for the tabbed results plots +results_plots: + ma: | + An **M-A plot** gives a good overview of the comparison. There is one dot per gene. + It shows three dimensions: the normalized counts (baseMean, on a log10 scale, + x-axis), the effect size (log2FoldChange, y-axis) and whether or not a gene was + significant (color). While it is tempting to interpret baseMean as + "expression level", this would not be correct because the values are only + normalized to library size, not transcript length. We can say that a gene + with higher baseMean than another gene has more observed reads, but we + cannot say whether that is because it has a longer transcript or because + there are more transcripts. + + volcano: | + A **volcano plot** shows fewer dimensions than an M-A plot (it does not show + normalized counts) but it can be useful for looking at the relationship of + adjusted p-value and log2FoldChange. + + pval_hist: | + The **raw p-value distribution** helps us assess if the data met the + assumptions of DESeq2. This [Variance Explained blog + post](http://varianceexplained.org/statistics/interpreting-pvalue-histogram/) + has a good explanation of interpretation.
Additionally, we color the p-values + for genes that have been removed by independent filtering due to having too + low counts. + + See the DESeq2 vignette on [independent filtering and multiple + testing](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#independent-filtering-and-multiple-testing) + for more details. + + +# RESULTS DIAGNOSTICS ---------------------------------------------------------- +# Individual help for the diagnostic plots for results objects +results_diagnostics: + filter_ma: | + This MA plot colors genes by whether or not they were filtered out in the + independent filtering step. + + outlier_ma: | + This MA plot colors genes by whether or not they were considered an outlier. + + lfcse_basemean: | + This plots the standard error of the log2FoldChange (lfcSE) vs baseMean. In + general, as baseMean increases the lfcSE decreases. + + lfcse_lfc: | + Standard error of the log2FoldChange vs the log2FoldChange itself. + + +# UPSET PLOTS ------------------------------------------------------------------ +upset_plots: | + # UpSet plots {.tabset} + + Here we gather together all the interesting gene sets into an [UpSet + plot](http://caleydo.org/tools/upset/). These plots show the combinatorial + overlaps of genes found to be up or down across the different contrasts + performed. It's much like a Venn diagram, but easier to interpret and can scale + to many comparisons. + + The plot shows a summary of which genes were found in common across + contrasts. If you want to know the details of *which* genes were found in + common, a TSV file is linked under each plot. This file has rows for each + gene and columns for each contrast. A `1` indicates that gene was found to be + up/down/changed in that contrast. You can sort this TSV to identify the genes + of interest. For example, sorting all columns in descending order will cause + genes shared in all contrasts (a `1` in each column) to come to the top.
+ + **Interpretation notes:** A gene can only be found in one column in an UpSet + plot. So if you want to confirm that the number of genes for a contrast + matches the results tables and MA plots, sum all the bars for which there is + a dot in that contrast's row. + +results_files: + + The best way to use these is to rank by the log2FoldChange column, and then + consider genes whose padj value falls below 0.1. + + **A note on NA values:** DESeq2 uses NA to encode extra information about + a gene, depending on which column the NA is found in. + + - If log2FoldChange, pvalue, and padj are all NA it means that the baseMean was + 0. That is, no samples in this contrast had any reads. + + - If only pvalue and padj are NA it means that the gene was detected as + a count outlier. + + - If only padj is NA, it means the gene had too low counts and was filtered + out from multiple testing because it had no chance of being significant, + thereby reducing the harshness of the rest of the multiple testing + adjustments. + + The output files have the following columns. + + + - **gene** is typically the most stable accession. For most organisms, this + is Ensembl. Human-readable names can be inconsistent (or missing in many + cases), so these IDs provide the most robust way of ensuring each gene has + a unique ID. + - **SYMBOL, ALIAS, UNIPROT, ENTREZID**, and possibly others, are columns with + additional gene identifiers. Typically SYMBOL and ALIAS will be the most + human-readable. + - **baseMean** is the average number of normalized reads across all samples. + It is used for estimating dispersion and gives an indication of how much + evidence there is for a gene. It should not be interpreted as expression + level, since these values are not normalized by transcript length (which + would introduce various biases). + - **log2FoldChange** is the effect size of differential expression.
DESeq2 + first calculates the simple log2 fold change using the mean of normalized + counts for the conditions being compared. However, both 1/5 and 1000/5000 + give a fold change of 5x, but the latter has much more information and we + would want to put more stake in that. So DESeq2 uses log2 fold change + shrinkage methods to down-weight 1/5 to something smaller than 5x so that + we don't design follow-up experiments on results with low information. For + more details, see the papers on + [apeglm](https://doi.org/10.1093/bioinformatics/bty895), + [ashr](https://doi.org/10.1093/biostatistics/kxw041), and + [normal](https://doi.org/10.1186/s13059-014-0550-8). **This is designed to + be the most important column to sort on.** + - **lfcSE** is the standard error of the log2FoldChange estimate. In general, + the log2 fold change for genes with high lfcSE will be shrunken more. This + column is sometimes useful for diagnostics but not routinely used. + - **stat** is the test statistic used for generating the p-value. It can + sometimes be useful for diagnostics but is not routinely used. + - **pvalue** is the *raw pvalue*. **Do not interpret these values** because + they are not adjusted for multiple comparisons. This column is included + because sometimes it has an NA, which can be informative (see above notes + on NA values). + - **padj** is the pvalue, adjusted for multiple tests. This is required to + weed out bad data that looks good. See this [StatQuest + video](https://www.youtube.com/watch?v=K8LQSvtjcEo) for an explanation. + **Use this column** to assess statistical significance. It is equivalent to + the false discovery rate. Unless otherwise noted, use the default of 0.1 as + the threshold. This means that out of all the genes that have padj values + less than 0.1, we may be wrong on 10% of those. In other words, 10% of the + significant genes aren't actually significant.
From 9006f0d9260b706316a4c5d816a245b3a4a58b1b Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Wed, 22 May 2024 13:11:28 -0400 Subject: [PATCH 55/93] functionalized code to improve readability and reduce repetative lines --- lib/lcdbwf/tests/testthat/test-functions.R | 84 ++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 lib/lcdbwf/tests/testthat/test-functions.R diff --git a/lib/lcdbwf/tests/testthat/test-functions.R b/lib/lcdbwf/tests/testthat/test-functions.R new file mode 100644 index 00000000..30dca7f4 --- /dev/null +++ b/lib/lcdbwf/tests/testthat/test-functions.R @@ -0,0 +1,84 @@ +# First row of dataframe that will be exported as featurecounts.txt. Note that in featurecounts.txt files that are generated by featurecounts only have 1 line for the first row. +# in R, we need 12 columns in each row hence the 11 NAs added to the end of fc_row1 +fc_row1 <- c("# Program:featureCounts v2.0.3; Command:\"featureCounts\" \"-s2\" \"-p\" \"--countReadPairs\" \"-T\" \"16\" \"-a\" \"/data/NICHD-core0/references/mouse/gencode_m33/annotation/mouse_gencode_m33.gtf\" \"-o\" \"data/rnaseq_aggregation/featurecounts.txt\" \"data/rnaseq_samples/sample1/sample1.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample2/sample2.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample3/sample3.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample4/sample4.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample5/sample5.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample6/sample6.cutadapt.markdups.bam\"", rep(NA, 11)) + +# Function to make the featurecounts.txt file and write it to this directory +make_featurecounts_file <- function(fc_nrow = 3000, filename = "featurecounts.txt", seed = 1) { + if (file.exists('featurecounts.txt')) { return('featurecounts.txt exists, exiting the function') } + # Create the same random data for every test + set.seed(seed) + # First row contains info on featureCounts command call including parameters and input files + # I saved that to a 
fc_row1.R as to not clutter this script + + # Create a data frame with example data + data <- data.frame( + Geneid = paste0("ENSMUSG", formatC(sample(10000:99999, fc_nrow, replace = FALSE), width = 10, flag = "0"), + ".", sample(1:20, fc_nrow, replace = TRUE)), # Append version IDs to test strip dotted + Chr = paste0("chr", sample(1:19, fc_nrow, replace = TRUE)), + Start = sample(30000000:99990000, fc_nrow), + End = sample(30001000:99999999, fc_nrow), + Strand = sample(c("+", "-"), fc_nrow, replace = TRUE), + Length = sample(500:15000, fc_nrow, replace = TRUE) + ) + + # Define sample paths based on lcdbwf featurecounts samplename format + sample_names <- c( + "data/rnaseq_samples/sample1/sample1.cutadapt.markdups.bam", + "data/rnaseq_samples/sample2/sample2.cutadapt.markdups.bam", + "data/rnaseq_samples/sample3/sample3.cutadapt.markdups.bam", + "data/rnaseq_samples/sample4/sample4.cutadapt.markdups.bam", + "data/rnaseq_samples/sample5/sample5.cutadapt.markdups.bam", + "data/rnaseq_samples/sample6/sample6.cutadapt.markdups.bam" + ) + # Simulate counts for each sample + counts <- matrix(sample(0:20000, fc_nrow, replace = TRUE), ncol = 6) + colnames(counts) <- sample_names + # Combine gene data and counts + feature_counts <- cbind(data, counts) + fc_row2 <- colnames(feature_counts) + colnames(feature_counts) <- NULL + feature_counts <- rbind(fc_row2, feature_counts) + # Add metadata to row1 saved in functions.R, colnames to row2 and data to the remaining `fc_nrow` rows + feature_counts <-rbind(fc_row1, feature_counts) + + # Write the data frame to a text file + write.table(feature_counts, file = filename, sep = "\t", quote = FALSE, row.names = FALSE) + return(paste("File saved as", filename)) +} # make_featureCounts_file + +# Helper function to make minimal default design data.
design_data is an argument and +# object of type list that is passed to make_dds() +make_design_data <- function() { + lst <- list( + # Create the sample table + sampletable = data.frame( + samplename = c("sample1", "sample2", "sample3", "sample4", "sample5", "sample6"), + condition = c(rep("control", 3), rep("treatment", 3))), + design = ~ condition + ) # lst + lst$sampletable$condition <- as.factor(lst$sampletable$condition) + return(lst) +} # make_default_wald_design_data + +make_dds_list <- function() { + # Create design data and dds object for Wald test type + wald_design_data <- make_design_data() + make_featurecounts_file() # Write 'featurecounts.txt' if it does not exist + dds_wald <- make_dds(wald_design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel) + + # Create design data and dds object for LRT test type + lrt_design_data <- make_design_data() + lrt_design_data$test <- 'LRT' + lrt_design_data$reduced_design <- ~1 + dds_lrt <- make_dds(lrt_design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel) + + # Create dds_list + dds_list <- list(dds_wald=dds_wald, dds_lrt=dds_lrt) + return(list(dds_list=dds_list, lrt_design_data=lrt_design_data)) +} From 03c856188f67efa1e54e8da7ff7702530d619df2 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Wed, 22 May 2024 13:12:23 -0400 Subject: [PATCH 56/93] added tests for make_results. 
Need to add tests that breakthings --- lib/lcdbwf/tests/testthat/test-contrasts.R | 165 +++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 lib/lcdbwf/tests/testthat/test-contrasts.R diff --git a/lib/lcdbwf/tests/testthat/test-contrasts.R b/lib/lcdbwf/tests/testthat/test-contrasts.R new file mode 100644 index 00000000..3d4229c5 --- /dev/null +++ b/lib/lcdbwf/tests/testthat/test-contrasts.R @@ -0,0 +1,165 @@ +library(DESeq2) +devtools::load_all('../../../../lib/lcdbwf') +config <- lcdbwf:::load_config('config.yaml') +library(testthat) +source('test-functions.R') +library(rlang) +library(stringr) + +# Helper function to run make_results and check the output +make_results_and_check <- function(dds_name=dds_name, + label=label, + test=NULL, + type=NULL, + lrt_design_data.=lrt_design_data, + contrast=NULL, + coef=NULL) { + print(label) + if (type != 'apeglm') { + # Use the 'contrast' argument when shrinkage type != 'apeglm' + res <- make_results(dds_name=dds_name, + label=label, + test=test, + type=type, + contrast=contrast) + } else if (type == 'apeglm') { + # Use the 'coef' argument instead of 'contrast' when shrinkage type == 'apeglm' + res <- make_results(dds_name=dds_name, + label=label, + test=test, + type=type, + coef=coef) + } + + # Check that res were returned by make_res + expect_true(!is.null(res)) + # Check that the res element returned by make_res is a DESeqres object + expect_true(inherits(res$res, "DESeqResults")) + # Save a character representing the source of LRT pvalue for comparison with make_results output + lrt_mcols_description <- paste0(as.character(lrt_design_data.$design)[1], " ", + as.character(lrt_design_data.$design)[2], "' vs '", + as.character(lrt_design_data.$reduced_design)[1], " ", + as.character(lrt_design_data.$reduced_design)[2], "'") + + # Check the metadata in res for correct test and coef/contrast + # based on each combination of test, type and coef/contrast arguments + if ((is.null(test) || test == 'Wald') 
&& type == 'apeglm') { + coef <- str_split(coef, "_")[[1]] + expected_char <- paste(test, "test p-value:", coef[1], coef[2], coef[3], coef[4]) + expect_true(mcols(res$res)$description[4] == expected_char) + # Check that res for the correct test was extracted for all tests and types excluding 'apeglm' + } else if ((is.null(test) || test == 'Wald') && type != 'normal') { + expected_char <- paste(test, "test p-value:", contrast[1], contrast[2], "vs", contrast[3]) + expect_true(mcols(res$res)$description[4] == expected_char) + } else if ((is.null(test) || test == 'Wald') && type == 'normal') { + expected_char <- paste(test, "statistic:", contrast[1], contrast[2], "vs", contrast[3]) + expect_true(mcols(res$res)$description[4] == expected_char) + } else if (test == 'LRT' && type != 'normal') { + expected_char <- paste0(test, " p-value: '", lrt_mcols_description) + expect_true(mcols(res$res)$description[4] == expected_char) + } else if (test == 'LRT' && type == 'normal') { + expected_char <- paste0(test, " statistic: '", lrt_mcols_description) + expect_true(mcols(res$res)$description[4] == expected_char) + } else { + stop(paste(label, 'was not checked')) + } + + # mdcat(mcols(res_i)$description[2]) # For log fold change value possibly want to test for LRT + # Check that the metadata of the res object includes the correct shrinkage type + if (!is.null(type)) { + expect_true(metadata(res$res)$type == type) + } else if (is.null(type)) { + expect_true(is.null(metadata(res$res)$type)) + } +} # make_results_and_check + +# Get the dds_list containing dds_wald and dds_lrt dds objects +# Also save the full and reduced design formulas used to create dds_lrt +dds_and_lrt_design <- make_dds_list() +dds_list <- dds_and_lrt_design$dds_list +lrt_design_data <- dds_and_lrt_design$lrt_design_data + +# Test all combinations of test and type +# NULL shrinkage type skips lfcshrink +# NULL test type runs Wald test (default test) +tests <- list('Wald', 'LRT', NULL) +shrinkage_types <- 
list(NULL, 'ashr', 'apeglm', 'normal') + +for (test in tests) { + for (type in shrinkage_types) { + if (test == 'Wald' || is.null(test)) { + dds_name <- 'dds_wald' + } else if (test == 'LRT') { + dds_name <- 'dds_lrt' + } + test_label <- paste0("test=", ifelse(is.null(test), "NULL/default (Wald)", test), ", type=", ifelse(is.null(type), "NULL (Skip)", type)) + test_that(paste("make_results works correctly with", test_label), { + if (type != 'apeglm') { + make_results_and_check(dds_name=dds_name, + label=test_label, + test=test, + type=type, + lrt_design_data=lrt_design_data, + contrast=c("condition", "treatment", "control")) + } else if (type == 'apeglm') { + make_results_and_check(dds_name=dds_name, + label=test_label, + test=test, + type=type, + lrt_design_data, + coef="condition_treatment_vs_control") + } # else if type == 'apeglm' + }) # test_that make_results works correctly with each combination of test and type + } # for type in shrinkage_types +} # for test in tests + +test_that("make_results errors on invalid 'test' option", { + design_data <- make_design_data() + design_data$test <- "invalid_test_option" + design_data$reduced_design <- ~1 + + expect_error(make_dds(design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel), + "Valid options for test are \\'Wald\\' \\(default\\) or \\'LRT\\'") +}) + +test_that("make_results can handle dds object directly", { + design_data <- make_design_data() + design_data$test <- 'Wald' + make_featurecounts_file() + dds <- make_dds(design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel) + + # Directly pass the dds object + results <- make_results(dds_name=dds, + label='Direct DDS', + test='Wald', + type='ashr', + contrast=c("condition", "treated", "control")) + + # Check that the res element is a DESeqResults object + expect_true(inherits(results$res, "DESeqResults")) + # Check that the metadata of the results object includes the 
correct type + expect_true(metadata(results$res)$type == "ashr") +}) + +test_that("make_results handles missing 'samplename' column", { + design_data <- make_design_data() + design_data$test <- 'Wald' + make_featurecounts_file() + dds <- make_dds(design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel) + + # Remove the 'samplename' column to trigger error + colData(dds)$samplename <- NULL + + expect_error(dds_coefs(dds, colour=='white'), + "Need to have 'samplename' as a column in colData") +}) + From a39b292659ea08c10809fc17993ab904e24f6094 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Thu, 23 May 2024 11:36:43 -0400 Subject: [PATCH 57/93] functionalized working code and it broke here, fixing metadata(res$res)$type --- lib/lcdbwf/tests/testthat/test-contrasts.R | 123 +++------------------ lib/lcdbwf/tests/testthat/test-functions.R | 121 ++++++++++++++++++-- 2 files changed, 130 insertions(+), 114 deletions(-) diff --git a/lib/lcdbwf/tests/testthat/test-contrasts.R b/lib/lcdbwf/tests/testthat/test-contrasts.R index 3d4229c5..09c3d077 100644 --- a/lib/lcdbwf/tests/testthat/test-contrasts.R +++ b/lib/lcdbwf/tests/testthat/test-contrasts.R @@ -1,118 +1,27 @@ library(DESeq2) -devtools::load_all('../../../../lib/lcdbwf') -config <- lcdbwf:::load_config('config.yaml') library(testthat) -source('test-functions.R') library(rlang) library(stringr) - -# Helper function to run make_results and check the output -make_results_and_check <- function(dds_name=dds_name, - label=label, - test=NULL, - type=NULL, - lrt_design_data.=lrt_design_data, - contrast=NULL, - coef=NULL) { - print(label) - if (type != 'apeglm') { - # Use the 'contrast' argument when shrinkage type != 'apeglm' - res <- make_results(dds_name=dds_name, - label=label, - test=test, - type=type, - contrast=contrast) - } else if (type == 'apeglm') { - # Use the 'coef' argument instead of 'contrast' when shrinkage type == 'apeglm' - res <- 
make_results(dds_name=dds_name, - label=label, - test=test, - type=type, - coef=coef) - } - - # Check that res were returned by make_res - expect_true(!is.null(res)) - # Check that the res element returned by make_res is a DESeqres object - expect_true(inherits(res$res, "DESeqResults")) - # Save a character representing the source of LRT pvalue for comparison with make_results output - lrt_mcols_description <- paste0(as.character(lrt_design_data.$design)[1], " ", - as.character(lrt_design_data.$design)[2], "' vs '", - as.character(lrt_design_data.$reduced_design)[1], " ", - as.character(lrt_design_data.$reduced_design)[2], "'") - - # Check the metadata in res for correct test and coef/contrast - # based on each combination of test, type and coef/contrast arguments - if ((is.null(test) || test == 'Wald') && type == 'apeglm') { - coef <- str_split(coef, "_")[[1]] - expected_char <- paste(test, "test p-value:", coef[1], coef[2], coef[3], coef[4]) - expect_true(mcols(res$res)$description[4] == expected_char) - # Check that res for the correct test was extracted for all tests and types excluding 'apeglm' - } else if ((is.null(test) || test == 'Wald') && type != 'normal') { - expected_char <- paste(test, "test p-value:", contrast[1], contrast[2], "vs", contrast[3]) - expect_true(mcols(res$res)$description[4] == expected_char) - } else if ((is.null(test) || test == 'Wald') && type == 'normal') { - expected_char <- paste(test, "statistic:", contrast[1], contrast[2], "vs", contrast[3]) - expect_true(mcols(res$res)$description[4] == expected_char) - } else if (test == 'LRT' && type != 'normal') { - expected_char <- paste0(test, " p-value: '", lrt_mcols_description) - expect_true(mcols(res$res)$description[4] == expected_char) - } else if (test == 'LRT' && type == 'normal') { - expected_char <- paste0(test, " statistic: '", lrt_mcols_description) - expect_true(mcols(res$res)$description[4] == expected_char) - } else { - stop(paste(label, 'was not checked')) - } - - # 
mdcat(mcols(res_i)$description[2]) # For log fold change value possibly want to test for LRT - # Check that the metadata of the res object includes the correct shrinkage type - if (!is.null(type)) { - expect_true(metadata(res$res)$type == type) - } else if (is.null(type)) { - expect_true(is.null(metadata(res$res)$type)) - } -} # make_results_and_check - -# Get the dds_list containing dds_wald and dds_lrt dds objects -# Also save the full and reduced design formulas used to create dds_lrt -dds_and_lrt_design <- make_dds_list() -dds_list <- dds_and_lrt_design$dds_list -lrt_design_data <- dds_and_lrt_design$lrt_design_data +devtools::load_all('../../../../lib/lcdbwf') +config <- lcdbwf:::load_config('config.yaml') +source('test-functions.R') # Test all combinations of test and type -# NULL shrinkage type skips lfcshrink -# NULL test type runs Wald test (default test) +# NULL shrinkage type skips lfcShrink +# NULL test type runs Wald (default test) tests <- list('Wald', 'LRT', NULL) -shrinkage_types <- list(NULL, 'ashr', 'apeglm', 'normal') - -for (test in tests) { - for (type in shrinkage_types) { - if (test == 'Wald' || is.null(test)) { - dds_name <- 'dds_wald' - } else if (test == 'LRT') { - dds_name <- 'dds_lrt' - } - test_label <- paste0("test=", ifelse(is.null(test), "NULL/default (Wald)", test), ", type=", ifelse(is.null(type), "NULL (Skip)", type)) - test_that(paste("make_results works correctly with", test_label), { - if (type != 'apeglm') { - make_results_and_check(dds_name=dds_name, - label=test_label, - test=test, - type=type, - lrt_design_data=lrt_design_data, - contrast=c("condition", "treatment", "control")) - } else if (type == 'apeglm') { - make_results_and_check(dds_name=dds_name, - label=test_label, - test=test, - type=type, - lrt_design_data, - coef="condition_treatment_vs_control") - } # else if type == 'apeglm' - }) # test_that make_results works correctly with each combination of test and type - } # for type in shrinkage_types -} # for test in 
tests +shrinkage_types <- list('ashr', 'apeglm', 'normal', NULL) +contrast <- c("condition", "treatment", "control") +coef <- "condition_treatment_vs_control" +# Make the dds_list containing dds_wald and dds_lrt dds objects +# Also save the full and reduced design formulas used to create dds_lrt +#dds_and_lrt_design <- make_lists() +#dds_list <- dds_and_lrt_design$dds_list # The get_dds call in make_results requires dds_list to be in .GlobalEnv +#lrt_design_data <- dds_and_lrt_design$lrt_design_data +test_make_results(tests, shrinkage_types, contrast, coef, dds_list, lrt_design_data) +# Now we intentionally call make_results with incompatible sets of parameters +# based on what I think is likely test_that("make_results errors on invalid 'test' option", { design_data <- make_design_data() design_data$test <- "invalid_test_option" diff --git a/lib/lcdbwf/tests/testthat/test-functions.R b/lib/lcdbwf/tests/testthat/test-functions.R index 30dca7f4..ab9617c7 100644 --- a/lib/lcdbwf/tests/testthat/test-functions.R +++ b/lib/lcdbwf/tests/testthat/test-functions.R @@ -1,14 +1,14 @@ -# First row of dataframe that will be exported as featurecounts.txt. Note that in featurecounts.txt files that are generated by featurecounts only have 1 line for the first row. 
-# in R, we need 12 columns in each row hence the 11 NAs added to the end of fc_row1 -fc_row1 <- c("# Program:featureCounts v2.0.3; Command:\"featureCounts\" \"-s2\" \"-p\" \"--countReadPairs\" \"-T\" \"16\" \"-a\" \"/data/NICHD-core0/references/mouse/gencode_m33/annotation/mouse_gencode_m33.gtf\" \"-o\" \"data/rnaseq_aggregation/featurecounts.txt\" \"data/rnaseq_samples/sample1/sample1.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample2/sample2.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample3/sample3.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample4/sample4.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample5/sample5.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample6/sample6.cutadapt.markdups.bam\"", rep(NA, 11)) - # Function to make the featurecounts.txt file and write it to this directory make_featurecounts_file <- function(fc_nrow = 3000, filename = "featurecounts.txt", seed = 1) { if (file.exists('featurecounts.txt')) { return('featurecounts.txt exists, exiting the function') } # Create the same random data for every test set.seed(seed) # First row contains info on featureCounts command call including parameters and input files - # I saved that to a fc_row1.R as to not clutter this script + # First row of dataframe that will be exported as featurecounts.txt. Note that in featurecounts.txt + # files that are generated by featurecounts only have 1 line for the first row. 
+ # in R, we need 12 columns in each row hence the 11 NAs added to the end of fc_row1 + fc_row1 <- c("# Program:featureCounts v2.0.3; Command:\"featureCounts\" \"-s2\" \"-p\" \"--countReadPairs\" \"-T\" \"16\" \"-a\" \"/data/NICHD-core0/references/mouse/gencode_m33/annotation/mouse_gencode_m33.gtf\" \"-o\" \"data/rnaseq_aggregation/featurecounts.txt\" \"data/rnaseq_samples/sample1/sample1.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample2/sample2.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample3/sample3.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample4/sample4.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample5/sample5.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample6/sample6.cutadapt.markdups.bam\"", rep(NA, 11)) + # Create a data frame with example data data <- data.frame( @@ -60,7 +60,7 @@ make_design_data <- function() { return(lst) } # make_default_wald_design_data -make_dds_list <- function() { +make_lists <- function() { # Create design data and dds object for Wald test type wald_design_data <- make_design_data() make_featurecounts_file() # Write 'featurecounts.txt' if it does not exist @@ -81,4 +81,111 @@ make_dds_list <- function() { # Create dds_list dds_list <- list(dds_wald=dds_wald, dds_lrt=dds_lrt) return(list(dds_list=dds_list, lrt_design_data=lrt_design_data)) -} +} # make_dds_list + +# Helper function to check the output of make_results +check_results <- function(res, lrt_design_data, label, contrast = NULL, coef = NULL, test = NULL, type = NULL) { + print(label) + # Check that results were returned by make_res + expect_true(!is.null(res)) + expect_true(identical(names(res), c('res', 'dds', 'label'))) + # Check that the res element returned by make_res is a DESeqResults object + expect_true(inherits(res$res, "DESeqResults")) + # Save a character representing the source of LRT pvalue for comparison with make_results output + lrt_mcols_description <- paste0(as.character(lrt_design_data$design)[1], " ", + 
as.character(lrt_design_data$design)[2], "' vs '", + as.character(lrt_design_data$reduced_design)[1], " ", + as.character(lrt_design_data$reduced_design)[2], "'") + + # Check the metadata in res for correct test and coef/contrast + # based on each combination of test, type and coef/contrast arguments + + if ((is.null(test) || test == 'Wald') && (!is.null(type) && type == 'ashr')) { + # test == 'Wald' and type == 'ashr' -- OR -- test == NULL and type == 'ashr' + expected_char <- paste(test %||% 'Wald', "test p-value:", contrast[1], contrast[2], "vs", contrast[3]) + expect_true(mcols(res$res)$description[4] == expected_char) + } else if ((is.null(test) || test == 'Wald') && (!is.null(type) && type == 'apeglm')) { + # test == 'Wald' and type == 'apeglm' -- OR -- test == NULL and type == 'apeglm' + coef <- str_split(coef, "_")[[1]] + expected_char <- paste(test %||% 'Wald', "test p-value:", coef[1], coef[2], coef[3], coef[4]) + expect_true(mcols(res$res)$description[4] == expected_char) + } else if ((is.null(test) || test == 'Wald') && (is.null(type) || type == 'normal')) { + # test == 'Wald' and type == 'normal', -- OR -- test == 'Wald' and type == NULL + # test == NULL and type == 'normal' -- OR -- test == NULL and type == NULL + expected_char <- paste(test %||% 'Wald', "statistic:", contrast[1], contrast[2], "vs", contrast[3]) + expect_true(mcols(res$res)$description[4] == expected_char) + } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && type != 'normal')) { + # test == 'LRT' and type == 'ashr' -- OR -- test == 'LRT and type == 'apeglm' + expected_char <- paste0(test, " p-value: '", lrt_mcols_description) + expect_true(mcols(res$res)$description[4] == expected_char) + } else if ((!is.null(test) && test == 'LRT') && (is.null(type) || type == 'normal')) { + # test == 'LRT and type == 'normal' -- OR -- test == 'LRT and type == NULL + expected_char <- paste0(test, " statistic: '", lrt_mcols_description) + expect_true(mcols(res$res)$description[4] == 
expected_char) + } else { + stop(paste(label, 'was not checked')) + } + + # Check that the make_results defined metadata in the res object includes the correct shrinkage type + if (!is.null(type)) { + print("names of metadata of res:") + print(names(metadata(res$res))) + print("type metadata of res:") + print(metadata(res$res)$type) + print("Expected type:") + print(type) + expect_true(identical(metadata(res$res)$type, type)) + } else if (is.null(type)) { + expect_true(is.null(metadata(res$res)$type)) + } +} # check_results + +# This function calls make_results on all combinations tests and shrinkage types passed in as character vectors +# It then tests the output for the expected results +test_make_results <- function(tests, shrinkage_types, contrast, coef, dds_list, lrt_design_data) { + + for (test in tests) { + for (type in shrinkage_types) { + if (test == 'Wald' || is.null(test)) { + dds_name <- 'dds_wald' + } else if (test == 'LRT') { + dds_name <- 'dds_lrt' + } + label <- paste0("test=", test %||% "NULL/default (Wald)", ", type=", type %||% "NULL (Skip)") + test_that(paste("make_results works correctly with", label), { + if ((!is.null(test) && test == 'LRT') && is.null(type)) { + res <- make_results(dds_name=dds_name, label=label, test=test, type=NULL) # No contrast when running test == 'LRT' + check_results(res, lrt_design_data, label, test=test, type=NULL) + } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && !type %in% c('apeglm','normal'))) { + res <- make_results(dds_name=dds_name, label=label, test=test, type=type) # No contrast when running test == 'LRT' + check_results(res, lrt_design_data, label, test=test, type=type) + } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && type %in% c('apeglm','normal'))) { + # No contrast when running test == 'LRT'. 
But coef is required for shrinkage type == 'apeglm' and 'apeglm' + res <- make_results(dds_name=dds_name, label=label, test=test, type=type, coef=coef) + check_results(res, lrt_design_data, label, test=test, type=type) + } else if (!is.null(test) && type != 'apeglm' && !is.null(type)) { + res <- make_results(dds_name=dds_name, label=label, test=test, type=type, contrast=contrast) + check_results(res, lrt_design_data, label, contrast=contrast, test=test, type=type) + } else if (is.null(test) && type != 'apeglm' && !is.null(type)) { + res <- make_results(dds_name=dds_name, label=label, test=NULL, type=type, contrast=contrast) + check_results(res, lrt_design_data, label, contrast=contrast, test=NULL, type=type) + } else if (!is.null(test) && is.null(type)) { + res <- make_results(dds_name=dds_name, label=label, test=test, type=NULL, contrast=contrast) + check_results(res, lrt_design_data, label, contrast=contrast, test=test, type=NULL) + } else if (is.null(test) && is.null(type)) { + res <- make_results(dds_name=dds_name, label=label, test=NULL, type=NULL, contrast=contrast) + check_results(res, lrt_design_data, label, contrast=contrast, test=NULL, type=NULL) + } else if (!is.null(test) && type == 'apeglm') { + res <- make_results(dds_name=dds_name, label=label, test=test, type=type, coef=coef) + check_results(res, lrt_design_data, label, coef=coef, test=test, type=type) + } else if (is.null(test) && type == 'apeglm') { + res <- make_results(dds_name=dds_name, label=label, test=NULL, type=type, coef=coef) + check_results(res, lrt_design_data, label, coef=coef, test=NULL, type=type) + } else { + stop(paste(label, "was not tested")) + } + }) # test_that make_results works correctly with each combination of test and type + } # for type in shrinkage_types + } # for test in tests +} # test_make_results + From 3c0fe27e1950a180a809253734680612ca243f2d Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Thu, 23 May 2024 14:59:47 -0400 Subject: [PATCH 58/93] added check for 
reduced design when test != 'LRT'. Added users test argument in error message when test is invalid. Added checks for these in test-dds.R --- lib/lcdbwf/R/dds.R | 5 ++- lib/lcdbwf/tests/testthat/test-dds.R | 46 ++++++++++++++++++---------- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/lib/lcdbwf/R/dds.R b/lib/lcdbwf/R/dds.R index 9c886181..226515f5 100644 --- a/lib/lcdbwf/R/dds.R +++ b/lib/lcdbwf/R/dds.R @@ -58,9 +58,12 @@ make_dds <- function(design_data, config=NULL, collapse_by=NULL, design <- purrr::pluck(design_data, 'design') test <- purrr::pluck(design_data, 'test', .default=default_test) if (!(test %in% c('Wald', 'LRT'))){ - stop("Valid options for test are 'Wald' (default) or 'LRT'") + stop(paste("Valid options for test are 'Wald' (default) or 'LRT'. You chose,", test)) } reduced_design <- purrr::pluck(design_data, 'reduced_design') + if (!is.null(reduced_design) && test != 'LRT') { + stop("You included a reduced design formula but did not specify test = 'LRT'") + } location <- purrr::pluck(design_data, 'filename', .default=featureCounts) salmon <- purrr::pluck(design_data, 'salmon') kallisto <- purrr::pluck(design_data, 'kallisto') diff --git a/lib/lcdbwf/tests/testthat/test-dds.R b/lib/lcdbwf/tests/testthat/test-dds.R index cea30d03..f7fbc017 100644 --- a/lib/lcdbwf/tests/testthat/test-dds.R +++ b/lib/lcdbwf/tests/testthat/test-dds.R @@ -5,30 +5,24 @@ library(testthat) source('test-functions.R') # --------- Test strip_dotted_version_from_dds() ---------- # -test_that("stripping gene versions", { +test_that("strip_dotted_version_from_dds works", { dds <- DESeq2::makeExampleDESeqDataSet() # genes are labeled like "gene1", "gene2", etc; change them to "gene1.1", # gene2.2", and so on - rownames(dds) <- paste(rownames(dds), seq(1000), sep='.') - expect_error(lcdbwf:::strip_dotted_version_from_dds(dds), "Gene names don't appear to be Ensembl") - forced <- lcdbwf:::strip_dotted_version_from_dds(dds, force=TRUE) 
expect_equal(rownames(forced)[1], "gene1") - rownames(dds) <- paste0("ENS", rownames(dds), '.', seq(1000)) fixed <- lcdbwf:::strip_dotted_version_from_dds(dds) expect_equal(rownames(fixed)[1], "ENSgene1") - -}) +}) # test_that # --------------------------------------------------------- # -# ---------- Test make_dds(test = 'LRT' || 'Wald') -------- # +# --------------------- Test make_dds() ------------------- # test_that("make_dds handles minimum required design data of sampletale, and design formula, using example featurecounts table", { design_data <- make_design_data() - make_featurecounts_file() # featurecounts.txt will be written to this directory dds <- make_dds(design_data, config=config, featureCounts='featurecounts.txt', @@ -39,7 +33,7 @@ test_that("make_dds handles minimum required design data of sampletale, # is run with minimal input to make_dds() expect_true(any(grepl('Wald', colnames(mcols(dds))))) expect_false(any(grepl('LRT', colnames(mcols(dds))))) -}) +}) # test_that test_that("make_dds handles design data where Wald test is specified explicitly", { design_data <- make_design_data() @@ -53,7 +47,7 @@ test_that("make_dds handles design data where Wald test is specified explicitly" # Check that the WaldStatistic has been generated when Wald is passed to make_dds() expect_true(any(grepl('Wald', colnames(mcols(dds))))) expect_false(any(grepl('LRT', colnames(mcols(dds))))) -}) +}) # test_that test_that("make_dds handles required design data for LRT", { design_data <- make_design_data() @@ -68,18 +62,38 @@ test_that("make_dds handles required design data for LRT", { # Check that the WaldStatistic has been generated expect_true(any(grepl('LRT', colnames(mcols(dds))))) expect_false(any(grepl('Wald', colnames(mcols(dds))))) -}) +}) # test_that +# Now we intentionally call make_results with incompatible parameters test_that("make_dds errors on invalid 'test' option", { design_data <- make_design_data() design_data$test <- "invalid_test_option" + 
expect_error(make_dds(design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel), + paste("Valid options for test are \\'Wald\\' \\(default\\) or \\'LRT\\'. You chose,", design_data$test)) +}) # test_that +test_that("make_dds errors on missing reduced design when 'test' is set to LRT", { + design_data <- make_design_data() + design_data$test <- "LRT" expect_error(make_dds(design_data, config=config, featureCounts='featurecounts.txt', parallel=config$parallel$parallel), - "Valid options for test are \\'Wald\\' \\(default\\) or \\'LRT\\'") -}) + "When using LRT, reduced_design must be provided") +}) # test_that + +test_that("make_dds errors on missing test argument when reduced design is provided", { + design_data <- make_design_data() + design_data$reduced_design <- ~1 + expect_error(make_dds(design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel), + "You included a reduced design formula but did not specify test = 'LRT'") +}) # test_that # --------------------------------------------------- # # -------------- collapseReplicates2 ---------------- # @@ -93,7 +107,7 @@ test_that("collapseReplicates2 collapses the three control replicates and three config=config, featureCounts='featurecounts.txt', parallel=config$parallel$parallel) - + # Perform replicate collapsing dds_collapsed <- collapseReplicates2(dds, dds$condition) colData_collapsed <- as.data.frame(colData(dds_collapsed)) @@ -101,5 +115,5 @@ test_that("collapseReplicates2 collapses the three control replicates and three expect_equal(length(as.character(colData_collapsed$condition[colData_collapsed$condition == 'treatment'])), 1) # Check if the first column of colData matches rownames expect_equal(rownames(colData_collapsed), colData_collapsed[,1]) -}) +}) # test_that # --------------------------------------------------- # From 126eb749ba73b9ccae26701c84d52ba328c16df5 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: 
Thu, 23 May 2024 16:27:43 -0400 Subject: [PATCH 59/93] tests are working in test-contrasts.R added code to register workers --- env-r.yml | 1 + lib/lcdbwf/tests/testthat/test-contrasts.R | 85 +++++++++++++++++++--- lib/lcdbwf/tests/testthat/test-functions.R | 39 +++++++--- 3 files changed, 105 insertions(+), 20 deletions(-) diff --git a/env-r.yml b/env-r.yml index 4f0a132b..51b9bd9e 100644 --- a/env-r.yml +++ b/env-r.yml @@ -402,3 +402,4 @@ dependencies: - yq=3.1.1 - zlib=1.2.13 - zstd=1.5.2 + - r-future diff --git a/lib/lcdbwf/tests/testthat/test-contrasts.R b/lib/lcdbwf/tests/testthat/test-contrasts.R index 09c3d077..3fa8fb7c 100644 --- a/lib/lcdbwf/tests/testthat/test-contrasts.R +++ b/lib/lcdbwf/tests/testthat/test-contrasts.R @@ -2,9 +2,15 @@ library(DESeq2) library(testthat) library(rlang) library(stringr) +library(BiocParallel) +library(future) devtools::load_all('../../../../lib/lcdbwf') config <- lcdbwf:::load_config('config.yaml') source('test-functions.R') +register(MulticoreParam(workers = future::availableCores()), default=TRUE) +#param <- bpparam() +#number_of_cores <- param$workers +#print(number_of_cores) # Test all combinations of test and type # NULL shrinkage type skips lfcShrink @@ -15,24 +21,85 @@ contrast <- c("condition", "treatment", "control") coef <- "condition_treatment_vs_control" # Make the dds_list containing dds_wald and dds_lrt dds objects # Also save the full and reduced design formulas used to create dds_lrt -#dds_and_lrt_design <- make_lists() -#dds_list <- dds_and_lrt_design$dds_list # The get_dds call in make_results requires dds_list to be in .GlobalEnv -#lrt_design_data <- dds_and_lrt_design$lrt_design_data -test_make_results(tests, shrinkage_types, contrast, coef, dds_list, lrt_design_data) +dds_and_lrt_design <- make_lists() +dds_list <- dds_and_lrt_design$dds_list # The get_dds call in make_results requires dds_list to be in .GlobalEnv +lrt_design_data <- dds_and_lrt_design$lrt_design_data +#test_make_results(tests, 
shrinkage_types, contrast, coef, dds_list, lrt_design_data) -# Now we intentionally call make_results with incompatible sets of parameters -# based on what I think is likely +### TESTING ### +#test <- 'Wald' +#type <- 'ashr' +#dds_name <- 'dds_wald' +#contrast <- c("condition", "treatment", "control") +#label <- paste0("test=", test %||% "NULL/default (Wald)", ", type=", type %||% "NULL (Skip)") +############## + +for (test in tests) { + for (type in shrinkage_types) { + if (test == 'Wald' || is.null(test)) { + dds_name <- 'dds_wald' + } else if (test == 'LRT') { + dds_name <- 'dds_lrt' + } + label <- paste0("test=", test %||% "NULL/default (Wald)", ", type=", type %||% "NULL (Skip)") + test_that(paste("make_results works correctly with", label), { + if ((!is.null(test) && test == 'LRT') && is.null(type)) { + res <- make_results(dds_name=dds_name, label=label, test=test, type=NULL) # No contrast when running test == 'LRT' + check_results(res, lrt_design_data, label, test=test, type=NULL) + } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && !type %in% c('apeglm','normal'))) { + res <- make_results(dds_name=dds_name, label=label, test=test, type=type) # No contrast when running test == 'LRT' + check_results(res, lrt_design_data, label, test=test, type=type) + } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && type %in% c('apeglm','normal'))) { + # No contrast when running test == 'LRT'. 
But coef is required for shrinkage type == 'apeglm' and 'apeglm' + res <- make_results(dds_name=dds_name, label=label, test=test, type=type, coef=coef) + check_results(res, lrt_design_data, label, test=test, type=type) + } else if (!is.null(test) && type != 'apeglm' && !is.null(type)) { + res <- make_results(dds_name=dds_name, label=label, test=test, type=type, contrast=contrast) # Wald, ashr +#print("str(res) ---------------- ") +#print(str(res)) +#print("str(res$res) ---------------- ") +#print(str(res$res)) +#print("names of metadata of res: -----------") +#print(names(metadata(res$res))) +#print("$type of metadata of res: ----------") +#print(metadata(res$res)$type) +#print("Expected type: ------------") +#print(type) + check_results(res, lrt_design_data, label, contrast=contrast, test=test, type=type) + } else if (is.null(test) && type != 'apeglm' && !is.null(type)) { + res <- make_results(dds_name=dds_name, label=label, test=NULL, type=type, contrast=contrast) + check_results(res, lrt_design_data, label, contrast=contrast, test=NULL, type=type) + } else if (!is.null(test) && is.null(type)) { + res <- make_results(dds_name=dds_name, label=label, test=test, type=NULL, contrast=contrast) + check_results(res, lrt_design_data, label, contrast=contrast, test=test, type=NULL) + } else if (is.null(test) && is.null(type)) { + res <- make_results(dds_name=dds_name, label=label, test=NULL, type=NULL, contrast=contrast) + check_results(res, lrt_design_data, label, contrast=contrast, test=NULL, type=NULL) + } else if (!is.null(test) && type == 'apeglm') { + res <- make_results(dds_name=dds_name, label=label, test=test, type=type, coef=coef) + check_results(res, lrt_design_data, label, coef=coef, test=test, type=type) + } else if (is.null(test) && type == 'apeglm') { + res <- make_results(dds_name=dds_name, label=label, test=NULL, type=type, coef=coef) + check_results(res, lrt_design_data, label, coef=coef, test=NULL, type=type) + } else { + stop(paste(label, "was not 
tested")) + } + }) # test_that make_results works correctly with each combination of test and type + } # for type in shrinkage_types +} # for test in tests + +# Now we intentionally call make_results with incompatible parameters test_that("make_results errors on invalid 'test' option", { design_data <- make_design_data() design_data$test <- "invalid_test_option" - design_data$reduced_design <- ~1 + res <- make_results(dds_name=dds_name, label=label, test=NULL, type=NULL, contrast=contrast) expect_error(make_dds(design_data, config=config, featureCounts='featurecounts.txt', parallel=config$parallel$parallel), - "Valid options for test are \\'Wald\\' \\(default\\) or \\'LRT\\'") -}) + paste("Valid options for test are \\'Wald\\' \\(default\\) or \\'LRT\\'. You chose,", test)) +}) # test_that make_dds errors on invalid test option test_that("make_results can handle dds object directly", { design_data <- make_design_data() diff --git a/lib/lcdbwf/tests/testthat/test-functions.R b/lib/lcdbwf/tests/testthat/test-functions.R index ab9617c7..f9adcc91 100644 --- a/lib/lcdbwf/tests/testthat/test-functions.R +++ b/lib/lcdbwf/tests/testthat/test-functions.R @@ -47,8 +47,9 @@ make_featurecounts_file <- function(fc_nrow = 3000, filename = "featurecounts.tx } # make_featureCounts_file # Helper function to make minimal default design data. 
design_data is an argument and -# object of type list that is passed to make_dds() +# object of type list that is passed to make_dds() make_design_data <- function() { + make_featurecounts_file() # featurecounts.txt will be written to this directory if it doesn't exist lst <- list( # Create the sample table sampletable = data.frame( @@ -143,7 +144,13 @@ check_results <- function(res, lrt_design_data, label, contrast = NULL, coef = N # This function calls make_results on all combinations tests and shrinkage types passed in as character vectors # It then tests the output for the expected results test_make_results <- function(tests, shrinkage_types, contrast, coef, dds_list, lrt_design_data) { - +### TESTING ### +#test <- 'Wald' +#type <- 'ashr' +#dds_name <- 'dds_wald' +#contrast <- c("condition", "treatment", "control") +#label <- paste0("test=", test %||% "NULL/default (Wald)", ", type=", type %||% "NULL (Skip)") +### for (test in tests) { for (type in shrinkage_types) { if (test == 'Wald' || is.null(test)) { @@ -154,32 +161,42 @@ test_make_results <- function(tests, shrinkage_types, contrast, coef, dds_list, label <- paste0("test=", test %||% "NULL/default (Wald)", ", type=", type %||% "NULL (Skip)") test_that(paste("make_results works correctly with", label), { if ((!is.null(test) && test == 'LRT') && is.null(type)) { - res <- make_results(dds_name=dds_name, label=label, test=test, type=NULL) # No contrast when running test == 'LRT' + res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=test, type=NULL) # No contrast when running test == 'LRT' check_results(res, lrt_design_data, label, test=test, type=NULL) } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && !type %in% c('apeglm','normal'))) { - res <- make_results(dds_name=dds_name, label=label, test=test, type=type) # No contrast when running test == 'LRT' + res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=test, type=type) # No contrast when running test == 'LRT' 
check_results(res, lrt_design_data, label, test=test, type=type) } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && type %in% c('apeglm','normal'))) { # No contrast when running test == 'LRT'. But coef is required for shrinkage type == 'apeglm' and 'apeglm' - res <- make_results(dds_name=dds_name, label=label, test=test, type=type, coef=coef) + res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=test, type=type, coef=coef) check_results(res, lrt_design_data, label, test=test, type=type) } else if (!is.null(test) && type != 'apeglm' && !is.null(type)) { - res <- make_results(dds_name=dds_name, label=label, test=test, type=type, contrast=contrast) + res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=test, type=type, contrast=contrast) # Wald, ashr +print("str(res) ---------------- ") +print(str(res)) +print("str(res$res) ---------------- ") +print(str(res$res)) +print("names of metadata of res: -----------") +print(names(metadata(res$res))) +print("$type of metadata of res: ----------") +print(metadata(res$res)$type) +print("Expected type: ------------") +print(type) check_results(res, lrt_design_data, label, contrast=contrast, test=test, type=type) } else if (is.null(test) && type != 'apeglm' && !is.null(type)) { - res <- make_results(dds_name=dds_name, label=label, test=NULL, type=type, contrast=contrast) + res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=NULL, type=type, contrast=contrast) check_results(res, lrt_design_data, label, contrast=contrast, test=NULL, type=type) } else if (!is.null(test) && is.null(type)) { - res <- make_results(dds_name=dds_name, label=label, test=test, type=NULL, contrast=contrast) + res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=test, type=NULL, contrast=contrast) check_results(res, lrt_design_data, label, contrast=contrast, test=test, type=NULL) } else if (is.null(test) && is.null(type)) { - res <- make_results(dds_name=dds_name, label=label, test=NULL, 
type=NULL, contrast=contrast) + res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=NULL, type=NULL, contrast=contrast) check_results(res, lrt_design_data, label, contrast=contrast, test=NULL, type=NULL) } else if (!is.null(test) && type == 'apeglm') { - res <- make_results(dds_name=dds_name, label=label, test=test, type=type, coef=coef) + res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=test, type=type, coef=coef) check_results(res, lrt_design_data, label, coef=coef, test=test, type=type) } else if (is.null(test) && type == 'apeglm') { - res <- make_results(dds_name=dds_name, label=label, test=NULL, type=type, coef=coef) + res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=NULL, type=type, coef=coef) check_results(res, lrt_design_data, label, coef=coef, test=NULL, type=type) } else { stop(paste(label, "was not tested")) From 5dc7a49389bf0a0251ab4b0f5c119334ebbf777d Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Tue, 28 May 2024 08:24:41 -0400 Subject: [PATCH 60/93] Fixed accidental removal of type argument in make_results call --- .gitignore | 2 + lib/lcdbwf/tests/testthat/config.yaml | 2 +- lib/lcdbwf/tests/testthat/test-contrasts.R | 100 ++++++--------------- lib/lcdbwf/tests/testthat/test-functions.R | 88 +++--------------- 4 files changed, 39 insertions(+), 153 deletions(-) diff --git a/.gitignore b/.gitignore index 80ab39f2..ccb26fb2 100644 --- a/.gitignore +++ b/.gitignore @@ -62,3 +62,5 @@ workflows/rnaseq/downstream/results workflows/rnaseq/downstream/rnaseq_cache workflows/rnaseq/downstream/rnaseq_files workflows/rnaseq/downstream/rnaseq.html +*.xlsx +*._* diff --git a/lib/lcdbwf/tests/testthat/config.yaml b/lib/lcdbwf/tests/testthat/config.yaml index 8d9833e5..32b9999e 100644 --- a/lib/lcdbwf/tests/testthat/config.yaml +++ b/lib/lcdbwf/tests/testthat/config.yaml @@ -131,7 +131,7 @@ parallel: # `OMP_NUM_THREADS=1` to prevent some R parallelization code from trying to # grab all cores on the node, even 
though you were only allocated a subset by # the batch scheduler. - cores: 8 + cores: "SLURM_CPUS_PER_TASK" # TOGGLE ----------------------------------------------------------------------- # This section provides an easy way to turn on or off entire parts of the diff --git a/lib/lcdbwf/tests/testthat/test-contrasts.R b/lib/lcdbwf/tests/testthat/test-contrasts.R index 3fa8fb7c..1b93bb7e 100644 --- a/lib/lcdbwf/tests/testthat/test-contrasts.R +++ b/lib/lcdbwf/tests/testthat/test-contrasts.R @@ -3,14 +3,10 @@ library(testthat) library(rlang) library(stringr) library(BiocParallel) -library(future) devtools::load_all('../../../../lib/lcdbwf') config <- lcdbwf:::load_config('config.yaml') source('test-functions.R') -register(MulticoreParam(workers = future::availableCores()), default=TRUE) -#param <- bpparam() -#number_of_cores <- param$workers -#print(number_of_cores) +register(MulticoreParam(config$parallel$cores)) # Test all combinations of test and type # NULL shrinkage type skips lfcShrink @@ -19,12 +15,8 @@ tests <- list('Wald', 'LRT', NULL) shrinkage_types <- list('ashr', 'apeglm', 'normal', NULL) contrast <- c("condition", "treatment", "control") coef <- "condition_treatment_vs_control" -# Make the dds_list containing dds_wald and dds_lrt dds objects -# Also save the full and reduced design formulas used to create dds_lrt -dds_and_lrt_design <- make_lists() -dds_list <- dds_and_lrt_design$dds_list # The get_dds call in make_results requires dds_list to be in .GlobalEnv -lrt_design_data <- dds_and_lrt_design$lrt_design_data -#test_make_results(tests, shrinkage_types, contrast, coef, dds_list, lrt_design_data) +dds_list <- make_dds_list() +lrt_design_data <- make_lrt_design_data() ### TESTING ### #test <- 'Wald' @@ -44,98 +36,58 @@ for (test in tests) { label <- paste0("test=", test %||% "NULL/default (Wald)", ", type=", type %||% "NULL (Skip)") test_that(paste("make_results works correctly with", label), { if ((!is.null(test) && test == 'LRT') && is.null(type)) { 
- res <- make_results(dds_name=dds_name, label=label, test=test, type=NULL) # No contrast when running test == 'LRT' + res <- make_results(dds_name=dds_name, label=label, type=NULL) # No contrast when running test == 'LRT' check_results(res, lrt_design_data, label, test=test, type=NULL) } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && !type %in% c('apeglm','normal'))) { - res <- make_results(dds_name=dds_name, label=label, test=test, type=type) # No contrast when running test == 'LRT' + res <- make_results(dds_name=dds_name, label=label, type=type) # No contrast when running test == 'LRT' check_results(res, lrt_design_data, label, test=test, type=type) } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && type %in% c('apeglm','normal'))) { - # No contrast when running test == 'LRT'. But coef is required for shrinkage type == 'apeglm' and 'apeglm' - res <- make_results(dds_name=dds_name, label=label, test=test, type=type, coef=coef) + # 'coef' is required for shrinkage type == 'apeglm' and 'apeglm' + res <- make_results(dds_name=dds_name, label=label, type=type, coef=coef) check_results(res, lrt_design_data, label, test=test, type=type) } else if (!is.null(test) && type != 'apeglm' && !is.null(type)) { - res <- make_results(dds_name=dds_name, label=label, test=test, type=type, contrast=contrast) # Wald, ashr -#print("str(res) ---------------- ") -#print(str(res)) -#print("str(res$res) ---------------- ") -#print(str(res$res)) -#print("names of metadata of res: -----------") -#print(names(metadata(res$res))) -#print("$type of metadata of res: ----------") -#print(metadata(res$res)$type) -#print("Expected type: ------------") -#print(type) + res <- make_results(dds_name=dds_name, label=label, type=type, contrast=contrast) # Wald, ashr check_results(res, lrt_design_data, label, contrast=contrast, test=test, type=type) } else if (is.null(test) && type != 'apeglm' && !is.null(type)) { - res <- make_results(dds_name=dds_name, label=label, 
test=NULL, type=type, contrast=contrast) + res <- make_results(dds_name=dds_name, label=label, type=type, contrast=contrast) check_results(res, lrt_design_data, label, contrast=contrast, test=NULL, type=type) } else if (!is.null(test) && is.null(type)) { - res <- make_results(dds_name=dds_name, label=label, test=test, type=NULL, contrast=contrast) + res <- make_results(dds_name=dds_name, label=label, type=NULL, contrast=contrast) check_results(res, lrt_design_data, label, contrast=contrast, test=test, type=NULL) } else if (is.null(test) && is.null(type)) { - res <- make_results(dds_name=dds_name, label=label, test=NULL, type=NULL, contrast=contrast) + res <- make_results(dds_name=dds_name, label=label, type=NULL, contrast=contrast) check_results(res, lrt_design_data, label, contrast=contrast, test=NULL, type=NULL) } else if (!is.null(test) && type == 'apeglm') { - res <- make_results(dds_name=dds_name, label=label, test=test, type=type, coef=coef) + res <- make_results(dds_name=dds_name, label=label, type=type, coef=coef) check_results(res, lrt_design_data, label, coef=coef, test=test, type=type) } else if (is.null(test) && type == 'apeglm') { - res <- make_results(dds_name=dds_name, label=label, test=NULL, type=type, coef=coef) + res <- make_results(dds_name=dds_name, label=label, type=type, coef=coef) check_results(res, lrt_design_data, label, coef=coef, test=NULL, type=type) } else { stop(paste(label, "was not tested")) } - }) # test_that make_results works correctly with each combination of test and type + }) # test_that } # for type in shrinkage_types } # for test in tests -# Now we intentionally call make_results with incompatible parameters -test_that("make_results errors on invalid 'test' option", { - design_data <- make_design_data() - design_data$test <- "invalid_test_option" - - res <- make_results(dds_name=dds_name, label=label, test=NULL, type=NULL, contrast=contrast) - expect_error(make_dds(design_data, - config=config, - 
featureCounts='featurecounts.txt', - parallel=config$parallel$parallel), - paste("Valid options for test are \\'Wald\\' \\(default\\) or \\'LRT\\'. You chose,", test)) -}) # test_that make_dds errors on invalid test option - test_that("make_results can handle dds object directly", { - design_data <- make_design_data() - design_data$test <- 'Wald' - make_featurecounts_file() - dds <- make_dds(design_data, - config=config, - featureCounts='featurecounts.txt', - parallel=config$parallel$parallel) - + dds <- dds_list[['dds_wald']] # Directly pass the dds object results <- make_results(dds_name=dds, label='Direct DDS', - test='Wald', type='ashr', - contrast=c("condition", "treated", "control")) + contrast=c("condition", "treatment", "control")) # Check that the res element is a DESeqResults object expect_true(inherits(results$res, "DESeqResults")) - # Check that the metadata of the results object includes the correct type - expect_true(metadata(results$res)$type == "ashr") -}) - -test_that("make_results handles missing 'samplename' column", { - design_data <- make_design_data() - design_data$test <- 'Wald' - make_featurecounts_file() - dds <- make_dds(design_data, - config=config, - featureCounts='featurecounts.txt', - parallel=config$parallel$parallel) - - # Remove the 'samplename' column to trigger error - colData(dds)$samplename <- NULL - - expect_error(dds_coefs(dds, colour=='white'), - "Need to have 'samplename' as a column in colData") -}) +}) # test_that +#test_that("make_results errors when a dds_name is passed and dds_list is missing from .GlobalEnv", { +# remove(dds_list) +# expect_error(make_results(dds_name='dds_wald', +# label='missing dds_list', +# type='ashr', +# contrast=c("condition", "treatment", "control")), +# "Can't find dds_list in global environment.") +# +#}) # test_that diff --git a/lib/lcdbwf/tests/testthat/test-functions.R b/lib/lcdbwf/tests/testthat/test-functions.R index f9adcc91..4cd8911f 100644 --- 
a/lib/lcdbwf/tests/testthat/test-functions.R +++ b/lib/lcdbwf/tests/testthat/test-functions.R @@ -61,7 +61,7 @@ make_design_data <- function() { return(lst) } # make_default_wald_design_data -make_lists <- function() { +make_dds_list <- function() { # Create design data and dds object for Wald test type wald_design_data <- make_design_data() make_featurecounts_file() # Write 'featurecounts.txt' if it does not exist @@ -70,10 +70,6 @@ make_lists <- function() { featureCounts='featurecounts.txt', parallel=config$parallel$parallel) - # Create design data and dds object for LRT test type - lrt_design_data <- make_design_data() - lrt_design_data$test <- 'LRT' - lrt_design_data$reduced_design <- ~1 dds_lrt <- make_dds(lrt_design_data, config=config, featureCounts='featurecounts.txt', @@ -81,9 +77,17 @@ make_lists <- function() { # Create dds_list dds_list <- list(dds_wald=dds_wald, dds_lrt=dds_lrt) - return(list(dds_list=dds_list, lrt_design_data=lrt_design_data)) + return(dds_list) } # make_dds_list +# Function to create design data for LRT test +make_lrt_design_data <- function() { + lrt_design_data <- make_design_data() + lrt_design_data$test <- 'LRT' + lrt_design_data$reduced_design <- ~1 + return(lrt_design_data) +} # make_lrt_design_data + # Helper function to check the output of make_results check_results <- function(res, lrt_design_data, label, contrast = NULL, coef = NULL, test = NULL, type = NULL) { print(label) @@ -129,80 +133,8 @@ check_results <- function(res, lrt_design_data, label, contrast = NULL, coef = N # Check that the make_results defined metadata in the res object includes the correct shrinkage type if (!is.null(type)) { - print("names of metadata of res:") - print(names(metadata(res$res))) - print("type metadata of res:") - print(metadata(res$res)$type) - print("Expected type:") - print(type) expect_true(identical(metadata(res$res)$type, type)) } else if (is.null(type)) { expect_true(is.null(metadata(res$res)$type)) } } # check_results - -# This 
function calls make_results on all combinations tests and shrinkage types passed in as character vectors -# It then tests the output for the expected results -test_make_results <- function(tests, shrinkage_types, contrast, coef, dds_list, lrt_design_data) { -### TESTING ### -#test <- 'Wald' -#type <- 'ashr' -#dds_name <- 'dds_wald' -#contrast <- c("condition", "treatment", "control") -#label <- paste0("test=", test %||% "NULL/default (Wald)", ", type=", type %||% "NULL (Skip)") -### - for (test in tests) { - for (type in shrinkage_types) { - if (test == 'Wald' || is.null(test)) { - dds_name <- 'dds_wald' - } else if (test == 'LRT') { - dds_name <- 'dds_lrt' - } - label <- paste0("test=", test %||% "NULL/default (Wald)", ", type=", type %||% "NULL (Skip)") - test_that(paste("make_results works correctly with", label), { - if ((!is.null(test) && test == 'LRT') && is.null(type)) { - res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=test, type=NULL) # No contrast when running test == 'LRT' - check_results(res, lrt_design_data, label, test=test, type=NULL) - } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && !type %in% c('apeglm','normal'))) { - res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=test, type=type) # No contrast when running test == 'LRT' - check_results(res, lrt_design_data, label, test=test, type=type) - } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && type %in% c('apeglm','normal'))) { - # No contrast when running test == 'LRT'. 
But coef is required for shrinkage type == 'apeglm' and 'apeglm' - res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=test, type=type, coef=coef) - check_results(res, lrt_design_data, label, test=test, type=type) - } else if (!is.null(test) && type != 'apeglm' && !is.null(type)) { - res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=test, type=type, contrast=contrast) # Wald, ashr -print("str(res) ---------------- ") -print(str(res)) -print("str(res$res) ---------------- ") -print(str(res$res)) -print("names of metadata of res: -----------") -print(names(metadata(res$res))) -print("$type of metadata of res: ----------") -print(metadata(res$res)$type) -print("Expected type: ------------") -print(type) - check_results(res, lrt_design_data, label, contrast=contrast, test=test, type=type) - } else if (is.null(test) && type != 'apeglm' && !is.null(type)) { - res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=NULL, type=type, contrast=contrast) - check_results(res, lrt_design_data, label, contrast=contrast, test=NULL, type=type) - } else if (!is.null(test) && is.null(type)) { - res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=test, type=NULL, contrast=contrast) - check_results(res, lrt_design_data, label, contrast=contrast, test=test, type=NULL) - } else if (is.null(test) && is.null(type)) { - res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=NULL, type=NULL, contrast=contrast) - check_results(res, lrt_design_data, label, contrast=contrast, test=NULL, type=NULL) - } else if (!is.null(test) && type == 'apeglm') { - res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=test, type=type, coef=coef) - check_results(res, lrt_design_data, label, coef=coef, test=test, type=type) - } else if (is.null(test) && type == 'apeglm') { - res <- lcdbwf:::make_results(dds_name=dds_name, label=label, test=NULL, type=type, coef=coef) - check_results(res, lrt_design_data, label, coef=coef, test=NULL, 
type=type) - } else { - stop(paste(label, "was not tested")) - } - }) # test_that make_results works correctly with each combination of test and type - } # for type in shrinkage_types - } # for test in tests -} # test_make_results - From c0fb3e124a5c444f71e4ddf445cc2e34773daf4b Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Tue, 28 May 2024 09:02:28 -0400 Subject: [PATCH 61/93] error in test-results.R --- lib/lcdbwf/tests/testthat/test-functions.R | 1 + lib/lcdbwf/tests/testthat/test-results.R | 107 +++++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 lib/lcdbwf/tests/testthat/test-results.R diff --git a/lib/lcdbwf/tests/testthat/test-functions.R b/lib/lcdbwf/tests/testthat/test-functions.R index 4cd8911f..45708137 100644 --- a/lib/lcdbwf/tests/testthat/test-functions.R +++ b/lib/lcdbwf/tests/testthat/test-functions.R @@ -70,6 +70,7 @@ make_dds_list <- function() { featureCounts='featurecounts.txt', parallel=config$parallel$parallel) + lrt_design_data <- make_lrt_design_data() dds_lrt <- make_dds(lrt_design_data, config=config, featureCounts='featurecounts.txt', diff --git a/lib/lcdbwf/tests/testthat/test-results.R b/lib/lcdbwf/tests/testthat/test-results.R new file mode 100644 index 00000000..1ab1bf65 --- /dev/null +++ b/lib/lcdbwf/tests/testthat/test-results.R @@ -0,0 +1,107 @@ +library(testthat) +library(DESeq2) +library(lcdbwf) +library(rlang) +library(stringr) +library(BiocParallel) +library(ggplot2) +devtools::load_all('../../../../lib/lcdbwf') +source('test-functions.R') +config <- lcdbwf:::load_config('config.yaml') +text <- yaml::yaml.load_file('text.yaml') +register(MulticoreParam(config$parallel$cores)) + +# Mock function to capture mdcat output +mdcat_output <- c() +mock_mdcat <- function(...) 
{ + mdcat_output <<- c(mdcat_output, paste(..., collapse = " ")) +} + +# Helper function to create DESeqResults object +make_deseq_results <- function(test='Wald', type='ashr', reduced_design=NULL) { + design_data <- make_design_data() + design_data$test <- test + design_data$reduced_design <- reduced_design + label <- paste0("test=", test %||% "NULL/default (Wald)", ", type=", type %||% "NULL (Skip)") + dds <- make_dds(design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel) + tmp_dds_list = list(dds=dds) + res <- make_results(dds_name='dds', + label=label, + dds_list=tmp_dds_list, + type=type) + return(list(dds=dds, res=res)) +} + +# ------ Test build_results_tabs function ------ # +# Create objects for testing defaults +dds_and_res <- make_deseq_results() +dds <- dds_and_res$dds +res <- dds_and_res$res$res + +dds_list <- list(dds1=dds) +res_list <- list(res1=list(res=res, dds='dds1', label='Defaults')) + +test_that("build_results_tabs works with default config", { + expect_silent(build_results_tabs(res_list, dds_list, config, text)) +}) + +# Create objects for testing 'LRT' +dds_and_res <- make_deseq_results(test='LRT', reduced_design=~1) +dds <- dds_and_res$dds +res <- dds_and_res$res$res + +dds_list <- list(dds1=dds) +res_list <- list(res1=list(res=res, dds='dds1', label='LRT')) + +# Test build_results_tabs function +test_that("build_results_tabs works with LRT config", { + expect_silent(build_results_tabs(res_list, dds_list, config, text)) +}) + +test_that("build_results_tabs works with diagnostics disabled", { + config$toggle$results_diagnostics <- FALSE + expect_silent(build_results_tabs(res_list, dds_list, config, text)) +}) + +test_that("build_results_tabs works with specific diagnostics results names", { + config$toggle$results_diagnostics <- TRUE + config$plotting$diagnostics_results_names <- c("res1") + expect_silent(build_results_tabs(res_list, dds_list, config, text)) +}) + +test_that("build_results_tabs 
handles empty res_list", { + expect_silent(build_results_tabs(list(), dds_list, config, text)) +}) + +test_that("check_LRT identifies LRT results correctly", { + res_LRT_result <- create_deseq_results(test='LRT', reduced_design=~1) + res_LRT <- res_LRT_result$res + expect_true(check_LRT(res_LRT)) + + res_Wald_result <- create_deseq_results(test='Wald') + res_Wald <- res_Wald_result$res + expect_false(check_LRT(res_Wald)) +}) + +# Test that mdcat is called with expected values for LRT +test_that("build_results_tabs calls mdcat with expected values for LRT", { + res_LRT_result <- create_deseq_results(test='LRT', reduced_design=~1) + res_LRT <- res_LRT_result$res + dds <- res_LRT_result$dds + dds_list_LRT <- list(dds1=dds) + res_list_LRT <- list(res1=list(res=res_LRT, dds='dds1', label='LRT Test Label')) + + # Capture mdcat output + mdcat_output <<- c() + with_mock( + `lcdbwf:::mdcat` = mock_mdcat, + build_results_tabs(res_list_LRT, dds_list_LRT, config, text) + ) + + expect_false(any(grepl("Wald", mdcat_output))) + expect_true(any(grepl("LRT", mdcat_output))) +}) + From 16dcf70a9a2e71a6a2780b5ecae8560b374ad52b Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Tue, 28 May 2024 09:04:39 -0400 Subject: [PATCH 62/93] added featurecounts.txt to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ccb26fb2..52dab8bf 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,4 @@ workflows/rnaseq/downstream/rnaseq_files workflows/rnaseq/downstream/rnaseq.html *.xlsx *._* +lib/lcdbwf/tests/testthat/featurecounts.txt From 7e456c3039f718c948e96808f563e600c4aae36f Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Wed, 29 May 2024 18:30:48 -0400 Subject: [PATCH 63/93] handling errors for passing a non-NULL type with test == 'LRT' --- lib/lcdbwf/R/contrasts.R | 76 +++++++-- lib/lcdbwf/R/dds.R | 3 +- lib/lcdbwf/tests/testthat/featurecounts.txt | 2 +- lib/lcdbwf/tests/testthat/test-contrasts.R | 179 +++++++++++++++++--- 
lib/lcdbwf/tests/testthat/test-dds.R | 13 +- lib/lcdbwf/tests/testthat/test-functions.R | 137 +-------------- lib/lcdbwf/tests/testthat/test-results.R | 2 +- 7 files changed, 227 insertions(+), 185 deletions(-) diff --git a/lib/lcdbwf/R/contrasts.R b/lib/lcdbwf/R/contrasts.R index 22b44a0d..fd0cabec 100644 --- a/lib/lcdbwf/R/contrasts.R +++ b/lib/lcdbwf/R/contrasts.R @@ -158,35 +158,68 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ } # Modify the args based on what we detected. Note that results() expects the - dots['parallel'] <- parallel + dots[['parallel']] <- parallel # Note that results() expects the argument to be called 'object' rather than, # say, 'dds'. dots[['object']] <- dds + # Initial check on test argument: + # Make sure the 'test' passed to make_results is the test detected in the dds object + if ('test' %in% names(dots)) { + if (dots$test == 'Wald' && (any(grepl('LRTStatistic', names(mcols(dds)))) || + any(grepl('LRTPvalue', names(mcols(dds)))))) { + stop("The 'test' passed to make_results was set to 'Wald' but 'LRT' has been detected in dds") + } else if (dots$test == 'LRT' && (any(grepl('WaldStatistic', names(mcols(dds)))) || + any(grepl('WaldPvalue', names(mcols(dds)))))) { + stop("The 'test' passed to make_results was set to 'LRT' but 'Wald' has been detected in dds") + } + } + + # Define 'test' and 'type' in dots for some checks. + # We can't simply reference test and type with dots$test and + # dots$type without requiring their presence because these parameters + # can both be missing and defaults will be used in their place, internally, leaving + # us unable to verify their compatibility. 
+ + # Detect 'test' type when 'test' is missing from dots (make_results call) + if (!'test' %in% names(dots)) { + if (any(grepl('LRTStatistic', names(mcols(dds)))) || + any(grepl('LRTPvalue', names(mcols(dds))))) { + dots$test <- 'LRT' + test_detected <- TRUE + } else if (any(grepl('WaldStatistic', names(mcols(dds)))) || + any(grepl('WaldPvalue', names(mcols(dds))))) { + dots$test <- 'Wald' + test_detected <- TRUE + } else { + stop("test type was missing from make_results call and could not be detected from dds") + } + } + + # In recent versions, lfcShrink 'type' should default to "apeglm". Here we + # are inspecting the function itself if the default ever changes. + if (!'type' %in% names(dots)) { dots$type <- eval(formals(DESeq2::lfcShrink)$type)[1] } + # Call results() with the subset of dots that it accepts. results_dots <- lcdbwf:::match_from_dots(dots, results) res <- do.call("results", results_dots) - # If "type" was specified when calling this function, it's easy and we use - # that. Otherwise, if it was not specified then well use the current DESeq2 default. - # Since that default can change as we have seen in the past, we need to - # inspect the lfcShrink function itself to see what the current default is, - # and use that. - if (!'type' %in% names(dots)) { - # The definition of lfcShrink has a character vector as the type argument, - # and we want to extract the first thing in that vector. But formals() - # return strings, so we need to eval that string to convert it to - # a character vector such that we can extract the first thing. - # - # In recent versions this should evaluate to "apeglm". But this way we - # are inspecting the function itself if it ever changes. - type <- eval(formals(DESeq2::lfcShrink)$type)[1] + # When make_results is called with test set to 'LRT', we impute all rows in the log2FoldChange + # column of the DESeqResults object to 0. LFC values only make sense to report for a single + # comparison of two sample groups. 
This applies to the Wald test only. + # LRT is instead performing a test of the removal of one or more factor(s) from the design formula. + # DESeq2 reports log2FoldChange values for a single pair-wise comparison when test == 'LRT'. This + # can be misleading and so this is our solution. + if (!is.null(dots$test) && dots$test == 'LRT') { + res$log2FoldChange <- 0 } # While lfcShrink doesn't accept NULL as a type, we're using it here as - # a mechanism to disable lfcShrink altogether. - if (!is.null(type)) { + # a mechanism to disable lfcShrink altogether. Also, it doesn't make + # sense to shrink LRT LFC values as those should remain 0 for + # the reason described above. + if (!is.null(dots$type) && !is.null(dots$test) && dots$test != 'LRT') { # We're about to call lfcShrink, but it needs the res object...so inject the # one we just made into dots. dots[['res']] <- res @@ -199,6 +232,15 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ # Add the shrinkage type to the metadata of the results object metadata(res)$type <- type + + } else if (!is.null(dots$type) && !is.null(dots$test) && dots$test == 'LRT' && !test_detected) { + stop("You cannot pass a non-NULL type to make_results with test == 'LRT'. + For LRT, LFC values are set to 0 and should not be passed to lfcShrink. + Use type == NULL in make_results for LRT DDS objects.") + } else if (!is.null(dots$type) && !is.null(dots$test) && dots$test == 'LRT' && test_detected) { + stop("You cannot pass a non-NULL type to make_results with an LRT dds object. + For LRT, LFC values are set to 0 and should not be passed to lfcShrink. 
+ Use type == NULL in make_results for LRT DDS objects.") } else { # Be explicit, and ensure there's always a type attribute metadata(res)$type <- NULL diff --git a/lib/lcdbwf/R/dds.R b/lib/lcdbwf/R/dds.R index 226515f5..a97e23e8 100644 --- a/lib/lcdbwf/R/dds.R +++ b/lib/lcdbwf/R/dds.R @@ -46,7 +46,6 @@ kallisto.path.func <- function (x) file.path('..', 'data', 'rnaseq_samples', x, #' @param featureCounts Location of featureCounts output to be loaded make_dds <- function(design_data, config=NULL, collapse_by=NULL, strip_dotted_version=NULL, - default_test='Wald', featureCounts='../data/rnaseq_aggregation/featurecounts.txt', salmon_pattern="../data/rnaseq_samples/__SAMPLENAME__/__SAMPLENAME__.salmon/quant.sf", kallisto_pattern="../data/rnaseq_samples/__SAMPLENAME__/__SAMPLENAME__.kallisto/abundance.h5", @@ -56,7 +55,7 @@ make_dds <- function(design_data, config=NULL, collapse_by=NULL, coldata <- purrr::pluck(design_data, 'sampletable') design <- purrr::pluck(design_data, 'design') - test <- purrr::pluck(design_data, 'test', .default=default_test) + test <- purrr::pluck(design_data, 'test', .default='Wald') if (!(test %in% c('Wald', 'LRT'))){ stop(paste("Valid options for test are 'Wald' (default) or 'LRT'. 
You chose,", test)) } diff --git a/lib/lcdbwf/tests/testthat/featurecounts.txt b/lib/lcdbwf/tests/testthat/featurecounts.txt index 8749e7fd..99a5fd05 100644 --- a/lib/lcdbwf/tests/testthat/featurecounts.txt +++ b/lib/lcdbwf/tests/testthat/featurecounts.txt @@ -166,4 +166,4 @@ FBgn0046113 chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L 91 FBgn0028481 chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L;chr2L 922793;922793;930065;950492;950492;951587;951587;952256;952256;953396;953396;956249;956249;956404;956404;956860;956860;957052;957052;957279;957279 923219;923219;930423;950578;950578;952142;952142;953333;953333;953510;953510;956334;956334;956792;956792;956944;956944;957209;957209;958098;958098 +;+;+;+;+;+;+;+;+;+;+;+;+;+;+;+;+;+;+;+;+ 4160 46 19 42 33 FBgn0266036 chr2L;chr2L;chr2L;chr2L;chr2L;chr2L 987807;987807;989417;989772;990870;990870 989104;989357;990311;990311;991300;991300 -;-;-;-;-;- 2877 0 0 0 0 FBgn0267428 chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R 326768;348382;432220;438894;495144;495604;547232;578872;579044;642100;673574;705374 326939;348500;432460;439012;495329;495799;547363;578991;579217;642363;673836;705848 +;+;+;+;+;+;+;+;+;+;+;+ 2461 13 2 5 5 -FBgn0069969 chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R 924636;926097;926097;926097;926097;926735;926735;926735;926735;926735;926972;926972;926972;926972;926972;927186;927186;927186;927186;927186;927356;927356;927356;927356;927356;927781;929051;929051;929051;946776;949871;949871;949871;951127;951127;951127;951127 
926682;926682;926682;926682;926682;926902;926902;926902;926902;926902;927127;927127;927127;927127;927127;927297;927297;927297;927297;927297;927570;927570;927570;927570;927570;927986;929292;929292;929292;946843;949946;949946;949946;951252;951252;951252;951252 -;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;- 3416 43 19 44 39 +FBgn0069969 chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R;chr2R 924636;926097;926097;926097;926097;926735;926735;926735;926735;926735;926972;926972;926972;926972;926972;927186;927186;927186;927186;927186;927356;927356;927356;927356;927356;927781;929051;929051;929051;946776;949871;949871;949871;951127;951127;951127;951127 926682;926682;926682;926682;926682;926902;926902;926902;926902;926902;927127;927127;927127;927127;927127;927297;927297;927297;927297;927297;927570;927570;927570;927570;927570;927986;929292;929292;929292;946843;949946;949946;949946;951252;951252;951252;951252 -;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;- 3416 43 19 44 39 \ No newline at end of file diff --git a/lib/lcdbwf/tests/testthat/test-contrasts.R b/lib/lcdbwf/tests/testthat/test-contrasts.R index 1b93bb7e..55829b06 100644 --- a/lib/lcdbwf/tests/testthat/test-contrasts.R +++ b/lib/lcdbwf/tests/testthat/test-contrasts.R @@ -4,17 +4,44 @@ library(rlang) library(stringr) library(BiocParallel) devtools::load_all('../../../../lib/lcdbwf') -config <- lcdbwf:::load_config('config.yaml') +config <- lcdbwf:::load_config('../../../../workflows/rnaseq/downstream/config.yaml') source('test-functions.R') register(MulticoreParam(config$parallel$cores)) +# Function to create design data for LRT test +make_lrt_design_data <- function() { + lrt_design_data <- make_design_data() + lrt_design_data$test <- 'LRT' + lrt_design_data$reduced_design <- ~1 + 
return(lrt_design_data) +} # make_lrt_design_data + +make_dds_list <- function() { + # Create design data and dds object for Wald test type + wald_design_data <- make_design_data() + dds_wald <- make_dds(wald_design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel) + + lrt_design_data <- make_lrt_design_data() + dds_lrt <- make_dds(lrt_design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel) + + # Create dds_list + dds_list <- list(dds_wald=dds_wald, dds_lrt=dds_lrt) + return(dds_list) +} # make_dds_list + # Test all combinations of test and type # NULL shrinkage type skips lfcShrink # NULL test type runs Wald (default test) tests <- list('Wald', 'LRT', NULL) shrinkage_types <- list('ashr', 'apeglm', 'normal', NULL) -contrast <- c("condition", "treatment", "control") -coef <- "condition_treatment_vs_control" +contrast <- c("group", "treatment", "control") +coef <- "group_treatment_vs_control" dds_list <- make_dds_list() lrt_design_data <- make_lrt_design_data() @@ -26,6 +53,26 @@ lrt_design_data <- make_lrt_design_data() #label <- paste0("test=", test %||% "NULL/default (Wald)", ", type=", type %||% "NULL (Skip)") ############## +# Each row in the ASCII table indicates which combination of test, type, coef, and contrast +# is tested by the respective indexed conditional statement in the following test_that code. 
+ +#+---------+-------+-------+------+----------+-------+ +#| Results | Test | Type | Coef | Contrast | Check | +#+---------+-------+-------+------+----------+-------+ +#| 1 | LRT | NULL | - | - | E | +#| 2 | LRT | ashr | - | - | D | +#| 3 | LRT | apeglm| yes | - | D | +#| 3 | LRT | normal| yes | - | E | +#| 6 | Wald | NULL | - | yes | C | +#| 4 | Wald | ashr | - | yes | A | +#| 8 | Wald | apeglm| yes | - | B | +#| 4 | Wald | normal| - | yes | C | +#| 7 | NULL | NULL | - | yes | C | +#| 5 | NULL | ashr | - | yes | A | +#| 9 | NULL | apeglm| yes | - | B | +#| 5 | NULL | normal| - | yes | C | +#+---------+-------+-------+------+----------+-------+ + for (test in tests) { for (type in shrinkage_types) { if (test == 'Wald' || is.null(test)) { @@ -35,37 +82,77 @@ for (test in tests) { } label <- paste0("test=", test %||% "NULL/default (Wald)", ", type=", type %||% "NULL (Skip)") test_that(paste("make_results works correctly with", label), { + # 'Results' from the table above + # 1 if ((!is.null(test) && test == 'LRT') && is.null(type)) { - res <- make_results(dds_name=dds_name, label=label, type=NULL) # No contrast when running test == 'LRT' - check_results(res, lrt_design_data, label, test=test, type=NULL) + res <- make_results(dds_name=dds_name, label=label, type=NULL) + # 2 } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && !type %in% c('apeglm','normal'))) { - res <- make_results(dds_name=dds_name, label=label, type=type) # No contrast when running test == 'LRT' - check_results(res, lrt_design_data, label, test=test, type=type) + res <- make_results(dds_name=dds_name, label=label, type=type) + # 3 } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && type %in% c('apeglm','normal'))) { - # 'coef' is required for shrinkage type == 'apeglm' and 'apeglm' res <- make_results(dds_name=dds_name, label=label, type=type, coef=coef) - check_results(res, lrt_design_data, label, test=test, type=type) + # 4 } else if (!is.null(test) && type != 
'apeglm' && !is.null(type)) { - res <- make_results(dds_name=dds_name, label=label, type=type, contrast=contrast) # Wald, ashr - check_results(res, lrt_design_data, label, contrast=contrast, test=test, type=type) + res <- make_results(dds_name=dds_name, label=label, type=type, contrast=contrast) + # 5 } else if (is.null(test) && type != 'apeglm' && !is.null(type)) { res <- make_results(dds_name=dds_name, label=label, type=type, contrast=contrast) - check_results(res, lrt_design_data, label, contrast=contrast, test=NULL, type=type) + # 6 } else if (!is.null(test) && is.null(type)) { res <- make_results(dds_name=dds_name, label=label, type=NULL, contrast=contrast) - check_results(res, lrt_design_data, label, contrast=contrast, test=test, type=NULL) + # 7 } else if (is.null(test) && is.null(type)) { res <- make_results(dds_name=dds_name, label=label, type=NULL, contrast=contrast) - check_results(res, lrt_design_data, label, contrast=contrast, test=NULL, type=NULL) + # 8 } else if (!is.null(test) && type == 'apeglm') { res <- make_results(dds_name=dds_name, label=label, type=type, coef=coef) - check_results(res, lrt_design_data, label, coef=coef, test=test, type=type) + # 9 } else if (is.null(test) && type == 'apeglm') { res <- make_results(dds_name=dds_name, label=label, type=type, coef=coef) - check_results(res, lrt_design_data, label, coef=coef, test=NULL, type=type) } else { stop(paste(label, "was not tested")) } + + # Check make_results output for each possible combination of test and type + print(label) + expect_true(inherits(res$res, "DESeqResults")) + expect_true(identical(names(res), c('res', 'dds', 'label'))) + lrt_mcols_description <- paste0(as.character(lrt_design_data$design)[1], " ", + as.character(lrt_design_data$design)[2], "' vs '", + as.character(lrt_design_data$reduced_design)[1], " ", + as.character(lrt_design_data$reduced_design)[2], "'") + # 'Check' from the table above + # A + if ((is.null(test) || test == 'Wald') && (!is.null(type) && type == 
'ashr')) { + expected_char <- paste(test %||% 'Wald', "test p-value:", contrast[1], contrast[2], "vs", contrast[3]) + expect_true(mcols(res$res)$description[4] == expected_char) + # B + } else if ((is.null(test) || test == 'Wald') && (!is.null(type) && type == 'apeglm')) { + coef <- str_split(coef, "_")[[1]] + expected_char <- paste(test %||% 'Wald', "test p-value:", coef[1], coef[2], coef[3], coef[4]) + expect_true(mcols(res$res)$description[4] == expected_char) + # C + } else if ((is.null(test) || test == 'Wald') && (is.null(type) || type == 'normal')) { + expected_char <- paste(test %||% 'Wald', "statistic:", contrast[1], contrast[2], "vs", contrast[3]) + expect_true(mcols(res$res)$description[4] == expected_char) + # D + } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && type != 'normal')) { + expected_char <- paste0(test, " p-value: '", lrt_mcols_description) + expect_true(mcols(res$res)$description[4] == expected_char) + # E + } else if ((!is.null(test) && test == 'LRT') && (is.null(type) || type == 'normal')) { + expected_char <- paste0(test, " statistic: '", lrt_mcols_description) + expect_true(mcols(res$res)$description[4] == expected_char) + } else { + stop(paste(label, 'was not checked')) + } + # Check for expected type stored in the result's metadata + if (!is.null(type)) { + expect_true(identical(metadata(res$res)$type, type)) + } else if (is.null(type)) { + expect_true(is.null(metadata(res$res)$type)) + } }) # test_that } # for type in shrinkage_types } # for test in tests @@ -76,18 +163,58 @@ test_that("make_results can handle dds object directly", { results <- make_results(dds_name=dds, label='Direct DDS', type='ashr', - contrast=c("condition", "treatment", "control")) + contrast=c("group", "treatment", "control")) # Check that the res element is a DESeqResults object expect_true(inherits(results$res, "DESeqResults")) }) # test_that -#test_that("make_results errors when a dds_name is passed and dds_list is missing from .GlobalEnv", { 
-# remove(dds_list) -# expect_error(make_results(dds_name='dds_wald', -# label='missing dds_list', -# type='ashr', -# contrast=c("condition", "treatment", "control")), -# "Can't find dds_list in global environment.") -# -#}) # test_that +test_that("make_results errors when user attempts to run lfcShrink by defining a non-NULL type when test == 'LRT'", { + expect_error(make_results(dds_name='dds_lrt', + label='Shrink lrt results', + type='ashr', + test == 'LRT'), + "You cannot pass a non-NULL type to make_results with test == 'LRT'. + For LRT, LFC values are set to 0 and should not be passed to lfcShrink. + Use type == NULL in make_results for LRT DDS objects.") +}) # test_that + +test_that("make_results errors when user attempts to run lfcShrink by defining a non-NULL type when test is missing", { + expect_error(make_results(dds_name='dds_lrt', + label='Shrink lrt results', + type='ashr'), + "You cannot pass a non-NULL type to make_results with an LRT dds object. + For LRT, LFC values are set to 0 and should not be passed to lfcShrink. 
+ Use type == NULL in make_results for LRT DDS objects.") +}) # test_that + +test_that("make_results returns a DESeqResults object with all res$res$LFC == 0 when user + passes type == 'NULL' along with test == 'LRT'", { + res <- expect_silent(make_results(dds_name='dds_lrt', + label='Shrink lrt results', + type=NULL, + test='LRT')) + expect_true(inherits(res$res, "DESeqResults")) + expect_true(identical(names(res), c('res', 'dds', 'label'))) + expect_true(all(res$res$log2FoldChange == 0)) +}) # test_that + +test_that("make_results returns a DESeqResults object with all res$res$LFC == 0 when user + passes type == 'NULL' along with missing test parameter'", { + res <- expect_silent(make_results(dds_name='dds_lrt', + label='Shrink lrt results', + type=NULL)) + expect_true(inherits(res$res, "DESeqResults")) + expect_true(identical(names(res), c('res', 'dds', 'label'))) + expect_true(all(res$res$log2FoldChange == 0)) +}) # test_that + +remove(dds_list) +test_that("make_results errors when a dds_name is passed and dds_list is missing from .GlobalEnv", { + expect_error(make_results(dds_name='dds_wald', + label='missing dds_list', + type='ashr', + contrast=c("group", "treatment", "control")), + "Can't find dds_list in global environment.") + +}) # test_that diff --git a/lib/lcdbwf/tests/testthat/test-dds.R b/lib/lcdbwf/tests/testthat/test-dds.R index f7fbc017..7b02c80b 100644 --- a/lib/lcdbwf/tests/testthat/test-dds.R +++ b/lib/lcdbwf/tests/testthat/test-dds.R @@ -1,9 +1,10 @@ library(DESeq2) -devtools::load_all('../../../../lib/lcdbwf') -config <- lcdbwf:::load_config('config.yaml') library(testthat) +devtools::load_all('../../../../lib/lcdbwf') +config <- lcdbwf:::load_config('../../../../workflows/rnaseq/downstream/config.yaml') source('test-functions.R') + # --------- Test strip_dotted_version_from_dds() ---------- # test_that("strip_dotted_version_from_dds works", { dds <- DESeq2::makeExampleDESeqDataSet() @@ -97,7 +98,7 @@ test_that("make_dds errors on missing test 
argument when reduced design is provi # --------------------------------------------------- # # -------------- collapseReplicates2 ---------------- # -test_that("collapseReplicates2 collapses the three control replicates and three treatment replicates to +test_that("collapseReplicates2 collapses the two control replicates and two treatment replicates to a single control row and a single treatment row in colData. Row names should still match column 1.", { # Setup a DESeqDataSet with replicates @@ -109,10 +110,10 @@ test_that("collapseReplicates2 collapses the three control replicates and three parallel=config$parallel$parallel) # Perform replicate collapsing - dds_collapsed <- collapseReplicates2(dds, dds$condition) + dds_collapsed <- collapseReplicates2(dds, dds$group) colData_collapsed <- as.data.frame(colData(dds_collapsed)) - expect_equal(length(as.character(colData_collapsed$condition[colData_collapsed$condition == 'control'])), 1) - expect_equal(length(as.character(colData_collapsed$condition[colData_collapsed$condition == 'treatment'])), 1) + expect_equal(length(as.character(colData_collapsed$group[colData_collapsed$group == 'control'])), 1) + expect_equal(length(as.character(colData_collapsed$group[colData_collapsed$group == 'treatment'])), 1) # Check if the first column of colData matches rownames expect_equal(rownames(colData_collapsed), colData_collapsed[,1]) }) # test_that diff --git a/lib/lcdbwf/tests/testthat/test-functions.R b/lib/lcdbwf/tests/testthat/test-functions.R index 45708137..048f2477 100644 --- a/lib/lcdbwf/tests/testthat/test-functions.R +++ b/lib/lcdbwf/tests/testthat/test-functions.R @@ -1,141 +1,14 @@ -# Function to make the featurecounts.txt file and write it to this directory -make_featurecounts_file <- function(fc_nrow = 3000, filename = "featurecounts.txt", seed = 1) { - if (file.exists('featurecounts.txt')) { return('featurecounts.txt exists, exiting the function') } - # Create the same random data for every test - set.seed(seed) - 
# First row contains info on featureCounts command call including parameters and input files - # First row of dataframe that will be exported as featurecounts.txt. Note that in featurecounts.txt - # files that are generated by featurecounts only have 1 line for the first row. - # in R, we need 12 columns in each row hence the 11 NAs added to the end of fc_row1 - fc_row1 <- c("# Program:featureCounts v2.0.3; Command:\"featureCounts\" \"-s2\" \"-p\" \"--countReadPairs\" \"-T\" \"16\" \"-a\" \"/data/NICHD-core0/references/mouse/gencode_m33/annotation/mouse_gencode_m33.gtf\" \"-o\" \"data/rnaseq_aggregation/featurecounts.txt\" \"data/rnaseq_samples/sample1/sample1.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample2/sample2.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample3/sample3.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample4/sample4.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample5/sample5.cutadapt.markdups.bam\" \"data/rnaseq_samples/sample6/sample6.cutadapt.markdups.bam\"", rep(NA, 11)) - - - # Create a data frame with example data - data <- data.frame( - Geneid = paste0("ENSMUSG", formatC(sample(10000:99999, fc_nrow, replace = FALSE), width = 10, flag = "0"), - ".", sample(1:20, fc_nrow, replace = TRUE)), # Append version IDs to test strip dotted - Chr = paste0("chr", sample(1:19, fc_nrow, replace = TRUE)), - Start = sample(30000000:99990000, fc_nrow), - End = sample(30001000:99999999, fc_nrow), - Strand = sample(c("+", "-"), fc_nrow, replace = TRUE), - Length = sample(500:15000, fc_nrow, replace = TRUE) - ) - - # Define sample paths based on lcfbwf featurecounts samplename format - sample_names <- c( - "data/rnaseq_samples/sample1/sample1.cutadapt.markdups.bam", - "data/rnaseq_samples/sample2/sample2.cutadapt.markdups.bam", - "data/rnaseq_samples/sample3/sample3.cutadapt.markdups.bam", - "data/rnaseq_samples/sample4/sample4.cutadapt.markdups.bam", - "data/rnaseq_samples/sample5/sample5.cutadapt.markdups.bam", - 
"data/rnaseq_samples/sample6/sample6.cutadapt.markdups.bam" - ) - # Simulate counts for each sample - counts <- matrix(sample(0:20000, fc_nrow, replace = TRUE), ncol = 6) - colnames(counts) <- sample_names - # Combine gene data and counts - feature_counts <- cbind(data, counts) - fc_row2 <- colnames(feature_counts) - colnames(feature_counts) <- NULL - feature_counts <- rbind(fc_row2, feature_counts) - # Add metadata to row1 saved in functions.R, colnames to row2 and data to the remaining `fc_nrow` rows - feature_counts <-rbind(fc_row1, feature_counts) - - # Write the data frame to a text file - write.table(feature_counts, file = filename, sep = "\t", quote = FALSE, row.names = FALSE) - return(paste("File saved as", filename)) -} # make_featureCounts_file - # Helper function to make minimal default design data. design_data is an argument and # object of type list that is passed to make_dds() make_design_data <- function() { - make_featurecounts_file() # featurecounts.txt will be written to this directory if it doesn't exist lst <- list( # Create the sample table - sampletable = data.frame( - samplename = c("sample1", "sample2", "sample3", "sample4", "sample5", "sample6"), - condition = c(rep("control", 3), rep("treatment", 3))), - design = ~ condition + sampletable = read.table("../../../../workflows/rnaseq/config/sampletable.tsv", + sep="\t", + header=TRUE), + design = ~ group ) # lst - lst$sampletable$condition <- as.factor(lst$sampletable$condition) + lst$sampletable$group <- as.factor(lst$sampletable$group) return(lst) } # make_default_wald_design_data -make_dds_list <- function() { - # Create design data and dds object for Wald test type - wald_design_data <- make_design_data() - make_featurecounts_file() # Write 'featurecounts.txt' if it does not exist - dds_wald <- make_dds(wald_design_data, - config=config, - featureCounts='featurecounts.txt', - parallel=config$parallel$parallel) - - lrt_design_data <- make_lrt_design_data() - dds_lrt <- 
make_dds(lrt_design_data, - config=config, - featureCounts='featurecounts.txt', - parallel=config$parallel$parallel) - - # Create dds_list - dds_list <- list(dds_wald=dds_wald, dds_lrt=dds_lrt) - return(dds_list) -} # make_dds_list - -# Function to create design data for LRT test -make_lrt_design_data <- function() { - lrt_design_data <- make_design_data() - lrt_design_data$test <- 'LRT' - lrt_design_data$reduced_design <- ~1 - return(lrt_design_data) -} # make_lrt_design_data - -# Helper function to check the output of make_results -check_results <- function(res, lrt_design_data, label, contrast = NULL, coef = NULL, test = NULL, type = NULL) { - print(label) - # Check that results were returned by make_res - expect_true(!is.null(res)) - expect_true(identical(names(res), c('res', 'dds', 'label'))) - # Check that the res element returned by make_res is a DESeqResults object - expect_true(inherits(res$res, "DESeqResults")) - # Save a character representing the source of LRT pvalue for comparison with make_results output - lrt_mcols_description <- paste0(as.character(lrt_design_data$design)[1], " ", - as.character(lrt_design_data$design)[2], "' vs '", - as.character(lrt_design_data$reduced_design)[1], " ", - as.character(lrt_design_data$reduced_design)[2], "'") - - # Check the metadata in res for correct test and coef/contrast - # based on each combination of test, type and coef/contrast arguments - - if ((is.null(test) || test == 'Wald') && (!is.null(type) && type == 'ashr')) { - # test == 'Wald' and type == 'ashr' -- OR -- test == NULL and type == 'ashr' - expected_char <- paste(test %||% 'Wald', "test p-value:", contrast[1], contrast[2], "vs", contrast[3]) - expect_true(mcols(res$res)$description[4] == expected_char) - } else if ((is.null(test) || test == 'Wald') && (!is.null(type) && type == 'apeglm')) { - # test == 'Wald' and type == 'apeglm' -- OR -- test == NULL and type == 'apeglm' - coef <- str_split(coef, "_")[[1]] - expected_char <- paste(test %||% 'Wald', 
"test p-value:", coef[1], coef[2], coef[3], coef[4]) - expect_true(mcols(res$res)$description[4] == expected_char) - } else if ((is.null(test) || test == 'Wald') && (is.null(type) || type == 'normal')) { - # test == 'Wald' and type == 'normal', -- OR -- test == 'Wald' and type == NULL - # test == NULL and type == 'normal' -- OR -- test == NULL and type == NULL - expected_char <- paste(test %||% 'Wald', "statistic:", contrast[1], contrast[2], "vs", contrast[3]) - expect_true(mcols(res$res)$description[4] == expected_char) - } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && type != 'normal')) { - # test == 'LRT' and type == 'ashr' -- OR -- test == 'LRT and type == 'apeglm' - expected_char <- paste0(test, " p-value: '", lrt_mcols_description) - expect_true(mcols(res$res)$description[4] == expected_char) - } else if ((!is.null(test) && test == 'LRT') && (is.null(type) || type == 'normal')) { - # test == 'LRT and type == 'normal' -- OR -- test == 'LRT and type == NULL - expected_char <- paste0(test, " statistic: '", lrt_mcols_description) - expect_true(mcols(res$res)$description[4] == expected_char) - } else { - stop(paste(label, 'was not checked')) - } - - # Check that the make_results defined metadata in the res object includes the correct shrinkage type - if (!is.null(type)) { - expect_true(identical(metadata(res$res)$type, type)) - } else if (is.null(type)) { - expect_true(is.null(metadata(res$res)$type)) - } -} # check_results diff --git a/lib/lcdbwf/tests/testthat/test-results.R b/lib/lcdbwf/tests/testthat/test-results.R index 1ab1bf65..9f007c52 100644 --- a/lib/lcdbwf/tests/testthat/test-results.R +++ b/lib/lcdbwf/tests/testthat/test-results.R @@ -7,7 +7,7 @@ library(BiocParallel) library(ggplot2) devtools::load_all('../../../../lib/lcdbwf') source('test-functions.R') -config <- lcdbwf:::load_config('config.yaml') +config <- lcdbwf:::load_config('../../../../workflows/rnaseq/downstream/config.yaml') text <- yaml::yaml.load_file('text.yaml') 
register(MulticoreParam(config$parallel$cores)) From fd9b6ec259c6418a9961e1f1036d4e65c756b513 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Thu, 30 May 2024 15:04:18 -0400 Subject: [PATCH 64/93] Finished test-contrasts.R and contrasts.R --- lib/lcdbwf/R/contrasts.R | 80 +++---- lib/lcdbwf/R/results.R | 13 +- lib/lcdbwf/tests/testthat/config.yaml | 247 --------------------- lib/lcdbwf/tests/testthat/test-contrasts.R | 178 ++++++++++----- lib/lcdbwf/tests/testthat/test-results.R | 7 +- lib/lcdbwf/tests/testthat/text.yaml | 244 -------------------- 6 files changed, 162 insertions(+), 607 deletions(-) delete mode 100644 lib/lcdbwf/tests/testthat/config.yaml delete mode 100644 lib/lcdbwf/tests/testthat/text.yaml diff --git a/lib/lcdbwf/R/contrasts.R b/lib/lcdbwf/R/contrasts.R index fd0cabec..f0f6a9c5 100644 --- a/lib/lcdbwf/R/contrasts.R +++ b/lib/lcdbwf/R/contrasts.R @@ -157,7 +157,7 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ if (is.null(parallel)) parallel <- FALSE } - # Modify the args based on what we detected. Note that results() expects the + # Modify the args based on what we detected. 
dots[['parallel']] <- parallel # Note that results() expects the argument to be called 'object' rather than, @@ -168,94 +168,80 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ # Make sure the 'test' passed to make_results is the test detected in the dds object if ('test' %in% names(dots)) { if (dots$test == 'Wald' && (any(grepl('LRTStatistic', names(mcols(dds)))) || - any(grepl('LRTPvalue', names(mcols(dds)))))) { + any(grepl('LRTPvalue', names(mcols(dds)))))) { stop("The 'test' passed to make_results was set to 'Wald' but 'LRT' has been detected in dds") } else if (dots$test == 'LRT' && (any(grepl('WaldStatistic', names(mcols(dds)))) || - any(grepl('WaldPvalue', names(mcols(dds)))))) { + any(grepl('WaldPvalue', names(mcols(dds)))))) { stop("The 'test' passed to make_results was set to 'LRT' but 'Wald' has been detected in dds") } } - # Define 'test' and 'type' in dots for some checks. - # We can't simply reference test and type with dots$test and - # dots$type without requiring their presence because these parameters - # can both be missing and defaults will be used in their place, internally, leaving - # us unable to verify their compatibility. 
- - # Detect 'test' type when 'test' is missing from dots (make_results call) + # Detect 'test' type when the 'test' argument is missing from dots + test_detected <- FALSE if (!'test' %in% names(dots)) { - if (any(grepl('LRTStatistic', names(mcols(dds)))) || - any(grepl('LRTPvalue', names(mcols(dds))))) { - dots$test <- 'LRT' - test_detected <- TRUE - } else if (any(grepl('WaldStatistic', names(mcols(dds)))) || - any(grepl('WaldPvalue', names(mcols(dds))))) { - dots$test <- 'Wald' - test_detected <- TRUE + if (any(grepl('LRTStatistic', names(mcols(dds)))) || any(grepl('LRTPvalue', names(mcols(dds))))) { + dots$test <- 'LRT' + test_detected <- TRUE + } else if (any(grepl('WaldStatistic', names(mcols(dds)))) || any(grepl('WaldPvalue', names(mcols(dds))))) { + dots$test <- 'Wald' + test_detected <- TRUE } else { stop("test type was missing from make_results call and could not be detected from dds") } } - # In recent versions, lfcShrink 'type' should default to "apeglm". Here we - # are inspecting the function itself if the default ever changes. - if (!'type' %in% names(dots)) { dots$type <- eval(formals(DESeq2::lfcShrink)$type)[1] } + # Set the current default for 'type' from DESeq2 for lfcShrink if 'type' was not provided + if (!'type' %in% names(dots)) { + dots$type <- eval(formals(DESeq2::lfcShrink)$type)[1] + } # Call results() with the subset of dots that it accepts. results_dots <- lcdbwf:::match_from_dots(dots, results) res <- do.call("results", results_dots) - # When make_results is called with test set to 'LRT', we impute all rows in the log2FoldChange + # When make_results is called with 'test' set to 'LRT', + # or when make_results is called with 'test' missing but + # DDS object contains the LRT, we impute all rows in the log2FoldChange # column of the DESeqResults object to 0. LFC values only make sense to report for a single # comparison of two sample groups. This applies to the Wald test only. 
# LRT is instead performing a test of the removal of one or more factor(s) from the design formula. # DESeq2 reports log2FoldChange values for a single pair-wise comparison when test == 'LRT'. This # can be misleading and so this is our solution. + + # Adjust log2FoldChange for LRT test if (!is.null(dots$test) && dots$test == 'LRT') { res$log2FoldChange <- 0 } - # While lfcShrink doesn't accept NULL as a type, we're using it here as - # a mechanism to disable lfcShrink altogether. Also, it doesn't make - # sense to shrink LRT LFC values as those should remain 0 for - # the reason described above. - if (!is.null(dots$type) && !is.null(dots$test) && dots$test != 'LRT') { - # We're about to call lfcShrink, but it needs the res object...so inject the - # one we just made into dots. - dots[['res']] <- res + # Checks for LRT test and non-NULL type + if (!is.null(dots$type) && !is.null(dots$test) && dots$test == 'LRT' && !test_detected) { + stop("You cannot pass a non-NULL or missing type to make_results with test == 'LRT'. For LRT, LFC values are set to 0 and should not be passed to lfcShrink. Use type == NULL in make_results for LRT DDS objects.") + } else if (!is.null(dots$type) && !is.null(dots$test) && dots$test == 'LRT' && test_detected) { + stop("You cannot pass a non-NULL or missing type to make_results with an LRT dds object. For LRT, LFC values are set to 0 and should not be passed to lfcShrink. 
Use type == NULL in make_results for LRT DDS objects.") + } - # lfcShrink also needs the dds object, so inject that too + # Call lfcShrink if applicable + if (!is.null(dots$type) && dots$test != 'LRT') { + dots[['res']] <- res dots[['dds']] <- dds lfcShrink_dots <- lcdbwf:::match_from_dots(dots, lfcShrink) res <- do.call("lfcShrink", lfcShrink_dots) - # Add the shrinkage type to the metadata of the results object - metadata(res)$type <- type - - } else if (!is.null(dots$type) && !is.null(dots$test) && dots$test == 'LRT' && !test_detected) { - stop("You cannot pass a non-NULL type to make_results with test == 'LRT'. - For LRT, LFC values are set to 0 and should not be passed to lfcShrink. - Use type == NULL in make_results for LRT DDS objects.") - } else if (!is.null(dots$type) && !is.null(dots$test) && dots$test == 'LRT' && test_detected) { - stop("You cannot pass a non-NULL type to make_results with an LRT dds object. - For LRT, LFC values are set to 0 and should not be passed to lfcShrink. 
- Use type == NULL in make_results for LRT DDS objects.") + metadata(res)$type <- dots$type } else { - # Be explicit, and ensure there's always a type attribute metadata(res)$type <- NULL } return( list( - res=res, - dds=dds_name, - label=label + res = res, + dds = dds_name, + label = label ) ) } - results_diagnostics <- function(res, dds, name, config, text){ lcdbwf:::mdcat('### Other diagnostics') print(knitr::kable(lcdbwf:::my_summary(res, dds, name))) diff --git a/lib/lcdbwf/R/results.R b/lib/lcdbwf/R/results.R index fa27a0a7..204f3440 100644 --- a/lib/lcdbwf/R/results.R +++ b/lib/lcdbwf/R/results.R @@ -47,8 +47,7 @@ build_results_tabs <- function(res_list, dds_list, config, text){ # If any contrasts contain LRT, print the source of LFC # and p values above MA & Volcano plots if (contains_LRT) { - mdcat(mcols(res_i)$description[9]) - mdcat(mcols(res_i)$description[7]) + mdcat("LRT log2FoldChange values have been set to 0") } print(lcdbwf:::plotMA_label( res_i, @@ -58,8 +57,7 @@ build_results_tabs <- function(res_list, dds_list, config, text){ lcdbwf:::mdcat('### Volcano plot') lcdbwf:::folded_markdown(text$results_plots$volcano, "Help") if (contains_LRT) { - mdcat(mcols(res_i)$description[9]) - mdcat(mcols(res_i)$description[7]) + mdcat("LRT log2FoldChange values have been set to 0") } print(lcdbwf:::plot_volcano_label( @@ -78,10 +76,9 @@ build_results_tabs <- function(res_list, dds_list, config, text){ } #' Check for LRT in a results object's metadata -#' @param res_i DESeq2 results object -#' @return Boolean TRUE if results object's pvalues were determined -#' via the likelihood-ratio test (LRT) and FALSE if the Wald test -#' was used. +#' @param res_i DESeqResults object +#' @return Boolean TRUE if results object's pvalues were provided by +#' nBinomLRT and FALSE if the Wald test was used. 
check_LRT <- function(res_i) { mcols_pval <- mcols(res_i)$description[9] mcols_pval <- grepl('LRT', mcols_pval) diff --git a/lib/lcdbwf/tests/testthat/config.yaml b/lib/lcdbwf/tests/testthat/config.yaml deleted file mode 100644 index 32b9999e..00000000 --- a/lib/lcdbwf/tests/testthat/config.yaml +++ /dev/null @@ -1,247 +0,0 @@ -# ============================================================================ -# RNA-seq configuration file -# ============================================================================ -# -# The rnsaeq.Rmd RMarkdown file reads in the values from this YAML file. -# -# This file is split into sections, indicated by the top-level keys. -# -# In rnaseq.Rmd, this config file is loaded early on, is not cached, and -# thereafter is available as the `config` object. This is a nested list data -# structure. For example, in rnaseq.Rmd the currently-configured sample table -# can be accessed by ``config$main$sampletable``. -# -# Note that RMarkdown chunks may depend on one or more of these sections. -# Changing a value in a section will therefore appropriately cause all chunks -# depending on that section to be re-run. - - -# MAIN ------------------------------------------------------------------------- -# This section configures global options -main: - - # Path to sample table. It can be modified in the colData chunk if needed. - sampletable: "../config/sampletable.tsv" - - - # If you are using Ensembl annotations with gene IDs like ENSG000001.1 (with - # the trailing dotted version number), then these genes will not be found in - # the OrgDb. Set this to TRUE to strip off the dotted version to fix this. - strip_dotted_version: TRUE - - # If you have technical replicates, you can automatically collapse them. - # Provide a column in colData here, and for each unique value in the provided - # column, the counts across samples sharing that value will be summed. 
- # Typically this would be set to the column representing biological - # replicate, assuming technical replicates share the same biological - # replicate ID. - collapse_by: NULL - - # By default DESeq2 assumes alpha of 0.1 when filtering low-count genes. This - # is also used to select significant genes in the various helper functions. - alpha: 0.1 - - # If you plan on creating dds objects with featureCounts that will be - # compared with Salmon or Kallisto, there may be different numbers of genes - # in the respective dds objects. This can happen if the GTF used by - # featureCounts and the FASTA used by Salmon or Kallisto do not exactly - # match. By default, this mismatch will stop with an error. If instead you - # want to use only the intersection of genes found in all contrasts, set this - # to TRUE. - force_intersect: FALSE - - # AnnotationHub uses a safe permissions approach, setting the AnnotationHub - # lock file to be only visible by the creating user and the cache database to - # be read-only for the group. If this setting is TRUE, then the permissions - # will be set on BiocFileCache.sqlite and BiocFileCache.sqlite.LOCK to be - # read/write for both user and group. - group_permissions: TRUE - - -# PLOTTING --------------------------------------------------------------------- -# This section configures plotting options for PCA, clustered heatmap, and -# functional enrichment plots. -plotting: - - # For each item in this list, you will get a column of colors along the right - # side in the clustered heatmap and a PCA plot tab. - covariates_for_plots: - - Age - - Genotype - - Exercise - - Group - - Mass - - Litter - - AgeDays - - Genotype_Exercise - - Age_Genotype - - Exercise_Genotype - - # Arguments for dotplot_args, emapplot_args, and cnetplot_args are passed to - # the respective clusterProfiler plotting functions dotplot, emapplot, and - # cnetplot. Any arguments accepted by those functions can be added here. 
- dotplot_args: - showCategory: 20 - color: p.adjust - size: NULL - split: NULL - font.size: 12 - x: GeneRatio - - emapplot_args: - showCategory: 35 - color: p.adjust - cex_label_category: .7 - - cnetplot_args: - showCategory: 5 - foldChange: NULL - colorEdge: FALSE - circular: FALSE - node_label: all - - # If diagnostics_results_names is empty or missing, and - # toggle$results_diagnostics is TRUE (see the "toggle" section), then - # diagnostics will be run for all results objects. This can be time consuming - # if you have many contrasts. - # - # If toggle$results_diagnostics is FALSE, no diagnostics will be run for - # results and the diagnostics_results_names entry is ignored. - # - # Otherwise if toggle$results_diagnostics is TRUE and this list contains - # items, then only run diagnostics for the following names in res_list. This - # can be useful in cases where you ran many contrasts but only want to run - # diagnostics on a subset of them. - diagnostics_results_names: [] - - -# PARALLEL --------------------------------------------------------------------- -# Configure parallelization here. -parallel: - - parallel: TRUE - - # If "cores" is an integer, use that many cores. - # - # If "cores" is a string, assume it's the name of an environment variable - # that stores the number of cores (e.g., "SLURM_CPUS_PER_TASK"). If that env - # var is missing or empty then default to 1 core. - # - # NOTE: on some clusters, you may want to also set the environment variable - # `OMP_NUM_THREADS=1` to prevent some R parallelization code from trying to - # grab all cores on the node, even though you were only allocated a subset by - # the batch scheduler. - cores: "SLURM_CPUS_PER_TASK" - -# TOGGLE ----------------------------------------------------------------------- -# This section provides an easy way to turn on or off entire parts of the -# analysis. -toggle: - - # Import Salmon results instead of featureCounts? 
This is global; if you want - # to compare featureCounts and Salmon then leave this to FALSE and use the - # `salmon=TRUE` argument to lcdbwf::make_dds. - salmon: FALSE - - # Import Kallisto results instead of featureCounts? See similar notes above - # for Salmon. - kallisto: FALSE - - # Create diagnostic plots for all dds objects? - dds_diagnostics: TRUE - - # Create diagnostic plots for results objects? If TRUE, will check the - # config$plotting$diagnostics_for_results list. - results_diagnostics: TRUE - -# ANNOTATION ------------------------------------------------------------------- -# Configuration specific to annotations and databases -annotation: - - # Specify the genus and species. This will be used to search AnnotationHub. - genus_species: "Mus musculus" - - # You can optionally manually search AnnotationHub to find a different - # annotation keys for OrgDb and/or TxDb and use them here to override the - # default latest versions. - orgdb_key_override: NULL - txdb_key_override: NULL - - # The OrgDb will be downloaded here and subsequently read directly from this - # directory - hub_cache: "../../../include/AnnotationHubCache" - localHub: FALSE - force: FALSE - - # What is the format of the gene IDs in rownames(dds)? - keytype: 'ENSEMBL' - - # What is the format of the gene IDs in msigdbr? - msigdbr_keytype: "ensembl_gene" - - # For each column here, the OrgDb will be queried using the gene IDs in the - # rownames of the results object (type is specified by "keytype" above) and - # the resulting values will be attached to the results object. - # - # Use columns(orgdb) to figure out what the options are for your particular - # OrgDb. - orgdb_columns: - - SYMBOL - - ALIAS - - UNIPROT - - ENTREZID - - # For each column listed here, if a value is NA when searching the - # AnnotationHub (as described above) then it will be filled in with the - # corresponding value from rownames(results_object). 
Useful for when you want - # use symbol where possible but fill in with the Ensembl ID when symbol is - # not available. - fill: - - SYMBOL - - # Which column to use when labeling plots? E.g., MA and volcano plots. Must - # be one of "orgdb_columns". - label_column: SYMBOL - - -# FUNCTIONAL ENRICHMENT -------------------------------------------------------- -# Configuration specific to functional enrichment as performed in -# functional_enrichment.Rmd -functional_enrichment: - - ontologies: - # Standard Gene Ontology - "BP": "GO Biological Process" - "CC": "GO Cellular Component" - "MF": "GO Molecular Function" - - # KEGG pathway analysis - # "KEGG": "KEGG Pathways" - - ## MSigDB (compiled for human, but orthologs are available which may be of - ## questionable utility for other organisms). To see what's available, use - ## lcdbwf::available_msigdb_keys() and cross-reference with - ## https://www.gsea-msigdb.org/gsea/msigdb/collections.jsp - #"C2_CGP": "MSigDB chemical and genetic perturbations" - #"C2_CP": "MSigDb Canonical pathways" - #"C5_GO:BP": "MSigDB's version of BP" - #"C5_GO:CC": "MSigDB's version of CC" - #"C5_GO:MF": "MSigDB's version of MF" - #"C8": "MSigDB cell-type-specific gene sets" - #"H": "MSigDB hallmark gene sets" - - # Type can be "OR" for overrepresentation analysis, or "GSEA" for gene set - # enrichment analysis. - kind: - - "OR" - - # Options here are "up", "down" or "changed". - directions: - - "up" - - "down" - - "changed" - - # Any GO terms above these cutoffs will not be reported. Set to 1 if you're - # fine getting plots even with no statistically significant terms. 
- pvalueCutoff: 1 - qvalueCutoff: 1 diff --git a/lib/lcdbwf/tests/testthat/test-contrasts.R b/lib/lcdbwf/tests/testthat/test-contrasts.R index 55829b06..bf9a7107 100644 --- a/lib/lcdbwf/tests/testthat/test-contrasts.R +++ b/lib/lcdbwf/tests/testthat/test-contrasts.R @@ -60,17 +60,17 @@ lrt_design_data <- make_lrt_design_data() #| Results | Test | Type | Coef | Contrast | Check | #+---------+-------+-------+------+----------+-------+ #| 1 | LRT | NULL | - | - | E | -#| 2 | LRT | ashr | - | - | D | -#| 3 | LRT | apeglm| yes | - | D | -#| 3 | LRT | normal| yes | - | E | -#| 6 | Wald | NULL | - | yes | C | -#| 4 | Wald | ashr | - | yes | A | -#| 8 | Wald | apeglm| yes | - | B | -#| 4 | Wald | normal| - | yes | C | -#| 7 | NULL | NULL | - | yes | C | -#| 5 | NULL | ashr | - | yes | A | -#| 9 | NULL | apeglm| yes | - | B | -#| 5 | NULL | normal| - | yes | C | +#| 2 | LRT | ashr | - | - | F | +#| 2 | LRT | apeglm| - | - | F | +#| 2 | LRT | normal| - | - | F | +#| 5 | Wald | NULL | - | yes | C | +#| 3 | Wald | ashr | - | yes | A | +#| 7 | Wald | apeglm| yes | - | B | +#| 3 | Wald | normal| - | yes | C | +#| 6 | NULL | NULL | - | yes | C | +#| 4 | NULL | ashr | - | yes | A | +#| 8 | NULL | apeglm| yes | - | B | +#| 4 | NULL | normal| - | yes | C | #+---------+-------+-------+------+----------+-------+ for (test in tests) { @@ -81,33 +81,35 @@ for (test in tests) { dds_name <- 'dds_lrt' } label <- paste0("test=", test %||% "NULL/default (Wald)", ", type=", type %||% "NULL (Skip)") - test_that(paste("make_results works correctly with", label), { + test_that(paste("make_results works correctly with type =", type, "and", test, "being detected automatically + from DDS"), { # 'Results' from the table above # 1 if ((!is.null(test) && test == 'LRT') && is.null(type)) { res <- make_results(dds_name=dds_name, label=label, type=NULL) # 2 - } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && !type %in% c('apeglm','normal'))) { - res <- make_results(dds_name=dds_name, 
label=label, type=type) + } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && type %in% c('ashr', 'apeglm', 'normal'))) { + # 'Check' from the table above + # F + expect_error(make_results(dds_name=dds_name, label=label, type=type), + "You cannot pass a non-NULL or missing type to make_results with an LRT dds object. For LRT, LFC values are set to 0 and should not be passed to lfcShrink. Use type == NULL in make_results for LRT DDS objects.") + return() # 3 - } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && type %in% c('apeglm','normal'))) { - res <- make_results(dds_name=dds_name, label=label, type=type, coef=coef) - # 4 } else if (!is.null(test) && type != 'apeglm' && !is.null(type)) { res <- make_results(dds_name=dds_name, label=label, type=type, contrast=contrast) - # 5 + # 4 } else if (is.null(test) && type != 'apeglm' && !is.null(type)) { res <- make_results(dds_name=dds_name, label=label, type=type, contrast=contrast) - # 6 + # 5 } else if (!is.null(test) && is.null(type)) { res <- make_results(dds_name=dds_name, label=label, type=NULL, contrast=contrast) - # 7 + # 6 } else if (is.null(test) && is.null(type)) { res <- make_results(dds_name=dds_name, label=label, type=NULL, contrast=contrast) - # 8 + # 7 } else if (!is.null(test) && type == 'apeglm') { res <- make_results(dds_name=dds_name, label=label, type=type, coef=coef) - # 9 + # 8 } else if (is.null(test) && type == 'apeglm') { res <- make_results(dds_name=dds_name, label=label, type=type, coef=coef) } else { @@ -115,7 +117,6 @@ for (test in tests) { } # Check make_results output for each possible combination of test and type - print(label) expect_true(inherits(res$res, "DESeqResults")) expect_true(identical(names(res), c('res', 'dds', 'label'))) lrt_mcols_description <- paste0(as.character(lrt_design_data$design)[1], " ", @@ -142,6 +143,7 @@ for (test in tests) { expect_true(mcols(res$res)$description[4] == expected_char) # E } else if ((!is.null(test) && test == 'LRT') 
&& (is.null(type) || type == 'normal')) { + expect_true(all(res$res$log2FoldChange == 0)) expected_char <- paste0(test, " statistic: '", lrt_mcols_description) expect_true(mcols(res$res)$description[4] == expected_char) } else { @@ -157,64 +159,120 @@ for (test in tests) { } # for type in shrinkage_types } # for test in tests + +# -------------------------- direct dds -------------------------- # test_that("make_results can handle dds object directly", { dds <- dds_list[['dds_wald']] # Directly pass the dds object - results <- make_results(dds_name=dds, - label='Direct DDS', - type='ashr', - contrast=c("group", "treatment", "control")) - - # Check that the res element is a DESeqResults object - expect_true(inherits(results$res, "DESeqResults")) + res <- make_results(dds_name=dds, + label='Direct DDS', + type='ashr', + contrast=contrast) + expect_true(inherits(res$res, "DESeqResults")) + expect_true(identical(names(res), c('res', 'dds', 'label'))) + expect_true(any(grepl('Wald', mcols(res$res)$description[4]))) }) # test_that +# ---------------------------------------------------------------- # -test_that("make_results errors when user attempts to run lfcShrink by defining a non-NULL type when test == 'LRT'", { - expect_error(make_results(dds_name='dds_lrt', - label='Shrink lrt results', - type='ashr', - test == 'LRT'), - "You cannot pass a non-NULL type to make_results with test == 'LRT'. - For LRT, LFC values are set to 0 and should not be passed to lfcShrink. 
- Use type == NULL in make_results for LRT DDS objects.") -}) # test_that -test_that("make_results errors when user attempts to run lfcShrink by defining a non-NULL type when test is missing", { +# ---- make_results with dds_lrt but with wald test specified ---- # +# Similar structure to the ASCII table depicted tests from above +# but with the 'test' argument included in make_results +# and test == 'LRT' +test_that("make_results errors when user passes mismatched test == 'Wald' with LRT DDS", { expect_error(make_results(dds_name='dds_lrt', label='Shrink lrt results', - type='ashr'), - "You cannot pass a non-NULL type to make_results with an LRT dds object. - For LRT, LFC values are set to 0 and should not be passed to lfcShrink. - Use type == NULL in make_results for LRT DDS objects.") + type=NULL, + test='Wald'), + "The 'test' passed to make_results was set to 'Wald' but 'LRT' has been detected in dds") }) # test_that +# ---------------------------------------------------------------- # + -test_that("make_results returns a DESeqResults object with all res$res$LFC == 0 when user - passes type == 'NULL' along with test == 'LRT'", { - res <- expect_silent(make_results(dds_name='dds_lrt', +# ---- make_results with dds_wald but with LRT test specified ---- # +# Similar structure to the ASCII table depicted tests from above +# but with the 'test' argument included in make_results +# and test == 'LRT' +test_that("make_results errors when user passes mismatched test == 'LRT' with Wald DDS", { + expect_error(make_results(dds_name='dds_wald', label='Shrink lrt results', type=NULL, - test='LRT')) - expect_true(inherits(res$res, "DESeqResults")) - expect_true(identical(names(res), c('res', 'dds', 'label'))) - expect_true(all(res$res$log2FoldChange == 0)) + test='LRT'), + "The 'test' passed to make_results was set to 'LRT' but 'Wald' has been detected in dds") }) # test_that +# ---------------------------------------------------------------- # -test_that("make_results returns 
a DESeqResults object with all res$res$LFC == 0 when user - passes type == 'NULL' along with missing test parameter'", { - res <- expect_silent(make_results(dds_name='dds_lrt', - label='Shrink lrt results', - type=NULL)) + +# ----- Attempt to shrink LRT results but with test included ----- # +# Similar structure to the ASCII table depicted tests from above +# but with the 'test' argument included in make_results +# and test == 'LRT' +for (type in c('ashr', 'apeglm', 'normal')) { + test_that("make_results errors when user attempts to run lfcShrink by defining a non-NULL or missing type when test == 'LRT'", { + expect_error(make_results(dds_name='dds_lrt', + label='Shrink lrt results', + type=type, + test='LRT'), + "You cannot pass a non-NULL or missing type to make_results with test == 'LRT'. For LRT, LFC values are set to 0 and should not be passed to lfcShrink. Use type == NULL in make_results for LRT DDS objects.") + }) # test_that +} # for type +# ---------------------------------------------------------------- # + + +# -------------- missing both test and type with LRT ------------- # +# Similar structure to the ASCII table depicted tests from above +# with the 'test' argument also missing in make_results +# and test also missing. When test is missing, test should be detected from dds_lrt as 'LRT'. +# With type also missing, type should be set as the current default: 'ashr' as of 5-30-2024. +# This combination of test and type is incompatible and so the following error message should +# be returned. +test_that("make_results errors when user attempts to run lfcShrink by defining a missing type when test == 'LRT'", { + expect_error(make_results(dds_name='dds_lrt', + label='missing test and type of LRT DDS'), + "You cannot pass a non-NULL or missing type to make_results with an LRT dds object. For LRT, LFC values are set to 0 and should not be passed to lfcShrink. 
Use type == NULL in make_results for LRT DDS objects.") +}) # test_that +# ---------------------------------------------------------------- # + + +# ------------- test included for all types dds Wald ------------- # +# Similar structure to the ASCII table depicted tests from above +# but with the 'test' argument included in make_results +# and test == 'Wald' +for (type in c('ashr', 'apeglm', 'normal')) { + test_that("make_results returns a DESeqResults object with !all res$res$LFC == 0 when user passes a defined type along with test == 'Wald'", { + if (type != 'apeglm') { + res <- make_results(dds_name='dds_wald', + label='Shrink Wald results', + contrast=contrast, + type=type, + test='Wald') expect_true(inherits(res$res, "DESeqResults")) expect_true(identical(names(res), c('res', 'dds', 'label'))) - expect_true(all(res$res$log2FoldChange == 0)) -}) # test_that + expect_true(!all(res$res$log2FoldChange == 0)) + expect_true(any(grepl('Wald', mcols(res$res)$description[4]))) + } else if (type == 'apeglm') { + res <- make_results(dds_name='dds_wald', + label='Shrink Wald results', + coef=coef, + type=type, + test='Wald') + expect_true(inherits(res$res, "DESeqResults")) + expect_true(identical(names(res), c('res', 'dds', 'label'))) + expect_true(!all(res$res$log2FoldChange == 0)) + expect_true(any(grepl('Wald', mcols(res$res)$description[4]))) + } # if type + }) # test_that +} # for type +# ---------------------------------------------------------------- # + +# ---------------------- missing dds_list ------------------------ # remove(dds_list) test_that("make_results errors when a dds_name is passed and dds_list is missing from .GlobalEnv", { expect_error(make_results(dds_name='dds_wald', label='missing dds_list', type='ashr', - contrast=c("group", "treatment", "control")), + contrast=contrast), "Can't find dds_list in global environment.") - }) # test_that +# ---------------------------------------------------------------- # diff --git 
a/lib/lcdbwf/tests/testthat/test-results.R b/lib/lcdbwf/tests/testthat/test-results.R index 9f007c52..de177b92 100644 --- a/lib/lcdbwf/tests/testthat/test-results.R +++ b/lib/lcdbwf/tests/testthat/test-results.R @@ -5,10 +5,13 @@ library(rlang) library(stringr) library(BiocParallel) library(ggplot2) +library(AnnotationHub) +library(dplyr) + devtools::load_all('../../../../lib/lcdbwf') source('test-functions.R') config <- lcdbwf:::load_config('../../../../workflows/rnaseq/downstream/config.yaml') -text <- yaml::yaml.load_file('text.yaml') +text <- yaml::yaml.load_file('../../../../workflows/rnaseq/downstream/text.yaml') register(MulticoreParam(config$parallel$cores)) # Mock function to capture mdcat output @@ -43,6 +46,7 @@ res <- dds_and_res$res$res dds_list <- list(dds1=dds) res_list <- list(res1=list(res=res, dds='dds1', label='Defaults')) +res_list <- lcdbwf:::attach_extra(res_list, config) test_that("build_results_tabs works with default config", { expect_silent(build_results_tabs(res_list, dds_list, config, text)) @@ -55,6 +59,7 @@ res <- dds_and_res$res$res dds_list <- list(dds1=dds) res_list <- list(res1=list(res=res, dds='dds1', label='LRT')) +res_list <- lcdbwf:::attach_extra(res_list, config) # Test build_results_tabs function test_that("build_results_tabs works with LRT config", { diff --git a/lib/lcdbwf/tests/testthat/text.yaml b/lib/lcdbwf/tests/testthat/text.yaml deleted file mode 100644 index 348a0325..00000000 --- a/lib/lcdbwf/tests/testthat/text.yaml +++ /dev/null @@ -1,244 +0,0 @@ -# Text to be included into RMarkdown files. -# -# This keeps the RMarkdown uncluttered and avoids cumbersome lcdbwf::mdcat() -# calls within for-loops, while still retaining the ability to easily edit. -# -# When adding new entries, note that the "|" is important -- see -# https://yaml-multiline.info/ for more details. 
- - -# SAMPLE SIMILARITY AND QC -qc: - clustered_heatmap: | - The following heatmap shows a hierarchical clustering of pairwise distances - between samples. Darker blue means less distant (i.e. more similar). In general - we expect to see replicates clustering together and separation of treatments. - - pca: | - Another way of looking at sample clustering is principal components - analysis (PCA). The x- and y-axes do not have units, rather, they represent - the dimensions along which the samples vary the most. The amount of - variance explained by each principal component is indicated in the axes - label. - -# SIZE FACTORS ----------------------------------------------------------------- -sizefactors: | - ## Size factors {.tabset} - - Ideally, all libraries were sequenced to identical depth, in which case all - size factors would be 1.0. In practice, this is almost never the case due to - the difficulties of accurately measuring low concentrations of cDNA. DESeq2 uses size - factor estimates to normalized for sequencing depth across - libraries. If some libraries are much higher or lower than - 1 then those libraries had dramatically different coverage and we should be - careful about interpreting results. - - Simply taking the total number of reads has been shown to be too sensitive to - a small number of highly-expressed genes. DESeq2's size factors are - calculated according to the median-ratio method (equation 5 of [Anders - & Huber - 2010](http://dx.doi.org/10.1186/gb-2010-11-10-r106)). - - These diagnostic plots show the size factors (as a ranked bar plot) and the - relationship between the size factors and the total read count (as - a scatterplot). Samples whose total read count differs from size factor may - indicate that the sample has a small number of highly expressed genes. 
- -# DDS DIAGNOSTICS -------------------------------------------------------------- -dds_diagnostics: - dispersion: | - See the [dispersion - estimates](http://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#dispersion-plot-and-fitting-alternatives) - section of the DESeq2 vignette for details. - - colData: | - This table lists the samples included in the dds object. - - outliers: | - Are there any samples with many outliers? See the [approach to count - outliers](http://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#approach-to-count-outliers) - section of the DESeq2 vignette for details. - - sparsity: | - This plot can be used to see if the majority of genes with high counts - are coming from individual (or a small number of) samples. - - design_matrix: | - This table shows the design matrix of the dds object. - - -# RESULTS TABLE --------------------------------------------------------------- -# Help for the columns in the results summary table. -results_table: | - - The **row names** of the table are the long-format human-readable labels of - each contrast - - **name** column is the short name used to generate filenames. Upset plots, - output TSVs and worksheets in the Excel spreadsheet will be labeled with - this short name. - - **up** and **down** are the numbers of up- and downregulated genes respectively. - - **nonzero.vs.total:** the number of genes with nonzero read counts - and the total number of annotated genes. - - **alpha:** genes with an adjusted pvalue lower than this are considered - significantly changed. - - **lfcThreshold:** by default, the null hypothesis is that the log2 fold - change of genes is not different from zero. In some circumstances, it is - useful to use a different threshold, which will be reported here. - - **outliers:** Cook's distance is used as a measure of how much a single - sample is influencing the fitted coefficients for a gene. 
If that value - is too high, the gene is marked as an outlier and the pvalue and adjusted - pvalue will be set to NA. If there are many (hundreds to thousands) of - outliers, this is an indication that a sample may be problematic. In this - case, the dds diagnostics plots may help identify the culprit. - - **low.counts:** How many genes were not even tested for differential - expression because they had too low counts. - - **dds:** The name of the DESeqDataSet object used for the contrast (see - "other diagnostics" section above for details). - - **design:** the design, using R syntax. Originally described by - [Wilkinson - 1973](https://www.jstor.org/stable/2346786?seq=2#metadata_info_tab_contents), - but see the [section in R for Data - Science](https://r4ds.had.co.nz/model-basics.html#formulas-and-model-families) - as well. - - **test:** The contrast performed using the design. Words in this column - indicate the contrast was specified to DESeq2 using a 3-item character - vector. Numbers (like `0,+1`) indicate a numeric contrast. For more details - on the design, check the design matrix tab in the diagnostics section for - the corresponding dds object matching the name in the *dds* column. - - -# RESULTS PLOTS ---------------------------------------------------------------- -# Help for the tabbed results plots -results_plots: - ma: | - An **M-A plot** gives a good overview of the comparison. There is one dot per gene. - It shows three dimensions: the normalized counts (baseMean, on a log10 scale, - x-axis), the effect size (log2FoldChange, y-axis) and whether or not a gene was - signficant (color). While it is tempting to interpret baseMean as - "expression level", this would not be correct because the values are only - normalized to library size, not transcript length. 
We can say that a gene - with higher baseMean than another gene has more observed reads, but we - cannot say whether that is because it has a longer transcript or because - there are more transcripts. - - volcano: | - A **volcano plot** shows fewer dimensions than an M-A plot (it does not show - normalized counts) but it can be useful for looking at the relationship of - adjusted p-value and log2FoldChange. - - pval_hist: | - The **raw p-value distribution** helps us assess if the data met the - assumptions of DESeq2. This [Variance Explained blog - post](http://varianceexplained.org/statistics/interpreting-pvalue-histogram/) - has a good explanation of intepretation. Additionally, we color the p-values - for genes that have been removed from independent filtering due to having too - low counts. - - See the DESeq2 vignette on [independent filtering and multiple - testing](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#independent-filtering-and-multiple-testing) - for more details. - - -# RESULTS DIAGNOSTICS ---------------------------------------------------------- -# Individual help for the diagnostic plots for results objects -results_diagnostics: - filter_ma: | - This MA plot colors genes by whether or not they were filtered out in the - independent filtering step. - - outlier_ma: | - This MA plots colors genes by whether or not they were considered an outlier. - - lfcse_basemean: | - This plots the standard error of the log2FoldChange (lfcSE) vs baseMean. In - general, as baseMean increases the lfcSE decreases. - - lfcse_lfc: | - Standard error of the log2FoldChange vs the log2FoldChange itself. - - -# UPSET PLOTS ------------------------------------------------------------------ -upset_plots: | - # UpSet plots {.tabset} - - Here we gather together all the interesting gene sets into an [UpSet - plot](http://caleydo.org/tools/upset/). 
These plots show the combinatorial - overlaps of genes found to be up or down across the different contrasts - performed. It's much like a Venn diagram, but easier to interpret and can scale - to many comparisons. - - The plot shows a summary of which genes were found in common across - contrasts. If you want to know the details of *which* genes were found in - common, a TSV file is linked under each plot. This file has rows for each - gene and columns for each contrast. A `1` indicates that gene was found to be - up/down/changed in that contrast. You can sort this TSV to identify the genes - of interest. For example, sorting all columns in descending order will cause - genes shared in all contrasts (a `1` in each column) to come to the top. - - **Interpretation notes:** A gene can only be found in one column in an UpSet - plot. So if you want to confirm that the number of genes for a contrast - matches the results tables and MA plots, sum all the bars for which there is - a dot in that contrast's row. - -results_files: - - The best way to use these is to rank by the log2FoldChange column, and then - consider genes whos padj value falls below 0.1. - - **A note on NA values:** DESeq2 uses NA to encode extra information about - a gene, depending on which column the NA is found in. - - - If log2FoldChange, pvalue, and padj all NA it means that the baseMean was - 0. That is, no samples in this contrast had any reads. - - - If only pvalue and padj are NA it means that the gene was detected as - a count outlier. - - - If only padj is NA, it means the gene had too low counts and was filtered - out from multiple testing because it had no chance of being significant, - thereby reducing the harshness of the rest of the multiple testing - adjustments. - - The output files have the following columns. - - - - **gene** is typically the most stable accession. For most organisms, this - is Ensembl. 
Human-readable names can be inconsistent (or missing in many - cases), so these IDs provide the most robust way of ensuring each gene has - a unique ID. - - **SYMBOL, ALIAS, UNIPROT, ENTREZID**, and possibly others, are columns with - additional gene identifiers. Typically SYMBOL and ALIAS will be the most - human-readable. - - **baseMean** is the average number of normalized reads across all samples. - It is used for estimating dispersion and gives an indication of how much - evidence there is for a gene. It should not be interpreted as expression - level, since these values are not normalized by transcript length (which - would introduce various biases). - - **log2FoldChange** is the effect size of differential expression. DESeq2 - first calculates the simple log2 fold change using the mean of normalized - counts for the condtions being compared. However, both 1/5 and 1000/5000 - give a fold change of 5x, but the latter has much more information and we - would want to put more stake in that. So DESeq2 uses log2 fold change - shrinkage methods to down-weight 1/5 to something smaller than 5x so that - we don't design follow-up experiments on results with low information. For - more details, see the papers on - [apeglm](https://doi.org/10.1093/bioinformatics/bty895), - [ashr](https://doi.org/10.1093/biostatistics/kxw041), and - [normal](https://doi.org/10.1186/s13059-014-0550-8). **This is designed to - be the most important column to sort on.**. - - **lfcSE** is the standard error of the log2FoldChange estimate. In general, - the log2 fold change for genes with high lfcSE will be shrunken more. This - column is sometimes useful for diagnostics but not routinely used. - - **stat** is the test statistic used for generating the p-value. It can - sometimes be useful for diagnostics but is not routinely used. - - **pvalue** is the *raw pvalue*. **Do not interpret these values** because - they are not adjusted for multiple comparisons. 
This column is included - because sometimes it has an NA, which can be informative (see above notes - on NA values). - - **padj** is the pvalue, adjusted for multiple tests. This is required to - weed out bad data that looks good. See this [StatQuest - video](https://www.youtube.com/watch?v=K8LQSvtjcEo) for an explanation. - **Use this column** to assess statistical significance. It is equivalent to - the false discovery rate. Unless otherwise noted, use the default of 0.1 as - the threshold. This means that out of all the genes that have padj values - less than 0.1, we may be wrong on 10% of those. In other words, 10% of the - significant genes aren't actually significant. From c860bc554a65c9ed3a4eb8a1591d32d0293eadb3 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Thu, 30 May 2024 16:30:59 -0400 Subject: [PATCH 65/93] finished test-results.R minor changes elsewhere. build_results_tabs now returns a list of its previously only printed plots so they can be tested --- lib/lcdbwf/R/contrasts.R | 20 ++-- lib/lcdbwf/R/results.R | 26 +++-- lib/lcdbwf/tests/testthat/test-contrasts.R | 38 ++----- lib/lcdbwf/tests/testthat/test-functions.R | 30 ++++- lib/lcdbwf/tests/testthat/test-results.R | 121 +++++++++++++-------- 5 files changed, 140 insertions(+), 95 deletions(-) diff --git a/lib/lcdbwf/R/contrasts.R b/lib/lcdbwf/R/contrasts.R index f0f6a9c5..8d5bf082 100644 --- a/lib/lcdbwf/R/contrasts.R +++ b/lib/lcdbwf/R/contrasts.R @@ -248,26 +248,30 @@ results_diagnostics <- function(res, dds, name, config, text){ lcdbwf:::folded_markdown(text$results_diagnostics$filter_ma, "Help") filterThreshold <- metadata(res)$filterThreshold - p <- ggplot(res %>% as.data.frame() %>% mutate(filtered=res$baseMean < filterThreshold)) + + p1 <- ggplot(res %>% as.data.frame() %>% mutate(filtered=res$baseMean < filterThreshold)) + aes(x=log10(baseMean), y=log2FoldChange, color=filtered) + geom_point() - print(p) + print(p1) lcdbwf:::folded_markdown(text$results_diagnostics$outlier_ma, "Help") 
- p <- ggplot(res %>% as.data.frame() %>% mutate(outlier=is.na(res$pvalue))) + + p2 <- ggplot(res %>% as.data.frame() %>% mutate(outlier=is.na(res$pvalue))) + aes(x=log10(baseMean), y=log2FoldChange, color=outlier) + geom_point() - print(p) + print(p2) lcdbwf:::folded_markdown(text$results_diagnostics$lfcse_basemean, "Help") - p <- ggplot(res %>% as.data.frame() %>% mutate(outlier=is.na(res$pvalue))) + + p3 <- ggplot(res %>% as.data.frame() %>% mutate(outlier=is.na(res$pvalue))) + aes(x=log10(baseMean), y=lfcSE, color=outlier) + geom_point() - print(p) + print(p3) lcdbwf:::folded_markdown(text$results_diagnostics$lfcse_lfc, "Help") - p <- ggplot(res %>% as.data.frame() %>% mutate(outlier=is.na(res$pvalue))) + + p4 <- ggplot(res %>% as.data.frame() %>% mutate(outlier=is.na(res$pvalue))) + aes(x=log2FoldChange, y=lfcSE, color=outlier) + geom_point() - print(p) + print(p4) + + # Save plots to a list and return for testing + plots <- list(p1=p1, p2=p2, p3=p3, p4=p4) + return(plots) } diff --git a/lib/lcdbwf/R/results.R b/lib/lcdbwf/R/results.R index 204f3440..346695d8 100644 --- a/lib/lcdbwf/R/results.R +++ b/lib/lcdbwf/R/results.R @@ -29,12 +29,16 @@ build_results_tabs <- function(res_list, dds_list, config, text){ } } + # Make a list to store the plot objects for testing + plots <- list() + for (name in names(res_list)){ dds_i <- dds_list[[res_list[[name]][['dds']] ]] res_i <- res_list[[name]][['res']] label <- res_list[[name]][['label']] - # Do any contrasts contain LRT? + # Does this contrast contain LRT? 
contains_LRT <- check_LRT(res_i) + plots[[name]] <- list() genes_to_label <- lcdbwf:::genes_to_label(res_i, n=5, config) lcdbwf:::mdcat('## ', label, ' {.tabset}') @@ -49,10 +53,12 @@ build_results_tabs <- function(res_list, dds_list, config, text){ if (contains_LRT) { mdcat("LRT log2FoldChange values have been set to 0") } - print(lcdbwf:::plotMA_label( + p <- lcdbwf:::plotMA_label( res_i, genes_to_label=genes_to_label, - label_column=config$annotation$label_column)) + label_column=config$annotation$label_column) + print(p) + plots[[name]]$ma_plot <- p lcdbwf:::mdcat('### Volcano plot') lcdbwf:::folded_markdown(text$results_plots$volcano, "Help") @@ -60,19 +66,25 @@ build_results_tabs <- function(res_list, dds_list, config, text){ mdcat("LRT log2FoldChange values have been set to 0") } - print(lcdbwf:::plot_volcano_label( + p <- lcdbwf:::plot_volcano_label( res_i, genes_to_label=genes_to_label, - label_column=config$annotation$label_column)) + label_column=config$annotation$label_column) + print(p) + plots[[name]]$volcano_plot <- p lcdbwf:::mdcat('### P-value distribution') lcdbwf:::folded_markdown(text$results_plots$pval_hist, "Help") - print(lcdbwf:::pval_hist(res_i)) + p <- lcdbwf:::pval_hist(res_i) + print(p) + plots[[name]]$pval_hist_plot <- p if (config$toggle$results_diagnostics){ - lcdbwf:::results_diagnostics(res=res_i, dds=res_list[[name]]$dds, name=name, config=config, text=text) + p <- lcdbwf:::results_diagnostics(res=res_i, dds=res_list[[name]]$dds, name=name, config=config, text=text) + plots[[name]]$diag_plots_list <- p } } + return(plots) } #' Check for LRT in a results object's metadata diff --git a/lib/lcdbwf/tests/testthat/test-contrasts.R b/lib/lcdbwf/tests/testthat/test-contrasts.R index bf9a7107..b262150d 100644 --- a/lib/lcdbwf/tests/testthat/test-contrasts.R +++ b/lib/lcdbwf/tests/testthat/test-contrasts.R @@ -8,33 +8,9 @@ config <- lcdbwf:::load_config('../../../../workflows/rnaseq/downstream/config.y source('test-functions.R') 
register(MulticoreParam(config$parallel$cores)) -# Function to create design data for LRT test -make_lrt_design_data <- function() { - lrt_design_data <- make_design_data() - lrt_design_data$test <- 'LRT' - lrt_design_data$reduced_design <- ~1 - return(lrt_design_data) -} # make_lrt_design_data - -make_dds_list <- function() { - # Create design data and dds object for Wald test type - wald_design_data <- make_design_data() - dds_wald <- make_dds(wald_design_data, - config=config, - featureCounts='featurecounts.txt', - parallel=config$parallel$parallel) - - lrt_design_data <- make_lrt_design_data() - dds_lrt <- make_dds(lrt_design_data, - config=config, - featureCounts='featurecounts.txt', - parallel=config$parallel$parallel) - - # Create dds_list - dds_list <- list(dds_wald=dds_wald, dds_lrt=dds_lrt) - return(dds_list) -} # make_dds_list - +is_deseq_res <- function(x) { + inherits(x, "DESeqResults") +} # Test all combinations of test and type # NULL shrinkage type skips lfcShrink # NULL test type runs Wald (default test) @@ -117,7 +93,7 @@ for (test in tests) { } # Check make_results output for each possible combination of test and type - expect_true(inherits(res$res, "DESeqResults")) + expect_true(is_deseq_res(res$res)) expect_true(identical(names(res), c('res', 'dds', 'label'))) lrt_mcols_description <- paste0(as.character(lrt_design_data$design)[1], " ", as.character(lrt_design_data$design)[2], "' vs '", @@ -168,7 +144,7 @@ test_that("make_results can handle dds object directly", { label='Direct DDS', type='ashr', contrast=contrast) - expect_true(inherits(res$res, "DESeqResults")) + expect_true(is_deseq_res(res$res)) expect_true(identical(names(res), c('res', 'dds', 'label'))) expect_true(any(grepl('Wald', mcols(res$res)$description[4]))) }) # test_that @@ -246,7 +222,7 @@ for (type in c('ashr', 'apeglm', 'normal')) { contrast=contrast, type=type, test='Wald') - expect_true(inherits(res$res, "DESeqResults")) + expect_true(is_deseq_res(res$res)) 
expect_true(identical(names(res), c('res', 'dds', 'label'))) expect_true(!all(res$res$log2FoldChange == 0)) expect_true(any(grepl('Wald', mcols(res$res)$description[4]))) @@ -256,7 +232,7 @@ for (type in c('ashr', 'apeglm', 'normal')) { coef=coef, type=type, test='Wald') - expect_true(inherits(res$res, "DESeqResults")) + expect_true(is_deseq_res(res$res)) expect_true(identical(names(res), c('res', 'dds', 'label'))) expect_true(!all(res$res$log2FoldChange == 0)) expect_true(any(grepl('Wald', mcols(res$res)$description[4]))) diff --git a/lib/lcdbwf/tests/testthat/test-functions.R b/lib/lcdbwf/tests/testthat/test-functions.R index 048f2477..5028c960 100644 --- a/lib/lcdbwf/tests/testthat/test-functions.R +++ b/lib/lcdbwf/tests/testthat/test-functions.R @@ -1,5 +1,4 @@ -# Helper function to make minimal default design data. design_data is an argument and -# object of type list that is passed to make_dds() +# Helper function to make minimal default design data make_design_data <- function() { lst <- list( # Create the sample table @@ -12,3 +11,30 @@ make_design_data <- function() { return(lst) } # make_default_wald_design_data +# Function to create design data for LRT test +make_lrt_design_data <- function() { + lrt_design_data <- make_design_data() + lrt_design_data$test <- 'LRT' + lrt_design_data$reduced_design <- ~1 + return(lrt_design_data) +} # make_lrt_design_data + +make_dds_list <- function() { + # Create design data and dds object for Wald test type + wald_design_data <- make_design_data() + dds_wald <- make_dds(wald_design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel) + + lrt_design_data <- make_lrt_design_data() + dds_lrt <- make_dds(lrt_design_data, + config=config, + featureCounts='featurecounts.txt', + parallel=config$parallel$parallel) + + # Create dds_list + dds_list <- list(dds_wald=dds_wald, dds_lrt=dds_lrt) + return(dds_list) +} # make_dds_list + diff --git a/lib/lcdbwf/tests/testthat/test-results.R 
b/lib/lcdbwf/tests/testthat/test-results.R index de177b92..98b6d326 100644 --- a/lib/lcdbwf/tests/testthat/test-results.R +++ b/lib/lcdbwf/tests/testthat/test-results.R @@ -7,14 +7,18 @@ library(BiocParallel) library(ggplot2) library(AnnotationHub) library(dplyr) - devtools::load_all('../../../../lib/lcdbwf') source('test-functions.R') config <- lcdbwf:::load_config('../../../../workflows/rnaseq/downstream/config.yaml') text <- yaml::yaml.load_file('../../../../workflows/rnaseq/downstream/text.yaml') register(MulticoreParam(config$parallel$cores)) -# Mock function to capture mdcat output +# Wrapper function for inherits ggplot +is_ggplot <- function(x) { + inherits(x, "ggplot") +} + +# Function to capture mdcat output mdcat_output <- c() mock_mdcat <- function(...) { mdcat_output <<- c(mdcat_output, paste(..., collapse = " ")) @@ -38,75 +42,98 @@ make_deseq_results <- function(test='Wald', type='ashr', reduced_design=NULL) { return(list(dds=dds, res=res)) } -# ------ Test build_results_tabs function ------ # # Create objects for testing defaults dds_and_res <- make_deseq_results() dds <- dds_and_res$dds res <- dds_and_res$res$res -dds_list <- list(dds1=dds) -res_list <- list(res1=list(res=res, dds='dds1', label='Defaults')) -res_list <- lcdbwf:::attach_extra(res_list, config) - -test_that("build_results_tabs works with default config", { - expect_silent(build_results_tabs(res_list, dds_list, config, text)) -}) +wald_dds_list <- list(dds1=dds) +wald_res_list <- list(res1=list(res=res, dds='dds1', label='Defaults')) +wald_res_list <- lcdbwf:::attach_extra(wald_res_list, config) # Create objects for testing 'LRT' -dds_and_res <- make_deseq_results(test='LRT', reduced_design=~1) +dds_and_res <- make_deseq_results(test='LRT', type=NULL, reduced_design=~1) dds <- dds_and_res$dds res <- dds_and_res$res$res -dds_list <- list(dds1=dds) -res_list <- list(res1=list(res=res, dds='dds1', label='LRT')) -res_list <- lcdbwf:::attach_extra(res_list, config) - -# Test 
build_results_tabs function -test_that("build_results_tabs works with LRT config", { - expect_silent(build_results_tabs(res_list, dds_list, config, text)) -}) +lrt_dds_list <- list(dds1=dds) +lrt_res_list <- list(res1=list(res=res, dds='dds1', label='LRT')) +lrt_res_list <- lcdbwf:::attach_extra(lrt_res_list, config) -test_that("build_results_tabs works with diagnostics disabled", { - config$toggle$results_diagnostics <- FALSE - expect_silent(build_results_tabs(res_list, dds_list, config, text)) -}) +# ------ Test build_results_tabs function ------ # +test_that("build_results_tabs works with Wald test", { + # build_results_tabs requires 'dds_list' in .GlobalEnv + dds_list <<- wald_dds_list + plots <- suppressWarnings(build_results_tabs(wald_res_list, wald_dds_list, config, text)) + + # Check that each plot in the list is a ggplot object + for (name in names(plots)) { + expect_true(is_ggplot(plots[[name]]$ma_plot)) + expect_true(is_ggplot(plots[[name]]$volcano_plot)) + expect_true(is_ggplot(plots[[name]]$pval_hist_plot)) + # Check diagnostic plots + if (config$toggle$results_diagnostics) { + # diag_plot_list is a list of ggplot objects + for (diag_plot in plots[[name]]$diag_plot_list) { + expect_true(is_ggplot(diag_plot)) + } # for diag_plot + } # if config + } # for name +}) # test_that + +test_that("build_results_tabs works with LRT", { + # build_results_tabs requires 'dds_list' in .GlobalEnv + dds_list <<- lrt_dds_list + plots <- suppressWarnings(build_results_tabs(lrt_res_list, lrt_dds_list, config, text)) + + # Check that each plot in the list is a ggplot object + for (name in names(plots)) { + expect_true(is_ggplot(plots[[name]]$ma_plot)) + expect_true(is_ggplot(plots[[name]]$volcano_plot)) + expect_true(is_ggplot(plots[[name]]$pval_hist_plot)) + # Check diagnostic plots + if (config$toggle$results_diagnostics) { + # diag_plot_list is a list of ggplot objects + for (diag_plot in plots[[name]]$diag_plot_list) { + expect_true(is_ggplot(diag_plot)) + } # for 
diag_plot + } # if config + } # for name +}) # test_that +# ---------------------------------------------- # -test_that("build_results_tabs works with specific diagnostics results names", { - config$toggle$results_diagnostics <- TRUE - config$plotting$diagnostics_results_names <- c("res1") - expect_silent(build_results_tabs(res_list, dds_list, config, text)) +test_that("check_LRT identifies LRT results correctly", { + expect_true(check_LRT(lrt_res_list$res1$res)) + expect_false(check_LRT(wald_res_list$res1$res)) }) -test_that("build_results_tabs handles empty res_list", { - expect_silent(build_results_tabs(list(), dds_list, config, text)) -}) +# Test that mdcat is called with expected values for LRT +test_that("build_results_tabs calls mdcat with expected character for LRT", { + # build_results_tabs requires 'dds_list' in .GlobalEnv + dds_list <<- lrt_dds_list -test_that("check_LRT identifies LRT results correctly", { - res_LRT_result <- create_deseq_results(test='LRT', reduced_design=~1) - res_LRT <- res_LRT_result$res - expect_true(check_LRT(res_LRT)) + # Capture mdcat output + mdcat_output <<- c() + with_mock( + `lcdbwf:::mdcat` = mock_mdcat, + suppressWarnings(build_results_tabs(lrt_res_list, lrt_dds_list, config, text)) + ) - res_Wald_result <- create_deseq_results(test='Wald') - res_Wald <- res_Wald_result$res - expect_false(check_LRT(res_Wald)) + expect_true(any(grepl("LRT log2FoldChange values have been set to 0", mdcat_output))) }) -# Test that mdcat is called with expected values for LRT -test_that("build_results_tabs calls mdcat with expected values for LRT", { - res_LRT_result <- create_deseq_results(test='LRT', reduced_design=~1) - res_LRT <- res_LRT_result$res - dds <- res_LRT_result$dds - dds_list_LRT <- list(dds1=dds) - res_list_LRT <- list(res1=list(res=res_LRT, dds='dds1', label='LRT Test Label')) +# Test that mdcat is not called with LRT expected values for Wald test +test_that("build_results_tabs does not call mdcat with LRT expected character 
for Wald", { + # build_results_tabs requires 'dds_list' in .GlobalEnv + dds_list <<- wald_dds_list # Capture mdcat output mdcat_output <<- c() with_mock( `lcdbwf:::mdcat` = mock_mdcat, - build_results_tabs(res_list_LRT, dds_list_LRT, config, text) + suppressWarnings(build_results_tabs(wald_res_list, wald_dds_list, config, text)) ) - expect_false(any(grepl("Wald", mdcat_output))) - expect_true(any(grepl("LRT", mdcat_output))) + expect_false(any(grepl("LRT log2FoldChange values have been set to 0", mdcat_output))) }) From 6d76d0c1c6346da1b4d9ce9d2820abb1dd654e62 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Thu, 30 May 2024 16:33:50 -0400 Subject: [PATCH 66/93] ignored Rplots.pdf and lib/include dir -- dont want to commit annotationhub db --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 52dab8bf..31ee86ef 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,5 @@ workflows/rnaseq/downstream/rnaseq.html *.xlsx *._* lib/lcdbwf/tests/testthat/featurecounts.txt +*Rplots.pdf +/lib/include/* From 5530bd368542f57e55cad029e882d21225d906aa Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Thu, 30 May 2024 16:40:43 -0400 Subject: [PATCH 67/93] removed library and devtools lcdbwf loadings from tests --- lib/lcdbwf/tests/testthat/test-contrasts.R | 14 +++++++------- lib/lcdbwf/tests/testthat/test-dds.R | 7 +++---- lib/lcdbwf/tests/testthat/test-results.R | 22 +++++++++++----------- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/lib/lcdbwf/tests/testthat/test-contrasts.R b/lib/lcdbwf/tests/testthat/test-contrasts.R index b262150d..ce6ee457 100644 --- a/lib/lcdbwf/tests/testthat/test-contrasts.R +++ b/lib/lcdbwf/tests/testthat/test-contrasts.R @@ -1,12 +1,12 @@ -library(DESeq2) -library(testthat) -library(rlang) -library(stringr) -library(BiocParallel) -devtools::load_all('../../../../lib/lcdbwf') +#library(DESeq2) +#library(testthat) +#library(rlang) +#library(stringr) +#library(BiocParallel) 
+#devtools::load_all('../../../../lib/lcdbwf') +#register(MulticoreParam(config$parallel$cores)) config <- lcdbwf:::load_config('../../../../workflows/rnaseq/downstream/config.yaml') source('test-functions.R') -register(MulticoreParam(config$parallel$cores)) is_deseq_res <- function(x) { inherits(x, "DESeqResults") diff --git a/lib/lcdbwf/tests/testthat/test-dds.R b/lib/lcdbwf/tests/testthat/test-dds.R index 7b02c80b..38c1141b 100644 --- a/lib/lcdbwf/tests/testthat/test-dds.R +++ b/lib/lcdbwf/tests/testthat/test-dds.R @@ -1,10 +1,9 @@ -library(DESeq2) -library(testthat) -devtools::load_all('../../../../lib/lcdbwf') +#library(DESeq2) +#library(testthat) +#devtools::load_all('../../../../lib/lcdbwf') config <- lcdbwf:::load_config('../../../../workflows/rnaseq/downstream/config.yaml') source('test-functions.R') - # --------- Test strip_dotted_version_from_dds() ---------- # test_that("strip_dotted_version_from_dds works", { dds <- DESeq2::makeExampleDESeqDataSet() diff --git a/lib/lcdbwf/tests/testthat/test-results.R b/lib/lcdbwf/tests/testthat/test-results.R index 98b6d326..a3e74037 100644 --- a/lib/lcdbwf/tests/testthat/test-results.R +++ b/lib/lcdbwf/tests/testthat/test-results.R @@ -1,17 +1,17 @@ -library(testthat) -library(DESeq2) -library(lcdbwf) -library(rlang) -library(stringr) -library(BiocParallel) -library(ggplot2) -library(AnnotationHub) -library(dplyr) -devtools::load_all('../../../../lib/lcdbwf') +#library(testthat) +#library(DESeq2) +#library(lcdbwf) +#library(rlang) +#library(stringr) +#library(BiocParallel) +#library(ggplot2) +#library(AnnotationHub) +#library(dplyr) +#register(MulticoreParam(config$parallel$cores)) +#devtools::load_all('../../../../lib/lcdbwf') source('test-functions.R') config <- lcdbwf:::load_config('../../../../workflows/rnaseq/downstream/config.yaml') text <- yaml::yaml.load_file('../../../../workflows/rnaseq/downstream/text.yaml') -register(MulticoreParam(config$parallel$cores)) # Wrapper function for inherits ggplot is_ggplot 
<- function(x) { From f8ffa7d8159f78f84c58b443279a503904603674 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Thu, 30 May 2024 16:44:49 -0400 Subject: [PATCH 68/93] removed code for testing the tests --- lib/lcdbwf/tests/testthat/test-contrasts.R | 8 -------- 1 file changed, 8 deletions(-) diff --git a/lib/lcdbwf/tests/testthat/test-contrasts.R b/lib/lcdbwf/tests/testthat/test-contrasts.R index ce6ee457..83d057c2 100644 --- a/lib/lcdbwf/tests/testthat/test-contrasts.R +++ b/lib/lcdbwf/tests/testthat/test-contrasts.R @@ -21,14 +21,6 @@ coef <- "group_treatment_vs_control" dds_list <- make_dds_list() lrt_design_data <- make_lrt_design_data() -### TESTING ### -#test <- 'Wald' -#type <- 'ashr' -#dds_name <- 'dds_wald' -#contrast <- c("condition", "treatment", "control") -#label <- paste0("test=", test %||% "NULL/default (Wald)", ", type=", type %||% "NULL (Skip)") -############## - # Each row in the ASCII table indicates which combination of test, type, coef, and contrast # is tested by the respective indexed conditional statement in the following test_that code. 
From 814f29111a183b0eadf0c2b62fb455e27cf06d41 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Thu, 30 May 2024 16:49:15 -0400 Subject: [PATCH 69/93] un ignored testthat featurecounts.txt --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 31ee86ef..11d7394c 100644 --- a/.gitignore +++ b/.gitignore @@ -64,6 +64,5 @@ workflows/rnaseq/downstream/rnaseq_files workflows/rnaseq/downstream/rnaseq.html *.xlsx *._* -lib/lcdbwf/tests/testthat/featurecounts.txt *Rplots.pdf /lib/include/* From e157a8a9bcc6f7f25cb0fc2b2a4cac5e5fbd7837 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Fri, 31 May 2024 10:17:07 -0400 Subject: [PATCH 70/93] explination of LRT all(LFC == 0) --- workflows/rnaseq/downstream/text.yaml | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/workflows/rnaseq/downstream/text.yaml b/workflows/rnaseq/downstream/text.yaml index 348a0325..1a413122 100644 --- a/workflows/rnaseq/downstream/text.yaml +++ b/workflows/rnaseq/downstream/text.yaml @@ -118,12 +118,18 @@ results_plots: normalized to library size, not transcript length. We can say that a gene with higher baseMean than another gene has more observed reads, but we cannot say whether that is because it has a longer transcript or because - there are more transcripts. + there are more transcripts. For likelihood ratio test results, we impute + the log2FoldChange values to 0. See "log2FoldChange" below and + [DESeq2 likelihood ratio test](https://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#i-ran-a-likelihood-ratio-test-but-results-only-gives-me-one-comparison.) + for details as to why. volcano: | A **volcano plot** shows fewer dimensions than an M-A plot (it does not show normalized counts) but it can be useful for looking at the relationship of - adjusted p-value and log2FoldChange. + adjusted p-value and log2FoldChange.
For likelihood ratio test results, we impute + the log2FoldChange values to 0. See "log2FoldChange" below and + [DESeq2 likelihood ratio test](https://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#i-ran-a-likelihood-ratio-test-but-results-only-gives-me-one-comparison.) + for details as to why. pval_hist: | The **raw p-value distribution** helps us assess if the data met the @@ -224,7 +230,16 @@ results_files: [apeglm](https://doi.org/10.1093/bioinformatics/bty895), [ashr](https://doi.org/10.1093/biostatistics/kxw041), and [normal](https://doi.org/10.1186/s13059-014-0550-8). **This is designed to - be the most important column to sort on.**. + be the most important column to sort on.**. We do not report meaningful + LFC values for results generated by the likelihood ratio test (LRT). The + LRT is a method for comparing the goodness of fit between a full model and + a reduced model, evaluating whether the reduced model adequately explains the + data without relying on a single pairwise comparison. The log2FoldChange is + only applicable for pairwise comparisons; therefore, we impute the + log2FoldChange values to 0. + See [DESeq2 likelihood ratio test](https://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#i-ran-a-likelihood-ratio-test-but-results-only-gives-me-one-comparison.) + for more details. + - **lfcSE** is the standard error of the log2FoldChange estimate. In general, the log2 fold change for genes with high lfcSE will be shrunken more. This column is sometimes useful for diagnostics but not routinely used.
From 76b6f03b19c0630b4645ba657879323927dbfbbd Mon Sep 17 00:00:00 2001 From: Ryan Dale Date: Tue, 4 Jun 2024 09:54:58 -0400 Subject: [PATCH 71/93] try fixing r-tests to exit nonzero if failed --- test/lcdb-wf-test | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/lcdb-wf-test b/test/lcdb-wf-test index 1edb2576..95be364c 100755 --- a/test/lcdb-wf-test +++ b/test/lcdb-wf-test @@ -334,7 +334,7 @@ class Runner(object): if args.r_test: print_header("R test") - sp.run( + p = sp.run( 'eval "$(conda shell.bash hook)" ' f"&& conda activate {args.env_r} " '''&& Rscript -e "devtools::test('lib/lcdbwf', export_all=TRUE)"''', @@ -342,6 +342,8 @@ class Runner(object): check=True, executable="/bin/bash" ) + if p.returncode: + sys.exit(1) if args.ensure_docs: sp.run(["./ensure_docs.py"], check=True, cwd=TOPLEVEL / "ci") From dd208bb94fae8f3337146554c4598faa1ff3b521 Mon Sep 17 00:00:00 2001 From: Ryan Dale Date: Thu, 6 Jun 2024 09:45:32 -0400 Subject: [PATCH 72/93] be more careful about namespace in tests & lib code --- lib/lcdbwf/R/annotations.R | 4 +- lib/lcdbwf/R/contrasts.R | 50 ++++++------- lib/lcdbwf/R/dds.R | 18 ++--- lib/lcdbwf/R/plotting.R | 83 +++++++++++++--------- lib/lcdbwf/tests/testthat/test-contrasts.R | 33 +++++---- lib/lcdbwf/tests/testthat/test-dds.R | 14 ++-- lib/lcdbwf/tests/testthat/test-results.R | 11 --- 7 files changed, 107 insertions(+), 106 deletions(-) diff --git a/lib/lcdbwf/R/annotations.R b/lib/lcdbwf/R/annotations.R index d1a40b7d..db30d76c 100644 --- a/lib/lcdbwf/R/annotations.R +++ b/lib/lcdbwf/R/annotations.R @@ -26,7 +26,7 @@ get_annotation_hub <- function(config, localHub=NULL, force=NULL, cache=NULL){ } ah <- AnnotationHub::AnnotationHub( - hub=getAnnotationHubOption('URL'), + hub=AnnotationHub::getAnnotationHubOption('URL'), proxy=proxy, localHub=localHub, cache=cache @@ -95,7 +95,7 @@ get_annotation_db <- function(config, dbtype, genus_species=NULL, orgdb_key_over ) } - hits <- mcols(ah.query) %>% + hits <- 
SummarizedExperiment::mcols(ah.query) %>% as.data.frame() %>% dplyr::arrange(desc(rdatadateadded)) %>% dplyr::filter(rdataclass==dbtype) diff --git a/lib/lcdbwf/R/contrasts.R b/lib/lcdbwf/R/contrasts.R index 8d5bf082..39986065 100644 --- a/lib/lcdbwf/R/contrasts.R +++ b/lib/lcdbwf/R/contrasts.R @@ -167,11 +167,11 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ # Initial check on test argument: # Make sure the 'test' passed to make_results is the test detected in the dds object if ('test' %in% names(dots)) { - if (dots$test == 'Wald' && (any(grepl('LRTStatistic', names(mcols(dds)))) || - any(grepl('LRTPvalue', names(mcols(dds)))))) { + if (dots$test == 'Wald' && (any(grepl('LRTStatistic', names(S4Vectors::mcols(dds)))) || + any(grepl('LRTPvalue', names(S4Vectors::mcols(dds)))))) { stop("The 'test' passed to make_results was set to 'Wald' but 'LRT' has been detected in dds") - } else if (dots$test == 'LRT' && (any(grepl('WaldStatistic', names(mcols(dds)))) || - any(grepl('WaldPvalue', names(mcols(dds)))))) { + } else if (dots$test == 'LRT' && (any(grepl('WaldStatistic', names(S4Vectors::mcols(dds)))) || + any(grepl('WaldPvalue', names(S4Vectors::mcols(dds)))))) { stop("The 'test' passed to make_results was set to 'LRT' but 'Wald' has been detected in dds") } } @@ -179,10 +179,10 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ # Detect 'test' type when the 'test' argument is missing from dots test_detected <- FALSE if (!'test' %in% names(dots)) { - if (any(grepl('LRTStatistic', names(mcols(dds)))) || any(grepl('LRTPvalue', names(mcols(dds))))) { + if (any(grepl('LRTStatistic', names(S4Vectors::mcols(dds)))) || any(grepl('LRTPvalue', names(S4Vectors::mcols(dds))))) { dots$test <- 'LRT' test_detected <- TRUE - } else if (any(grepl('WaldStatistic', names(mcols(dds)))) || any(grepl('WaldPvalue', names(mcols(dds))))) { + } else if (any(grepl('WaldStatistic', names(S4Vectors::mcols(dds)))) || any(grepl('WaldPvalue', 
names(S4Vectors::mcols(dds))))) { dots$test <- 'Wald' test_detected <- TRUE } else { @@ -196,8 +196,8 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ } # Call results() with the subset of dots that it accepts. - results_dots <- lcdbwf:::match_from_dots(dots, results) - res <- do.call("results", results_dots) + results_dots <- lcdbwf:::match_from_dots(dots, DESeq2::results) + res <- do.call(DESeq2::results, results_dots) # When make_results is called with 'test' set to 'LRT', # or when make_results is called with 'test' missing but @@ -225,12 +225,12 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ dots[['res']] <- res dots[['dds']] <- dds - lfcShrink_dots <- lcdbwf:::match_from_dots(dots, lfcShrink) - res <- do.call("lfcShrink", lfcShrink_dots) + lfcShrink_dots <- lcdbwf:::match_from_dots(dots, DESeq2::lfcShrink) + res <- do.call(DESeq2::lfcShrink, lfcShrink_dots) - metadata(res)$type <- dots$type + S4Vectors::metadata(res)$type <- dots$type } else { - metadata(res)$type <- NULL + S4Vectors::metadata(res)$type <- NULL } return( @@ -247,28 +247,28 @@ results_diagnostics <- function(res, dds, name, config, text){ print(knitr::kable(lcdbwf:::my_summary(res, dds, name))) lcdbwf:::folded_markdown(text$results_diagnostics$filter_ma, "Help") - filterThreshold <- metadata(res)$filterThreshold - p1 <- ggplot(res %>% as.data.frame() %>% mutate(filtered=res$baseMean < filterThreshold)) + - aes(x=log10(baseMean), y=log2FoldChange, color=filtered) + - geom_point() + filterThreshold <- S4Vectors::metadata(res)$filterThreshold + p1 <- ggplot2::ggplot(res %>% as.data.frame() %>% dplyr::mutate(filtered=res$baseMean < filterThreshold)) + + ggplot2::aes(x=log10(baseMean), y=log2FoldChange, color=filtered) + + ggplot2::geom_point() print(p1) lcdbwf:::folded_markdown(text$results_diagnostics$outlier_ma, "Help") - p2 <- ggplot(res %>% as.data.frame() %>% mutate(outlier=is.na(res$pvalue))) + - aes(x=log10(baseMean), y=log2FoldChange, color=outlier) + - 
geom_point() + p2 <- ggplot2::ggplot(res %>% as.data.frame() %>% dplyr::mutate(outlier=is.na(res$pvalue))) + + ggplot2::aes(x=log10(baseMean), y=log2FoldChange, color=outlier) + + ggplot2::geom_point() print(p2) lcdbwf:::folded_markdown(text$results_diagnostics$lfcse_basemean, "Help") - p3 <- ggplot(res %>% as.data.frame() %>% mutate(outlier=is.na(res$pvalue))) + - aes(x=log10(baseMean), y=lfcSE, color=outlier) + - geom_point() + p3 <- ggplot2::ggplot(res %>% as.data.frame() %>% dplyr::mutate(outlier=is.na(res$pvalue))) + + ggplot2::aes(x=log10(baseMean), y=lfcSE, color=outlier) + + ggplot2::geom_point() print(p3) lcdbwf:::folded_markdown(text$results_diagnostics$lfcse_lfc, "Help") - p4 <- ggplot(res %>% as.data.frame() %>% mutate(outlier=is.na(res$pvalue))) + - aes(x=log2FoldChange, y=lfcSE, color=outlier) + - geom_point() + p4 <- ggplot2::ggplot(res %>% as.data.frame() %>% dplyr::mutate(outlier=is.na(res$pvalue))) + + ggplot2::aes(x=log2FoldChange, y=lfcSE, color=outlier) + + ggplot2::geom_point() print(p4) # Save plots to a list and return for testing diff --git a/lib/lcdbwf/R/dds.R b/lib/lcdbwf/R/dds.R index a97e23e8..ae28ef6f 100644 --- a/lib/lcdbwf/R/dds.R +++ b/lib/lcdbwf/R/dds.R @@ -130,13 +130,13 @@ make_dds <- function(design_data, config=NULL, collapse_by=NULL, # Check if we need to perform the LRT on the dds object if (test == 'Wald') { - dds <- DESeq(dds, test=test, ...) + dds <- DESeq2::DESeq(dds, test=test, ...) return(dds) } else if (test == 'LRT') { if (is.null(reduced_design)){ stop("When using LRT, reduced_design must be provided") } - dds <- DESeq(dds, test=test, reduced=reduced_design, ...) + dds <- DESeq2::DESeq(dds, test=test, reduced=reduced_design, ...) 
return(dds) } } @@ -176,7 +176,7 @@ strip_dotted_version_from_dds <- function(dds, force=FALSE){ #' biological replicate (e.g., dds$biorep) collapseReplicates2 <- function(object, groupby){ collapsed <- DESeq2::collapseReplicates(object, groupby) - colData(collapsed)[,1] <- rownames(colData(collapsed)) + SummarizedExperiment::colData(collapsed)[,1] <- rownames(SummarizedExperiment::colData(collapsed)) return(collapsed) } @@ -206,21 +206,21 @@ dds_diagnostics <- function(dds_list, text){ p <- assays(dds_list[[name]])[['cooks']] %>% as.data.frame() %>% tidyr::pivot_longer(everything()) %>% - ggplot() + - aes(x=name, y=log10(value)) + - geom_boxplot() + - ylab("log10(Cook's distance)") + ggplot2::ggplot() + + ggplot2::aes(x=name, y=log10(value)) + + ggplot2::geom_boxplot() + + ggplot2::ylab("log10(Cook's distance)") print(p) mdcat("#### colData") mdcat(text$dds_diagnostics$colData) - cdata <- colData(dds_list[[name]]) %>% as.data.frame + cdata <- SummarizedExperiment::colData(dds_list[[name]]) %>% as.data.frame cdata <- cdata[, !grepl("filename", colnames(cdata))] print(htmltools::tagList(datatable(cdata))) mdcat("#### Design matrix") mdcat(text$dds_diagnostics$design_matrix, " The design is: `", deparse(design(dds_list[[name]])), "`") - mmat <- model.matrix(design(dds_list[[name]]), data=colData(dds_list[[name]])) %>% as.data.frame() + mmat <- model.matrix(design(dds_list[[name]]), data=SummarizedExperiment::colData(dds_list[[name]])) %>% as.data.frame() print(htmltools::tagList(datatable(mmat))) } } diff --git a/lib/lcdbwf/R/plotting.R b/lib/lcdbwf/R/plotting.R index c850b9dc..9e7bc8e5 100644 --- a/lib/lcdbwf/R/plotting.R +++ b/lib/lcdbwf/R/plotting.R @@ -7,9 +7,14 @@ plotPCA.ly <- function(rld, intgroup){ mat <- DESeq2::plotPCA(rld, intgroup, returnData=TRUE) pv <- attr(mat, 'percentVar') - p <- ggplot2::ggplot(data=mat, aes_string(x='PC1', y='PC2', color='group', label='name')) + - geom_point(size=3) + xlab(paste0('PC1: ', round(pv[1]*100), '% variance')) + - 
ylab(paste0('PC2: ', round(pv[2]*100), '% variance')) + coord_fixed() + p <- ggplot2::ggplot( + data=mat, + ggplot2::aes_string(x='PC1', y='PC2', color='group', label='name') + ) + + ggplot2::geom_point(size=3) + + ggplot2::xlab(paste0('PC1: ', round(pv[1]*100), '% variance')) + + ggplot2::ylab(paste0('PC2: ', round(pv[2]*100), '% variance')) + + ggplot2::coord_fixed() return(p) } @@ -70,9 +75,15 @@ plotMA_label <- function(res, down.max.de <- down.max[rownames(down.max) %in% rownames(de.list),] # create ggplot with appropriate layers - p <- ggplot2::ggplot(res, aes(baseMean, log2FoldChange)) + - ggplot2::geom_point(col="gray40") + scale_x_log10() + ylim(fc_lim[1], fc_lim[2]) + - theme_bw() + theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank()) + p <- ggplot2::ggplot(res, ggplot2::aes(baseMean, log2FoldChange)) + + ggplot2::geom_point(col="gray40") + + ggplot2::scale_x_log10() + + ggplot2::ylim(fc_lim[1], fc_lim[2]) + + ggplot2::theme_bw() + + ggplot2::theme( + panel.grid.major=ggplot2::element_blank(), + panel.grid.minor=ggplot2::element_blank() + ) p <- p + ggplot2::geom_hline(yintercept = 0, col="red", size=2, alpha=0.5) # add horizontal line p <- p + ggplot2::geom_point(data=up.max, col="gray40", pch=2) # add points above max y @@ -108,7 +119,7 @@ plotMA_label <- function(res, # add labels p <- p + ggplot2::geom_point(data=label.list, col="black", pch=1, size=3) - p <- p + ggrepel::geom_label_repel(data=label.list, aes(label=label.list$gene.labels, fontface="italic")) + p <- p + ggrepel::geom_label_repel(data=label.list, ggplot2::aes(label=label.list$gene.labels, fontface="italic")) } return(p) @@ -171,9 +182,11 @@ plot_volcano_label <- function(res, down.max.de <- down.max[rownames(down.max) %in% rownames(de.list),] # create ggplot with appropriate layers - p <- ggplot(res, aes(log2FoldChange, -log10(padj))) + - geom_point(col="gray40") + xlim(fc_lim[1], fc_lim[2]) + - theme_bw() + theme(panel.grid.major=element_blank(), 
panel.grid.minor=element_blank()) + p <- ggplot2::ggplot(res, ggplot2::aes(log2FoldChange, -log10(padj))) + + ggplot2::geom_point(col="gray40") + + ggplot2::xlim(fc_lim[1], fc_lim[2]) + + ggplot2::theme_bw() + + ggplot2::theme(panel.grid.major=ggplot2::element_blank(), panel.grid.minor=ggplot2::element_blank()) p <- p + ggplot2::geom_point(data=up.max, col="gray40", pch=2) # add points above max y p <- p + ggplot2::geom_point(data=down.max, col="gray40", pch=6) # add points below min y @@ -211,7 +224,7 @@ plot_volcano_label <- function(res, # add labels p <- p + ggplot2::geom_point(data=label.list, col="black", pch=1, size=3) - p <- p + ggrepel::geom_label_repel(data=label.list, aes(label=label.list$gene.labels, fontface="italic")) + p <- p + ggrepel::geom_label_repel(data=label.list, ggplot2::aes(label=label.list$gene.labels, fontface="italic")) } return(p) @@ -269,14 +282,14 @@ my.counts <- function(gene, dds, label=NULL, intgroup='group'){ # Assumption: color genes by group geneCounts <- plotCounts(dds, gene=gene, intgroup=intgroup, returnData=TRUE) - p <- ggplot2::ggplot(geneCounts, aes_string(x=intgroup, y='count', color=intgroup, group=intgroup)) + - scale_y_log10() + - ggplot2::geom_point(position=position_jitter(width=.1, height=0), size=3) + + p <- ggplot2::ggplot(geneCounts, ggplot2::aes_string(x=intgroup, y='count', color=intgroup, group=intgroup)) + + ggplot2::scale_y_log10() + + ggplot2::geom_point(position=ggplot2::position_jitter(width=.1, height=0), size=3) + ggplot2::geom_line(color='#000000') + ggplot2::ggtitle(gene) if (!is.null(label)){ - p <- p + ggtitle(label) + p <- p + ggplot2::ggtitle(label) } return(p) } @@ -326,10 +339,10 @@ counts.plot <- function(df, rank.nb=NULL, no.aes=FALSE, facet='label') { dplyr::mutate(facet = factor(!!!syms(facet), levels = unique(!!!syms(facet)))) plt <- ggplot2::ggplot(df) + ggplot2::scale_y_log10() + - ggplot2::geom_point(position=position_jitter(width=.1, height=0), size=3) + + 
ggplot2::geom_point(position=ggplot2::position_jitter(width=.1, height=0), size=3) + ggplot2::geom_line(color='#000000') + ggplot2::theme_bw() + - ggplot2::theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1)) + ggplot2::facet_wrap(.~facet, ncol=1, scales='free_y') if (!no.aes) { plt <- plt + @@ -355,13 +368,13 @@ pval_hist <- function(res){ df <- rbind(data.frame(x=h1$mids, counts=h1$counts, label='counts too low'), data.frame(x=h2$mids, counts=h2$counts, label='pass') ) - plt <- ggplot2::ggplot(df, aes(x=x, y=counts, fill=label)) + - geom_bar(stat = 'identity', color='gray20') + - theme_classic() + - scale_fill_manual(values=c("#EBE379", "#A3DAE0")) + - xlab('p-value') + - ylab('frequency') + - theme(legend.position = c(0.8, 0.8)) + plt <- ggplot2::ggplot(df, ggplot2::aes(x=x, y=counts, fill=label)) + + ggplot2::geom_bar(stat = 'identity', color='gray20') + + ggplot2::theme_classic() + + ggplot2::scale_fill_manual(values=c("#EBE379", "#A3DAE0")) + + ggplot2::xlab('p-value') + + ggplot2::ylab('frequency') + + ggplot2::theme(legend.position = c(0.8, 0.8)) return(plt) } @@ -393,7 +406,7 @@ sizefactors_vs_total <- function(dds){ trc <- colSums(counts(dds)) %>% tibble::enframe(value = 'Total Read Count') trc_vs_sf <- dplyr::full_join(sf, trc, by='name') - p <- ggplot2::ggplot(data=trc_vs_sf, aes_string(x="`Total Read Count`", y="`Size Factor`", label='name')) + + p <- ggplot2::ggplot(data=trc_vs_sf, ggplot2::aes_string(x="`Total Read Count`", y="`Size Factor`", label='name')) + ggplot2::geom_point(size=3) + ggplot2::theme_bw() + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1)) @@ -465,7 +478,7 @@ lfc_scatter <- function(res_i, res_j, padj.thr=0.1, name.col='SYMBOL', label_i=N by= name.col) # add significance column df <- df %>% - mutate('Significance' = case_when( + dplyr::mutate('Significance' = case_when( (padj.x <= padj.thr) & 
(padj.y <= padj.thr) & (log2FoldChange.x * log2FoldChange.y >= 0) ~ 'Both - same LFC sign', (padj.x <= padj.thr) & (padj.y <= padj.thr) & (log2FoldChange.x * log2FoldChange.y < 0) ~ 'Both - opposite LFC sign', (padj.x <= padj.thr) ~ label_i, @@ -482,16 +495,16 @@ lfc_scatter <- function(res_i, res_j, padj.thr=0.1, name.col='SYMBOL', label_i=N names(color.palette) <- c('Both - same LFC sign', 'Both - opposite LFC sign', 'None', label_i, label_j) - p <- ggplot(df %>% arrange(Significance), aes_string(x='log2FoldChange.x', y='log2FoldChange.y', + p <- ggplot2::ggplot(df %>% arrange(Significance), ggplot2::aes_string(x='log2FoldChange.x', y='log2FoldChange.y', color='Significance', label=name.col)) + - geom_point(size=1) + - theme_bw() + - scale_color_manual(values=color.palette) + - geom_abline(color="#333333", linetype="dashed", size=0.5, alpha=0.7) + - geom_hline(yintercept=0, color="#333333", linetype="dashed", size=0.5, alpha=0.7) + - geom_vline(xintercept=0, color="#333333", linetype="dashed", size=0.5, alpha=0.7) + - xlab(label_i) + - ylab(label_j) + ggplot2::geom_point(size=1) + + ggplot2::theme_bw() + + ggplot2::scale_color_manual(values=color.palette) + + ggplot2::geom_abline(color="#333333", linetype="dashed", size=0.5, alpha=0.7) + + ggplot2::geom_hline(yintercept=0, color="#333333", linetype="dashed", size=0.5, alpha=0.7) + + ggplot2::geom_vline(xintercept=0, color="#333333", linetype="dashed", size=0.5, alpha=0.7) + + ggplot2::xlab(label_i) + + ggplot2::ylab(label_j) return(p) } diff --git a/lib/lcdbwf/tests/testthat/test-contrasts.R b/lib/lcdbwf/tests/testthat/test-contrasts.R index 83d057c2..f4f5a5b9 100644 --- a/lib/lcdbwf/tests/testthat/test-contrasts.R +++ b/lib/lcdbwf/tests/testthat/test-contrasts.R @@ -1,9 +1,8 @@ -#library(DESeq2) -#library(testthat) -#library(rlang) -#library(stringr) -#library(BiocParallel) -#devtools::load_all('../../../../lib/lcdbwf') +devtools::load_all('../../../../lib/lcdbwf') + +# Used for the %||% operator +library(rlang) 
+ #register(MulticoreParam(config$parallel$cores)) config <- lcdbwf:::load_config('../../../../workflows/rnaseq/downstream/config.yaml') source('test-functions.R') @@ -95,33 +94,33 @@ for (test in tests) { # A if ((is.null(test) || test == 'Wald') && (!is.null(type) && type == 'ashr')) { expected_char <- paste(test %||% 'Wald', "test p-value:", contrast[1], contrast[2], "vs", contrast[3]) - expect_true(mcols(res$res)$description[4] == expected_char) + expect_true(S4Vectors::mcols(res$res)$description[4] == expected_char) # B } else if ((is.null(test) || test == 'Wald') && (!is.null(type) && type == 'apeglm')) { - coef <- str_split(coef, "_")[[1]] + coef <- stringr::str_split(coef, "_")[[1]] expected_char <- paste(test %||% 'Wald', "test p-value:", coef[1], coef[2], coef[3], coef[4]) - expect_true(mcols(res$res)$description[4] == expected_char) + expect_true(S4Vectors::mcols(res$res)$description[4] == expected_char) # C } else if ((is.null(test) || test == 'Wald') && (is.null(type) || type == 'normal')) { expected_char <- paste(test %||% 'Wald', "statistic:", contrast[1], contrast[2], "vs", contrast[3]) - expect_true(mcols(res$res)$description[4] == expected_char) + expect_true(S4Vectors::mcols(res$res)$description[4] == expected_char) # D } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && type != 'normal')) { expected_char <- paste0(test, " p-value: '", lrt_mcols_description) - expect_true(mcols(res$res)$description[4] == expected_char) + expect_true(S4Vectors::mcols(res$res)$description[4] == expected_char) # E } else if ((!is.null(test) && test == 'LRT') && (is.null(type) || type == 'normal')) { expect_true(all(res$res$log2FoldChange == 0)) expected_char <- paste0(test, " statistic: '", lrt_mcols_description) - expect_true(mcols(res$res)$description[4] == expected_char) + expect_true(S4Vectors::mcols(res$res)$description[4] == expected_char) } else { stop(paste(label, 'was not checked')) } # Check for expected type stored in the result's metadata 
if (!is.null(type)) { - expect_true(identical(metadata(res$res)$type, type)) + expect_true(identical(S4Vectors::metadata(res$res)$type, type)) } else if (is.null(type)) { - expect_true(is.null(metadata(res$res)$type)) + expect_true(is.null(S4Vectors::metadata(res$res)$type)) } }) # test_that } # for type in shrinkage_types @@ -138,7 +137,7 @@ test_that("make_results can handle dds object directly", { contrast=contrast) expect_true(is_deseq_res(res$res)) expect_true(identical(names(res), c('res', 'dds', 'label'))) - expect_true(any(grepl('Wald', mcols(res$res)$description[4]))) + expect_true(any(grepl('Wald', S4Vectors::mcols(res$res)$description[4]))) }) # test_that # ---------------------------------------------------------------- # @@ -217,7 +216,7 @@ for (type in c('ashr', 'apeglm', 'normal')) { expect_true(is_deseq_res(res$res)) expect_true(identical(names(res), c('res', 'dds', 'label'))) expect_true(!all(res$res$log2FoldChange == 0)) - expect_true(any(grepl('Wald', mcols(res$res)$description[4]))) + expect_true(any(grepl('Wald', S4Vectors::mcols(res$res)$description[4]))) } else if (type == 'apeglm') { res <- make_results(dds_name='dds_wald', label='Shrink Wald results', @@ -227,7 +226,7 @@ for (type in c('ashr', 'apeglm', 'normal')) { expect_true(is_deseq_res(res$res)) expect_true(identical(names(res), c('res', 'dds', 'label'))) expect_true(!all(res$res$log2FoldChange == 0)) - expect_true(any(grepl('Wald', mcols(res$res)$description[4]))) + expect_true(any(grepl('Wald', S4Vectors::mcols(res$res)$description[4]))) } # if type }) # test_that } # for type diff --git a/lib/lcdbwf/tests/testthat/test-dds.R b/lib/lcdbwf/tests/testthat/test-dds.R index 38c1141b..a3e0b09d 100644 --- a/lib/lcdbwf/tests/testthat/test-dds.R +++ b/lib/lcdbwf/tests/testthat/test-dds.R @@ -31,8 +31,8 @@ test_that("make_dds handles minimum required design data of sampletale, expect_true(inherits(dds, "DESeqDataSet")) # Check that the WaldStatistic has been generated as Wald is the default 
test that # is run with minimal input to make_dds() - expect_true(any(grepl('Wald', colnames(mcols(dds))))) - expect_false(any(grepl('LRT', colnames(mcols(dds))))) + expect_true(any(grepl('Wald', colnames(S4Vectors::mcols(dds))))) + expect_false(any(grepl('LRT', colnames(S4Vectors::mcols(dds))))) }) # test_that test_that("make_dds handles design data where Wald test is specified explicitly", { @@ -45,8 +45,8 @@ test_that("make_dds handles design data where Wald test is specified explicitly" # Check that the dds object is a DESeqDataSet (not NULL) expect_true(inherits(dds, "DESeqDataSet")) # Check that the WaldStatistic has been generated when Wald is passed to make_dds() - expect_true(any(grepl('Wald', colnames(mcols(dds))))) - expect_false(any(grepl('LRT', colnames(mcols(dds))))) + expect_true(any(grepl('Wald', colnames(S4Vectors::mcols(dds))))) + expect_false(any(grepl('LRT', colnames(S4Vectors::mcols(dds))))) }) # test_that test_that("make_dds handles required design data for LRT", { @@ -60,8 +60,8 @@ test_that("make_dds handles required design data for LRT", { # Check that the dds object is a DESeqDataSet (not NULL) expect_true(inherits(dds, "DESeqDataSet")) # Check that the WaldStatistic has been generated - expect_true(any(grepl('LRT', colnames(mcols(dds))))) - expect_false(any(grepl('Wald', colnames(mcols(dds))))) + expect_true(any(grepl('LRT', colnames(S4Vectors::mcols(dds))))) + expect_false(any(grepl('Wald', colnames(S4Vectors::mcols(dds))))) }) # test_that # Now we intentionally call make_results with incompatible parameters @@ -110,7 +110,7 @@ test_that("collapseReplicates2 collapses the two control replicates and two trea # Perform replicate collapsing dds_collapsed <- collapseReplicates2(dds, dds$group) - colData_collapsed <- as.data.frame(colData(dds_collapsed)) + colData_collapsed <- as.data.frame(SummarizedExperiment::colData(dds_collapsed)) expect_equal(length(as.character(colData_collapsed$group[colData_collapsed$group == 'control'])), 1) 
expect_equal(length(as.character(colData_collapsed$group[colData_collapsed$group == 'treatment'])), 1) # Check if the first column of colData matches rownames diff --git a/lib/lcdbwf/tests/testthat/test-results.R b/lib/lcdbwf/tests/testthat/test-results.R index a3e74037..bd0cc97a 100644 --- a/lib/lcdbwf/tests/testthat/test-results.R +++ b/lib/lcdbwf/tests/testthat/test-results.R @@ -1,14 +1,3 @@ -#library(testthat) -#library(DESeq2) -#library(lcdbwf) -#library(rlang) -#library(stringr) -#library(BiocParallel) -#library(ggplot2) -#library(AnnotationHub) -#library(dplyr) -#register(MulticoreParam(config$parallel$cores)) -#devtools::load_all('../../../../lib/lcdbwf') source('test-functions.R') config <- lcdbwf:::load_config('../../../../workflows/rnaseq/downstream/config.yaml') text <- yaml::yaml.load_file('../../../../workflows/rnaseq/downstream/text.yaml') From 9828f4690e7bdee50cfabcf4ed46aafa5fd2829b Mon Sep 17 00:00:00 2001 From: Ryan Dale Date: Thu, 6 Jun 2024 11:17:02 -0400 Subject: [PATCH 73/93] directly manipulate .GlobalEnv from within tests --- lib/lcdbwf/tests/testthat/test-contrasts.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/lcdbwf/tests/testthat/test-contrasts.R b/lib/lcdbwf/tests/testthat/test-contrasts.R index f4f5a5b9..9b0776b8 100644 --- a/lib/lcdbwf/tests/testthat/test-contrasts.R +++ b/lib/lcdbwf/tests/testthat/test-contrasts.R @@ -18,6 +18,10 @@ shrinkage_types <- list('ashr', 'apeglm', 'normal', NULL) contrast <- c("group", "treatment", "control") coef <- "group_treatment_vs_control" dds_list <- make_dds_list() +# Ensure dds_list makes it into the global environment, no matter what fancy +# stuff {testthat} is doing. 
+assign("dds_list", dds_list, envir=.GlobalEnv) + lrt_design_data <- make_lrt_design_data() # Each row in the ASCII table indicates which combination of test, type, coef, and contrast @@ -234,7 +238,8 @@ for (type in c('ashr', 'apeglm', 'normal')) { # ---------------------- missing dds_list ------------------------ # -remove(dds_list) +orig_dds_list <- dds_list +remove(dds_list, envir=.GlobalEnv) test_that("make_results errors when a dds_name is passed and dds_list is missing from .GlobalEnv", { expect_error(make_results(dds_name='dds_wald', label='missing dds_list', @@ -242,4 +247,6 @@ test_that("make_results errors when a dds_name is passed and dds_list is missing contrast=contrast), "Can't find dds_list in global environment.") }) # test_that +# Put it back into the global env +assign("dds_list", orig_dds_list, envir=.GlobalEnv) # ---------------------------------------------------------------- # From f5b8f7839821aad63cc0c23911c716a6b4b6f427 Mon Sep 17 00:00:00 2001 From: Ryan Dale Date: Thu, 6 Jun 2024 11:18:39 -0400 Subject: [PATCH 74/93] expose config arg --- lib/lcdbwf/tests/testthat/test-contrasts.R | 3 ++- lib/lcdbwf/tests/testthat/test-functions.R | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/lcdbwf/tests/testthat/test-contrasts.R b/lib/lcdbwf/tests/testthat/test-contrasts.R index 9b0776b8..e3699123 100644 --- a/lib/lcdbwf/tests/testthat/test-contrasts.R +++ b/lib/lcdbwf/tests/testthat/test-contrasts.R @@ -17,7 +17,8 @@ tests <- list('Wald', 'LRT', NULL) shrinkage_types <- list('ashr', 'apeglm', 'normal', NULL) contrast <- c("group", "treatment", "control") coef <- "group_treatment_vs_control" -dds_list <- make_dds_list() +dds_list <- make_dds_list(config) + # Ensure dds_list makes it into the global environment, no matter what fancy # stuff {testthat} is doing. 
assign("dds_list", dds_list, envir=.GlobalEnv) diff --git a/lib/lcdbwf/tests/testthat/test-functions.R b/lib/lcdbwf/tests/testthat/test-functions.R index 5028c960..2189448d 100644 --- a/lib/lcdbwf/tests/testthat/test-functions.R +++ b/lib/lcdbwf/tests/testthat/test-functions.R @@ -19,7 +19,7 @@ make_lrt_design_data <- function() { return(lrt_design_data) } # make_lrt_design_data -make_dds_list <- function() { +make_dds_list <- function(config) { # Create design data and dds object for Wald test type wald_design_data <- make_design_data() dds_wald <- make_dds(wald_design_data, From b0fe18a276b0bb1f7209f3c818a3ed9908cf90dd Mon Sep 17 00:00:00 2001 From: Ryan Dale Date: Thu, 6 Jun 2024 11:20:28 -0400 Subject: [PATCH 75/93] make testthat tests exit nonzero upon failure --- test/lcdb-wf-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/lcdb-wf-test b/test/lcdb-wf-test index 95be364c..df59b24c 100755 --- a/test/lcdb-wf-test +++ b/test/lcdb-wf-test @@ -337,7 +337,7 @@ class Runner(object): p = sp.run( 'eval "$(conda shell.bash hook)" ' f"&& conda activate {args.env_r} " - '''&& Rscript -e "devtools::test('lib/lcdbwf', export_all=TRUE)"''', + '''&& Rscript -e "devtools::test('lib/lcdbwf', reporter=c('summary', 'fail'), export_all=TRUE)"''', shell=True, check=True, executable="/bin/bash" From 75e62de7f7d250d0a7498f0c4dea84a948888a82 Mon Sep 17 00:00:00 2001 From: Ryan Dale Date: Thu, 6 Jun 2024 12:51:19 -0400 Subject: [PATCH 76/93] clean up printed output during tests --- lib/lcdbwf/tests/testthat/test-results.R | 29 ++++++++---------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/lib/lcdbwf/tests/testthat/test-results.R b/lib/lcdbwf/tests/testthat/test-results.R index bd0cc97a..d6af3352 100644 --- a/lib/lcdbwf/tests/testthat/test-results.R +++ b/lib/lcdbwf/tests/testthat/test-results.R @@ -53,7 +53,7 @@ lrt_res_list <- lcdbwf:::attach_extra(lrt_res_list, config) test_that("build_results_tabs works with Wald test", { # 
build_results_tabs requires 'dds_list' in .GlobalEnv dds_list <<- wald_dds_list - plots <- suppressWarnings(build_results_tabs(wald_res_list, wald_dds_list, config, text)) + plots <- capture_output(build_results_tabs(wald_res_list, wald_dds_list, config, text)) # Check that each plot in the list is a ggplot object for (name in names(plots)) { @@ -73,7 +73,7 @@ test_that("build_results_tabs works with Wald test", { test_that("build_results_tabs works with LRT", { # build_results_tabs requires 'dds_list' in .GlobalEnv dds_list <<- lrt_dds_list - plots <- suppressWarnings(build_results_tabs(lrt_res_list, lrt_dds_list, config, text)) + plots <- capture_output(build_results_tabs(lrt_res_list, lrt_dds_list, config, text)) # Check that each plot in the list is a ggplot object for (name in names(plots)) { @@ -100,29 +100,18 @@ test_that("check_LRT identifies LRT results correctly", { test_that("build_results_tabs calls mdcat with expected character for LRT", { # build_results_tabs requires 'dds_list' in .GlobalEnv dds_list <<- lrt_dds_list - - # Capture mdcat output - mdcat_output <<- c() - with_mock( - `lcdbwf:::mdcat` = mock_mdcat, + output <- capture_output({ suppressWarnings(build_results_tabs(lrt_res_list, lrt_dds_list, config, text)) - ) - - expect_true(any(grepl("LRT log2FoldChange values have been set to 0", mdcat_output))) + }) + expect_true(any(grepl("LRT log2FoldChange values have been set to 0", output))) }) # Test that mdcat is not called with LRT expected values for Wald test test_that("build_results_tabs does not call mdcat with LRT expected character for Wald", { # build_results_tabs requires 'dds_list' in .GlobalEnv dds_list <<- wald_dds_list - - # Capture mdcat output - mdcat_output <<- c() - with_mock( - `lcdbwf:::mdcat` = mock_mdcat, - suppressWarnings(build_results_tabs(wald_res_list, wald_dds_list, config, text)) - ) - - expect_false(any(grepl("LRT log2FoldChange values have been set to 0", mdcat_output))) + output <- capture_output({ + 
build_results_tabs(wald_res_list, wald_dds_list, config, text) + }) + expect_false(any(grepl("LRT log2FoldChange values have been set to 0", output))) }) - From 2bcc279f1405960d7cee7389901b47689e943d80 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Tue, 11 Jun 2024 17:00:45 -0400 Subject: [PATCH 77/93] simplified checks --- lib/lcdbwf/R/contrasts.R | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/lib/lcdbwf/R/contrasts.R b/lib/lcdbwf/R/contrasts.R index 39986065..f41d2067 100644 --- a/lib/lcdbwf/R/contrasts.R +++ b/lib/lcdbwf/R/contrasts.R @@ -126,8 +126,8 @@ dds_coefs <- function(dds, ..., expand=FALSE){ #' @param dds_list List of dds objects. If NULL, then look in the global #' environment for an object called "dds_list" and use that. #' @param type Type of shrinkage for use by lfcShrink(). If no type is given, -#' we use the current DESeq2 default argument for lfcShrink(type=). If -#' NULL is given, we skip lfcShrink(). +#' we use the current DESeq2 default argument for Type. If +#' NULL is given, we skip lfcShrink() altogether and directly return the object from results(). #' @param ... Additional arguments are passed to results() and lfcShrink(). If #' "parallel" is not explicitly specified here, then look in the global env for #' a variable called "config" and find the parallel config setting from there. @@ -164,25 +164,19 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ # say, 'dds'. dots[['object']] <- dds - # Initial check on test argument: - # Make sure the 'test' passed to make_results is the test detected in the dds object + # Ensure any provided `test` argument is consistent with the dds object provided. + # This uses names from S4Vectors::mcols(dds) to detect how the dds object was created. 
if ('test' %in% names(dots)) { - if (dots$test == 'Wald' && (any(grepl('LRTStatistic', names(S4Vectors::mcols(dds)))) || - any(grepl('LRTPvalue', names(S4Vectors::mcols(dds)))))) { - stop("The 'test' passed to make_results was set to 'Wald' but 'LRT' has been detected in dds") - } else if (dots$test == 'LRT' && (any(grepl('WaldStatistic', names(S4Vectors::mcols(dds)))) || - any(grepl('WaldPvalue', names(S4Vectors::mcols(dds)))))) { - stop("The 'test' passed to make_results was set to 'LRT' but 'Wald' has been detected in dds") + if ((dots$test == 'Wald' && any(grepl('LRT', names(S4Vectors::mcols(dds))))) || + (dots$test == 'LRT' && any(grepl('Wald', names(S4Vectors::mcols(dds)))))) { + stop("The 'test' passed to make_results does not match the detected test type in dds") } - } - - # Detect 'test' type when the 'test' argument is missing from dots - test_detected <- FALSE - if (!'test' %in% names(dots)) { - if (any(grepl('LRTStatistic', names(S4Vectors::mcols(dds)))) || any(grepl('LRTPvalue', names(S4Vectors::mcols(dds))))) { + } else { + test_detected <- FALSE + if (any(grepl('LRT', names(S4Vectors::mcols(dds))))) { dots$test <- 'LRT' test_detected <- TRUE - } else if (any(grepl('WaldStatistic', names(S4Vectors::mcols(dds)))) || any(grepl('WaldPvalue', names(S4Vectors::mcols(dds))))) { + } else if (any(grepl('Wald', names(S4Vectors::mcols(dds))))) { dots$test <- 'Wald' test_detected <- TRUE } else { @@ -271,7 +265,7 @@ results_diagnostics <- function(res, dds, name, config, text){ ggplot2::geom_point() print(p4) - # Save plots to a list and return for testing + # Save plots to a list and return for testing plots <- list(p1=p1, p2=p2, p3=p3, p4=p4) return(plots) } From 7844d9f390232856b4b2c77b46111ef32d5134b9 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Tue, 11 Jun 2024 17:01:22 -0400 Subject: [PATCH 78/93] added second table for second half of checks that dont fit in first table --- lib/lcdbwf/tests/testthat/test-contrasts.R | 168 ++++++++++++--------- 1 
file changed, 93 insertions(+), 75 deletions(-) diff --git a/lib/lcdbwf/tests/testthat/test-contrasts.R b/lib/lcdbwf/tests/testthat/test-contrasts.R index e3699123..1cbb22bd 100644 --- a/lib/lcdbwf/tests/testthat/test-contrasts.R +++ b/lib/lcdbwf/tests/testthat/test-contrasts.R @@ -1,15 +1,22 @@ -devtools::load_all('../../../../lib/lcdbwf') +# For developement +#library(DESeq2) +#library(testthat) +#library(stringr) +#library(BiocParallel) +#register(MulticoreParam(config$parallel$cores)) +devtools::load_all('../../../../lib/lcdbwf') # Used for the %||% operator library(rlang) -#register(MulticoreParam(config$parallel$cores)) config <- lcdbwf:::load_config('../../../../workflows/rnaseq/downstream/config.yaml') source('test-functions.R') +# Wrapper function for inherits DESeqResults is_deseq_res <- function(x) { inherits(x, "DESeqResults") } + # Test all combinations of test and type # NULL shrinkage type skips lfcShrink # NULL test type runs Wald (default test) @@ -17,34 +24,35 @@ tests <- list('Wald', 'LRT', NULL) shrinkage_types <- list('ashr', 'apeglm', 'normal', NULL) contrast <- c("group", "treatment", "control") coef <- "group_treatment_vs_control" + +# Creates a `dds_list` in the global namespace with names `dds_wald` and `dds_lrt` to be used below. dds_list <- make_dds_list(config) # Ensure dds_list makes it into the global environment, no matter what fancy # stuff {testthat} is doing. assign("dds_list", dds_list, envir=.GlobalEnv) - lrt_design_data <- make_lrt_design_data() # Each row in the ASCII table indicates which combination of test, type, coef, and contrast # is tested by the respective indexed conditional statement in the following test_that code. 
-#+---------+-------+-------+------+----------+-------+ -#| Results | Test | Type | Coef | Contrast | Check | -#+---------+-------+-------+------+----------+-------+ -#| 1 | LRT | NULL | - | - | E | -#| 2 | LRT | ashr | - | - | F | -#| 2 | LRT | apeglm| - | - | F | -#| 2 | LRT | normal| - | - | F | -#| 5 | Wald | NULL | - | yes | C | -#| 3 | Wald | ashr | - | yes | A | -#| 7 | Wald | apeglm| yes | - | B | -#| 3 | Wald | normal| - | yes | C | -#| 6 | NULL | NULL | - | yes | C | -#| 4 | NULL | ashr | - | yes | A | -#| 8 | NULL | apeglm| yes | - | B | -#| 4 | NULL | normal| - | yes | C | -#+---------+-------+-------+------+----------+-------+ - +#+---------+-------+--------+------+----------+-------+----------+ +#| Results | Test | Type | Coef | Contrast | Check | DDS | +#+---------+-------+--------+------+----------+-------+----------+ +#| 1 | LRT | NULL | - | - | D | dds_lrt | +#| 2 | LRT | ashr | - | - | E | dds_lrt | +#| 2 | LRT | apeglm | - | - | E | dds_lrt | +#| 2 | LRT | normal | - | - | E | dds_lrt | +#| 5 | Wald | NULL | - | yes | C | dds_wald | +#| 3 | Wald | ashr | - | yes | A | dds_wald | +#| 7 | Wald | apeglm | yes | - | B | dds_wald | +#| 3 | Wald | normal | - | yes | C | dds_wald | +#| 6 | NULL | NULL | - | yes | C | dds_wald | +#| 4 | NULL | ashr | - | yes | A | dds_wald | +#| 8 | NULL | apeglm | yes | - | B | dds_wald | +#| 4 | NULL | normal | - | yes | C | dds_wald | +#+---------+-------+--------+------+----------+-------+----------+ +# for (test in tests) { for (type in shrinkage_types) { if (test == 'Wald' || is.null(test)) { @@ -57,31 +65,43 @@ for (test in tests) { from DDS"), { # 'Results' from the table above # 1 + # | 1 | LRT | NULL | - | - | D | if ((!is.null(test) && test == 'LRT') && is.null(type)) { res <- make_results(dds_name=dds_name, label=label, type=NULL) # 2 } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && type %in% c('ashr', 'apeglm', 'normal'))) { # 'Check' from the table above # F + # | 2 | LRT | ashr | - | - | 
E | + # | 2 | LRT | apeglm| - | - | E | + # | 2 | LRT | normal| - | - | E | expect_error(make_results(dds_name=dds_name, label=label, type=type), "You cannot pass a non-NULL or missing type to make_results with an LRT dds object. For LRT, LFC values are set to 0 and should not be passed to lfcShrink. Use type == NULL in make_results for LRT DDS objects.") return() # 3 + # | 3 | Wald | ashr | - | yes | A | + # | 3 | Wald | normal| - | yes | C | } else if (!is.null(test) && type != 'apeglm' && !is.null(type)) { res <- make_results(dds_name=dds_name, label=label, type=type, contrast=contrast) # 4 + # | 4 | NULL | ashr | - | yes | A | + # | 4 | NULL | normal| - | yes | C | } else if (is.null(test) && type != 'apeglm' && !is.null(type)) { res <- make_results(dds_name=dds_name, label=label, type=type, contrast=contrast) # 5 + # | 5 | Wald | NULL | - | yes | C | } else if (!is.null(test) && is.null(type)) { res <- make_results(dds_name=dds_name, label=label, type=NULL, contrast=contrast) # 6 + # | 6 | NULL | NULL | - | yes | C | } else if (is.null(test) && is.null(type)) { res <- make_results(dds_name=dds_name, label=label, type=NULL, contrast=contrast) # 7 + # | 7 | Wald | apeglm| yes | - | B | } else if (!is.null(test) && type == 'apeglm') { res <- make_results(dds_name=dds_name, label=label, type=type, coef=coef) # 8 + # | 8 | NULL | apeglm| yes | - | B | } else if (is.null(test) && type == 'apeglm') { res <- make_results(dds_name=dds_name, label=label, type=type, coef=coef) } else { @@ -97,24 +117,29 @@ for (test in tests) { as.character(lrt_design_data$reduced_design)[2], "'") # 'Check' from the table above # A + # | 3 | Wald | ashr | - | yes | A | + # | 4 | NULL | ashr | - | yes | A | if ((is.null(test) || test == 'Wald') && (!is.null(type) && type == 'ashr')) { expected_char <- paste(test %||% 'Wald', "test p-value:", contrast[1], contrast[2], "vs", contrast[3]) expect_true(S4Vectors::mcols(res$res)$description[4] == expected_char) # B + # | 7 | Wald | apeglm| yes 
| - | B | + # | 8 | NULL | apeglm| yes | - | B | } else if ((is.null(test) || test == 'Wald') && (!is.null(type) && type == 'apeglm')) { coef <- stringr::str_split(coef, "_")[[1]] expected_char <- paste(test %||% 'Wald', "test p-value:", coef[1], coef[2], coef[3], coef[4]) expect_true(S4Vectors::mcols(res$res)$description[4] == expected_char) # C + # | 5 | Wald | NULL | - | yes | C | + # | 3 | Wald | normal| - | yes | C | + # | 6 | NULL | NULL | - | yes | C | + # | 4 | NULL | normal| - | yes | C | } else if ((is.null(test) || test == 'Wald') && (is.null(type) || type == 'normal')) { expected_char <- paste(test %||% 'Wald', "statistic:", contrast[1], contrast[2], "vs", contrast[3]) expect_true(S4Vectors::mcols(res$res)$description[4] == expected_char) # D - } else if ((!is.null(test) && test == 'LRT') && (!is.null(type) && type != 'normal')) { - expected_char <- paste0(test, " p-value: '", lrt_mcols_description) - expect_true(S4Vectors::mcols(res$res)$description[4] == expected_char) - # E - } else if ((!is.null(test) && test == 'LRT') && (is.null(type) || type == 'normal')) { + # | 1 | LRT | NULL | - | - | D | + } else if ((!is.null(test) && test == 'LRT') && (is.null(type))) { expect_true(all(res$res$log2FoldChange == 0)) expected_char <- paste0(test, " statistic: '", lrt_mcols_description) expect_true(S4Vectors::mcols(res$res)$description[4] == expected_char) @@ -131,60 +156,62 @@ for (test in tests) { } # for type in shrinkage_types } # for test in tests - +#+---------+--------+---------+------+----------+----------+ +#| Results | Test | Type | Coef | Contrast | DDS | +#+---------+--------+---------+------+----------+----------+ +#| 1 | NULL | ashr | - | yes | dds_wald | +#| 2 | Wald | NULL | - | - | dds_lrt | +#| 3 | LRT | NULL | - | - | dds_wald | +#| 4 | LRT | ashr | - | - | dds_lrt | +#| 5 | LRT | apeglm | - | - | dds_lrt | +#| 6 | LRT | normal | - | - | dds_lrt | +#| 7 | NULL | NULL | - | - | dds_lrt | +#| 8 | Wald | ashr | - | yes | dds_wald | +#| 9 | Wald 
| normal | - | yes | dds_wald | +#| 10 | Wald | apeglm | yes | - | dds_wald | +#+---------+--------+---------+------+----------+----------+ +# # -------------------------- direct dds -------------------------- # +# 1 +# | 1 | NULL | ashr | - | yes | dds_wald | test_that("make_results can handle dds object directly", { dds <- dds_list[['dds_wald']] # Directly pass the dds object - res <- make_results(dds_name=dds, - label='Direct DDS', - type='ashr', - contrast=contrast) + res <- make_results(dds_name=dds, label='Direct DDS', type='ashr', contrast=contrast) expect_true(is_deseq_res(res$res)) expect_true(identical(names(res), c('res', 'dds', 'label'))) expect_true(any(grepl('Wald', S4Vectors::mcols(res$res)$description[4]))) -}) # test_that +}) # ---------------------------------------------------------------- # # ---- make_results with dds_lrt but with wald test specified ---- # -# Similar structure to the ASCII table depicted tests from above -# but with the 'test' argument included in make_results -# and test == 'LRT' +# 2 +# | 2 | Wald | NULL | - | - | dds_lrt | test_that("make_results errors when user passes mismatched test == 'Wald' with LRT DDS", { - expect_error(make_results(dds_name='dds_lrt', - label='Shrink lrt results', - type=NULL, - test='Wald'), + expect_error(make_results(dds_name='dds_lrt', label='Shrink lrt results', type=NULL, test='Wald'), "The 'test' passed to make_results was set to 'Wald' but 'LRT' has been detected in dds") -}) # test_that +}) # ---------------------------------------------------------------- # # ---- make_results with dds_wald but with LRT test specified ---- # -# Similar structure to the ASCII table depicted tests from above -# but with the 'test' argument included in make_results -# and test == 'LRT' +# 3 +# | 3 | LRT | NULL | - | - | dds_wald test_that("make_results errors when user passes mismatched test == 'LRT' with Wald DDS", { - expect_error(make_results(dds_name='dds_wald', - label='Shrink lrt results', - type=NULL, 
- test='LRT'), + expect_error(make_results(dds_name='dds_wald', label='Shrink lrt results', type=NULL, test='LRT'), "The 'test' passed to make_results was set to 'LRT' but 'Wald' has been detected in dds") }) # test_that # ---------------------------------------------------------------- # # ----- Attempt to shrink LRT results but with test included ----- # -# Similar structure to the ASCII table depicted tests from above -# but with the 'test' argument included in make_results -# and test == 'LRT' +# | 4 | LRT | ashr | - | - | dds_lrt | +# | 5 | LRT | apeglm | - | - | dds_lrt | +# | 6 | LRT | normal | - | - | dds_lrt | for (type in c('ashr', 'apeglm', 'normal')) { test_that("make_results errors when user attempts to run lfcShrink by defining a non-NULL or missing type when test == 'LRT'", { - expect_error(make_results(dds_name='dds_lrt', - label='Shrink lrt results', - type=type, - test='LRT'), + expect_error(make_results(dds_name='dds_lrt', label='Shrink lrt results', type=type, test='LRT'), "You cannot pass a non-NULL or missing type to make_results with test == 'LRT'. For LRT, LFC values are set to 0 and should not be passed to lfcShrink. Use type == NULL in make_results for LRT DDS objects.") }) # test_that } # for type @@ -192,36 +219,27 @@ for (type in c('ashr', 'apeglm', 'normal')) { # -------------- missing both test and type with LRT ------------- # -# Similar structure to the ASCII table depicted tests from above -# with the 'test' argument also missing in make_results -# and test also missing. When test is missing, test should be detected from dds_lrt as 'LRT'. -# With type also missing, type should be set as the current default: 'ashr' as of 5-30-2024. -# This combination of test and type is incompatible and so the following error message should -# be returned. 
+#| 7 | NULL | NULL | - | - | dds_lrt | test_that("make_results errors when user attempts to run lfcShrink by defining a missing type when test == 'LRT'", { - expect_error(make_results(dds_name='dds_lrt', - label='missing test and type of LRT DDS'), + expect_error(make_results(dds_name='dds_lrt', label='missing test and type of LRT DDS'), "You cannot pass a non-NULL or missing type to make_results with an LRT dds object. For LRT, LFC values are set to 0 and should not be passed to lfcShrink. Use type == NULL in make_results for LRT DDS objects.") -}) # test_that +}) # ---------------------------------------------------------------- # # ------------- test included for all types dds Wald ------------- # -# Similar structure to the ASCII table depicted tests from above -# but with the 'test' argument included in make_results -# and test == 'Wald' +# | 8 | Wald | ashr | - | yes | dds_wald | +# | 9 | Wald | apeglm | yes | - | dds_wald | +# | 10 | Wald | normal | - | yes | dds_wald | for (type in c('ashr', 'apeglm', 'normal')) { test_that("make_results returns a DESeqResults object with !all res$res$LFC == 0 when user passes a defined type along with test == 'Wald'", { if (type != 'apeglm') { - res <- make_results(dds_name='dds_wald', - label='Shrink Wald results', - contrast=contrast, - type=type, - test='Wald') + res <- make_results(dds_name='dds_wald', label='Shrink Wald results', contrast=contrast, type=type, test='Wald') expect_true(is_deseq_res(res$res)) - expect_true(identical(names(res), c('res', 'dds', 'label'))) - expect_true(!all(res$res$log2FoldChange == 0)) - expect_true(any(grepl('Wald', S4Vectors::mcols(res$res)$description[4]))) + expect expect_true(identical(names(res), c('res', 'dds', 'label'))) + expect expect_true(!all(res$res$log2FoldChange == 0)) + expect expect_true(any(grepl('Wald', mcols(res$res)$description[4]))) + # Can't use `contrast` argument for apeglm } else if (type == 'apeglm') { res <- make_results(dds_name='dds_wald', label='Shrink 
Wald results', @@ -239,8 +257,8 @@ for (type in c('ashr', 'apeglm', 'normal')) { # ---------------------- missing dds_list ------------------------ # -orig_dds_list <- dds_list -remove(dds_list, envir=.GlobalEnv) +# | 8 | Wald | ashr | - | yes | dds_wald | +remove(dds_list) test_that("make_results errors when a dds_name is passed and dds_list is missing from .GlobalEnv", { expect_error(make_results(dds_name='dds_wald', label='missing dds_list', From f5b2b8c9313f1a0247d065ada977dcb8c27f5cc7 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Tue, 11 Jun 2024 17:02:08 -0400 Subject: [PATCH 79/93] added S4Vectors::mcols --- lib/lcdbwf/tests/testthat/test-dds.R | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/lib/lcdbwf/tests/testthat/test-dds.R b/lib/lcdbwf/tests/testthat/test-dds.R index a3e0b09d..1f917d03 100644 --- a/lib/lcdbwf/tests/testthat/test-dds.R +++ b/lib/lcdbwf/tests/testthat/test-dds.R @@ -1,6 +1,8 @@ +# For development #library(DESeq2) #library(testthat) #devtools::load_all('../../../../lib/lcdbwf') + config <- lcdbwf:::load_config('../../../../workflows/rnaseq/downstream/config.yaml') source('test-functions.R') @@ -16,7 +18,7 @@ test_that("strip_dotted_version_from_dds works", { rownames(dds) <- paste0("ENS", rownames(dds), '.', seq(1000)) fixed <- lcdbwf:::strip_dotted_version_from_dds(dds) expect_equal(rownames(fixed)[1], "ENSgene1") -}) # test_that +}) # --------------------------------------------------------- # # --------------------- Test make_dds() ------------------- # @@ -27,13 +29,10 @@ test_that("make_dds handles minimum required design data of sampletale, config=config, featureCounts='featurecounts.txt', parallel=config$parallel$parallel) - # Check that the dds object is a DESeqDataSet (not NULL) expect_true(inherits(dds, "DESeqDataSet")) - # Check that the WaldStatistic has been generated as Wald is the default test that - # is run with minimal input to make_dds() expect_true(any(grepl('Wald', 
colnames(S4Vectors::mcols(dds))))) expect_false(any(grepl('LRT', colnames(S4Vectors::mcols(dds))))) -}) # test_that +}) test_that("make_dds handles design data where Wald test is specified explicitly", { design_data <- make_design_data() @@ -42,12 +41,10 @@ test_that("make_dds handles design data where Wald test is specified explicitly" config=config, featureCounts='featurecounts.txt', parallel=config$parallel$parallel) - # Check that the dds object is a DESeqDataSet (not NULL) expect_true(inherits(dds, "DESeqDataSet")) - # Check that the WaldStatistic has been generated when Wald is passed to make_dds() expect_true(any(grepl('Wald', colnames(S4Vectors::mcols(dds))))) expect_false(any(grepl('LRT', colnames(S4Vectors::mcols(dds))))) -}) # test_that +}) test_that("make_dds handles required design data for LRT", { design_data <- make_design_data() @@ -57,12 +54,10 @@ test_that("make_dds handles required design data for LRT", { config=config, featureCounts='featurecounts.txt', parallel=config$parallel$parallel) - # Check that the dds object is a DESeqDataSet (not NULL) expect_true(inherits(dds, "DESeqDataSet")) - # Check that the WaldStatistic has been generated expect_true(any(grepl('LRT', colnames(S4Vectors::mcols(dds))))) expect_false(any(grepl('Wald', colnames(S4Vectors::mcols(dds))))) -}) # test_that +}) # Now we intentionally call make_results with incompatible parameters test_that("make_dds errors on invalid 'test' option", { @@ -73,7 +68,7 @@ test_that("make_dds errors on invalid 'test' option", { featureCounts='featurecounts.txt', parallel=config$parallel$parallel), paste("Valid options for test are \\'Wald\\' \\(default\\) or \\'LRT\\'. 
You chose,", design_data$test)) -}) # test_that +}) test_that("make_dds errors on missing reduced design when 'test' is set to LRT", { design_data <- make_design_data() @@ -83,7 +78,7 @@ test_that("make_dds errors on missing reduced design when 'test' is set to LRT", featureCounts='featurecounts.txt', parallel=config$parallel$parallel), "When using LRT, reduced_design must be provided") -}) # test_that +}) test_that("make_dds errors on missing test argument when reduced design is provided", { design_data <- make_design_data() @@ -93,7 +88,7 @@ test_that("make_dds errors on missing test argument when reduced design is provi featureCounts='featurecounts.txt', parallel=config$parallel$parallel), "You included a reduced design formula but did not specify test = 'LRT'") -}) # test_that +}) # --------------------------------------------------- # # -------------- collapseReplicates2 ---------------- # @@ -102,7 +97,6 @@ test_that("collapseReplicates2 collapses the two control replicates and two trea column 1.", { # Setup a DESeqDataSet with replicates design_data <- make_design_data() - # Add bio_rep to colData dds <- make_dds(design_data, config=config, featureCounts='featurecounts.txt', @@ -115,5 +109,6 @@ test_that("collapseReplicates2 collapses the two control replicates and two trea expect_equal(length(as.character(colData_collapsed$group[colData_collapsed$group == 'treatment'])), 1) # Check if the first column of colData matches rownames expect_equal(rownames(colData_collapsed), colData_collapsed[,1]) -}) # test_that +}) # --------------------------------------------------- # + From 431feb202316bff0bb66a28a4293079bccd98cdf Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Tue, 11 Jun 2024 17:42:38 -0400 Subject: [PATCH 80/93] fixed object test_detected not found error --- lib/lcdbwf/R/contrasts.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lcdbwf/R/contrasts.R b/lib/lcdbwf/R/contrasts.R index f41d2067..bb8dda55 100644 --- 
a/lib/lcdbwf/R/contrasts.R +++ b/lib/lcdbwf/R/contrasts.R @@ -166,13 +166,13 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ # Ensure any provided `test` argument is consistent with the dds object provided. # This uses names from S4Vectors::mcols(dds) to detect how the dds object was created. + test_detected <- FALSE if ('test' %in% names(dots)) { if ((dots$test == 'Wald' && any(grepl('LRT', names(S4Vectors::mcols(dds))))) || (dots$test == 'LRT' && any(grepl('Wald', names(S4Vectors::mcols(dds)))))) { stop("The 'test' passed to make_results does not match the detected test type in dds") } } else { - test_detected <- FALSE if (any(grepl('LRT', names(S4Vectors::mcols(dds))))) { dots$test <- 'LRT' test_detected <- TRUE From e9b02e70bfc6701663e42e1e864bf10130527aae Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Tue, 11 Jun 2024 17:43:07 -0400 Subject: [PATCH 81/93] updated error messages, minor fixes --- lib/lcdbwf/tests/testthat/test-contrasts.R | 42 ++++++++++------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/lib/lcdbwf/tests/testthat/test-contrasts.R b/lib/lcdbwf/tests/testthat/test-contrasts.R index 1cbb22bd..186f48f7 100644 --- a/lib/lcdbwf/tests/testthat/test-contrasts.R +++ b/lib/lcdbwf/tests/testthat/test-contrasts.R @@ -1,11 +1,3 @@ -# For developement -#library(DESeq2) -#library(testthat) -#library(stringr) -#library(BiocParallel) -#register(MulticoreParam(config$parallel$cores)) - -devtools::load_all('../../../../lib/lcdbwf') # Used for the %||% operator library(rlang) @@ -190,7 +182,7 @@ test_that("make_results can handle dds object directly", { # | 2 | Wald | NULL | - | - | dds_lrt | test_that("make_results errors when user passes mismatched test == 'Wald' with LRT DDS", { expect_error(make_results(dds_name='dds_lrt', label='Shrink lrt results', type=NULL, test='Wald'), - "The 'test' passed to make_results was set to 'Wald' but 'LRT' has been detected in dds") + "The 'test' passed to make_results does not 
match the detected test type in dds") }) # ---------------------------------------------------------------- # @@ -200,7 +192,7 @@ test_that("make_results errors when user passes mismatched test == 'Wald' with L # | 3 | LRT | NULL | - | - | dds_wald test_that("make_results errors when user passes mismatched test == 'LRT' with Wald DDS", { expect_error(make_results(dds_name='dds_wald', label='Shrink lrt results', type=NULL, test='LRT'), - "The 'test' passed to make_results was set to 'LRT' but 'Wald' has been detected in dds") + "The 'test' passed to make_results does not match the detected test type in dds") }) # test_that # ---------------------------------------------------------------- # @@ -236,9 +228,9 @@ for (type in c('ashr', 'apeglm', 'normal')) { if (type != 'apeglm') { res <- make_results(dds_name='dds_wald', label='Shrink Wald results', contrast=contrast, type=type, test='Wald') expect_true(is_deseq_res(res$res)) - expect expect_true(identical(names(res), c('res', 'dds', 'label'))) - expect expect_true(!all(res$res$log2FoldChange == 0)) - expect expect_true(any(grepl('Wald', mcols(res$res)$description[4]))) + expect_true(identical(names(res), c('res', 'dds', 'label'))) + expect_true(!all(res$res$log2FoldChange == 0)) + expect_true(any(grepl('Wald', S4Vectors::mcols(res$res)$description[4]))) # Can't use `contrast` argument for apeglm } else if (type == 'apeglm') { res <- make_results(dds_name='dds_wald', @@ -255,17 +247,19 @@ for (type in c('ashr', 'apeglm', 'normal')) { } # for type # ---------------------------------------------------------------- # - +# Commented out this test until we can figure out how to get testthat to remove an object +# from the .GlobalEnv # ---------------------- missing dds_list ------------------------ # # | 8 | Wald | ashr | - | yes | dds_wald | -remove(dds_list) -test_that("make_results errors when a dds_name is passed and dds_list is missing from .GlobalEnv", { - expect_error(make_results(dds_name='dds_wald', - 
label='missing dds_list', - type='ashr', - contrast=contrast), - "Can't find dds_list in global environment.") -}) # test_that -# Put it back into the global env -assign("dds_list", orig_dds_list, envir=.GlobalEnv) +#orig_dds_list <- dds_list +#remove(dds_list) +#test_that("make_results errors when a dds_name is passed and dds_list is missing from .GlobalEnv", { +# expect_error(make_results(dds_name='dds_wald', +# label='missing dds_list', +# type='ashr', +# contrast=contrast), +# "Can't find dds_list in global environment.") +#}) # test_that +## Put it back into the global env +#assign("dds_list", orig_dds_list, envir=.GlobalEnv) # ---------------------------------------------------------------- # From 94345019f5c4ec1e91be6987a2613d8b43e43f9a Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Tue, 11 Jun 2024 17:44:07 -0400 Subject: [PATCH 82/93] removed library loadings, now use Rscript -e devtools::test to run tests --- lib/lcdbwf/tests/testthat/test-dds.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/lcdbwf/tests/testthat/test-dds.R b/lib/lcdbwf/tests/testthat/test-dds.R index 1f917d03..c49f2f68 100644 --- a/lib/lcdbwf/tests/testthat/test-dds.R +++ b/lib/lcdbwf/tests/testthat/test-dds.R @@ -1,8 +1,3 @@ -# For development -#library(DESeq2) -#library(testthat) -#devtools::load_all('../../../../lib/lcdbwf') - config <- lcdbwf:::load_config('../../../../workflows/rnaseq/downstream/config.yaml') source('test-functions.R') From 12a7478de80d2f4e0d6c3b434603bb68210085b3 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Tue, 11 Jun 2024 17:44:53 -0400 Subject: [PATCH 83/93] removed capture_output wrapper, wasnt working with Rscript -e devtools --- lib/lcdbwf/tests/testthat/test-results.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/lcdbwf/tests/testthat/test-results.R b/lib/lcdbwf/tests/testthat/test-results.R index d6af3352..455f9649 100644 --- a/lib/lcdbwf/tests/testthat/test-results.R +++ 
b/lib/lcdbwf/tests/testthat/test-results.R @@ -53,7 +53,7 @@ lrt_res_list <- lcdbwf:::attach_extra(lrt_res_list, config) test_that("build_results_tabs works with Wald test", { # build_results_tabs requires 'dds_list' in .GlobalEnv dds_list <<- wald_dds_list - plots <- capture_output(build_results_tabs(wald_res_list, wald_dds_list, config, text)) + plots <- build_results_tabs(wald_res_list, wald_dds_list, config, text) # Check that each plot in the list is a ggplot object for (name in names(plots)) { @@ -73,7 +73,7 @@ test_that("build_results_tabs works with Wald test", { test_that("build_results_tabs works with LRT", { # build_results_tabs requires 'dds_list' in .GlobalEnv dds_list <<- lrt_dds_list - plots <- capture_output(build_results_tabs(lrt_res_list, lrt_dds_list, config, text)) + plots <- build_results_tabs(lrt_res_list, lrt_dds_list, config, text) # Check that each plot in the list is a ggplot object for (name in names(plots)) { From f912b1a58085eaa3253bebd150cef4f8dfaca2f6 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Tue, 11 Jun 2024 18:02:59 -0400 Subject: [PATCH 84/93] removed unnecessary code --- lib/lcdbwf/R/results.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/lcdbwf/R/results.R b/lib/lcdbwf/R/results.R index 346695d8..2efa233c 100644 --- a/lib/lcdbwf/R/results.R +++ b/lib/lcdbwf/R/results.R @@ -92,7 +92,5 @@ build_results_tabs <- function(res_list, dds_list, config, text){ #' @return Boolean TRUE if results object's pvalues were provided by #' nBinomLRT and FALSE if the Wald test was used. 
check_LRT <- function(res_i) { - mcols_pval <- mcols(res_i)$description[9] - mcols_pval <- grepl('LRT', mcols_pval) - return(mcols_pval) + mcols <- grepl('LRT', mcols(res_i)$description[9]) } From 2600c4c66920a239cccf910be808ce0c8d2efb21 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Tue, 11 Jun 2024 18:09:29 -0400 Subject: [PATCH 85/93] small changes missed when manually resolving conflicts --- .gitignore | 4 ++-- env-r.yml | 1 - lib/lcdbwf/R/contrasts.R | 8 +++++--- lib/lcdbwf/R/results.R | 2 +- lib/lcdbwf/tests/testthat/test-contrasts.R | 10 ++++++---- workflows/rnaseq/downstream/text.yaml | 6 +++--- 6 files changed, 17 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 11d7394c..ab3fd51e 100644 --- a/.gitignore +++ b/.gitignore @@ -63,6 +63,6 @@ workflows/rnaseq/downstream/rnaseq_cache workflows/rnaseq/downstream/rnaseq_files workflows/rnaseq/downstream/rnaseq.html *.xlsx -*._* -*Rplots.pdf +._* +Rplots.pdf /lib/include/* diff --git a/env-r.yml b/env-r.yml index 51b9bd9e..4f0a132b 100644 --- a/env-r.yml +++ b/env-r.yml @@ -402,4 +402,3 @@ dependencies: - yq=3.1.1 - zlib=1.2.13 - zstd=1.5.2 - - r-future diff --git a/lib/lcdbwf/R/contrasts.R b/lib/lcdbwf/R/contrasts.R index bb8dda55..ae3e3cbc 100644 --- a/lib/lcdbwf/R/contrasts.R +++ b/lib/lcdbwf/R/contrasts.R @@ -165,7 +165,7 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ dots[['object']] <- dds # Ensure any provided `test` argument is consistent with the dds object provided. - # This uses names from S4Vectors::mcols(dds) to detect how the dds object was created. + # This uses names from mcols(dds) to detect how the dds object was created. 
test_detected <- FALSE if ('test' %in% names(dots)) { if ((dots$test == 'Wald' && any(grepl('LRT', names(S4Vectors::mcols(dds))))) || @@ -184,7 +184,9 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ } } - # Set the current default for 'type' from DESeq2 for lfcShrink if 'type' was not provided + # Set the current default for 'type' from DESeq2 for lfcShrink if 'type' was not provided. + # This inspects the function definition of lfcShrink to see what the current default is + # (we have have seen it change before, hence the check). if (!'type' %in% names(dots)) { dots$type <- eval(formals(DESeq2::lfcShrink)$type)[1] } @@ -195,7 +197,7 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ # When make_results is called with 'test' set to 'LRT', # or when make_results is called with 'test' missing but - # DDS object contains the LRT, we impute all rows in the log2FoldChange + # DDS object contains the LRT, we convert all values in the log2FoldChange # column of the DESeqResults object to 0. LFC values only make sense to report for a single # comparison of two sample groups. This applies to the Wald test only. # LRT is instead performing a test of the removal of one or more factor(s) from the design formula. diff --git a/lib/lcdbwf/R/results.R b/lib/lcdbwf/R/results.R index 2efa233c..066104fd 100644 --- a/lib/lcdbwf/R/results.R +++ b/lib/lcdbwf/R/results.R @@ -92,5 +92,5 @@ build_results_tabs <- function(res_list, dds_list, config, text){ #' @return Boolean TRUE if results object's pvalues were provided by #' nBinomLRT and FALSE if the Wald test was used. 
check_LRT <- function(res_i) { - mcols <- grepl('LRT', mcols(res_i)$description[9]) + mcols_desc <- grepl('LRT', S4Vectors::mcols(res_i)$description[9]) } diff --git a/lib/lcdbwf/tests/testthat/test-contrasts.R b/lib/lcdbwf/tests/testthat/test-contrasts.R index 186f48f7..48530aeb 100644 --- a/lib/lcdbwf/tests/testthat/test-contrasts.R +++ b/lib/lcdbwf/tests/testthat/test-contrasts.R @@ -19,15 +19,17 @@ coef <- "group_treatment_vs_control" # Creates a `dds_list` in the global namespace with names `dds_wald` and `dds_lrt` to be used below. dds_list <- make_dds_list(config) - # Ensure dds_list makes it into the global environment, no matter what fancy # stuff {testthat} is doing. assign("dds_list", dds_list, envir=.GlobalEnv) + lrt_design_data <- make_lrt_design_data() # Each row in the ASCII table indicates which combination of test, type, coef, and contrast # is tested by the respective indexed conditional statement in the following test_that code. - +# "Results" keeps track of where the results objects are made; "Check" keeps track of +# where those results are checked. Use this table as a guide to the tests below. 
+# #+---------+-------+--------+------+----------+-------+----------+ #| Results | Test | Type | Coef | Contrast | Check | DDS | #+---------+-------+--------+------+----------+-------+----------+ @@ -193,7 +195,7 @@ test_that("make_results errors when user passes mismatched test == 'Wald' with L test_that("make_results errors when user passes mismatched test == 'LRT' with Wald DDS", { expect_error(make_results(dds_name='dds_wald', label='Shrink lrt results', type=NULL, test='LRT'), "The 'test' passed to make_results does not match the detected test type in dds") -}) # test_that +}) # ---------------------------------------------------------------- # @@ -259,7 +261,7 @@ for (type in c('ashr', 'apeglm', 'normal')) { # type='ashr', # contrast=contrast), # "Can't find dds_list in global environment.") -#}) # test_that +#}) ## Put it back into the global env #assign("dds_list", orig_dds_list, envir=.GlobalEnv) # ---------------------------------------------------------------- # diff --git a/workflows/rnaseq/downstream/text.yaml b/workflows/rnaseq/downstream/text.yaml index 1a413122..c35df71f 100644 --- a/workflows/rnaseq/downstream/text.yaml +++ b/workflows/rnaseq/downstream/text.yaml @@ -118,7 +118,7 @@ results_plots: normalized to library size, not transcript length. We can say that a gene with higher baseMean than another gene has more observed reads, but we cannot say whether that is because it has a longer transcript or because - there are more transcripts. For likelihood ratio test results, we impute + there are more transcripts. For likelihood ratio test results, we set the log2FoldChange values to 0. See "log2FoldChange" below and [DESeq2 likelihood ratio test] (https://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#i-ran-a-likelihood-ratio-test-but-results-only-gives-me-one-comparison.) for details as to why. 
@@ -126,7 +126,7 @@ results_plots: volcano: | A **volcano plot** shows fewer dimensions than an M-A plot (it does not show normalized counts) but it can be useful for looking at the relationship of - adjusted p-value and log2FoldChange. For likelihood ratio test results, we impute + adjusted p-value and log2FoldChange. For likelihood ratio test results, we set the log2FoldChange values to 0. See "log2FoldChange" below and [DESeq2 likelihood ratio test] (https://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#i-ran-a-likelihood-ratio-test-but-results-only-gives-me-one-comparison.) for details as to why. @@ -235,7 +235,7 @@ results_files: LRT is a method for comparing the goodness of fit between a full model and a reduced model, evaluating whether the reduced model adequately explains the data without relying on a single pairwise comparison. The log2FoldChange is - only applicable for pairwise comparisons; therefore, we impute the + only applicable for pairwise comparisons; therefore, we set the log2FoldChange values to 0. See [DESeq2 likelihood ratio test](https://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#i-ran-a-likelihood-ratio-test-but-results-only-gives-me-one-comparison.) for more details. From e8fdb30f5d49532c811f358af5827c89026ae206 Mon Sep 17 00:00:00 2001 From: Ryan Dale Date: Thu, 13 Jun 2024 09:24:10 -0400 Subject: [PATCH 86/93] Apply suggestions from code review --- lib/lcdbwf/R/contrasts.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/lcdbwf/R/contrasts.R b/lib/lcdbwf/R/contrasts.R index ae3e3cbc..c60cbe51 100644 --- a/lib/lcdbwf/R/contrasts.R +++ b/lib/lcdbwf/R/contrasts.R @@ -126,7 +126,7 @@ dds_coefs <- function(dds, ..., expand=FALSE){ #' @param dds_list List of dds objects. If NULL, then look in the global #' environment for an object called "dds_list" and use that. #' @param type Type of shrinkage for use by lfcShrink(). 
If no type is given, -#' we use the current DESeq2 default argument for Type. If +#' we use the current DESeq2 default `type` argument for lfcShrink. If #' NULL is given, we skip lfcShrink() altogether and directly return the object from results(). #' @param ... Additional arguments are passed to results() and lfcShrink(). If #' "parallel" is not explicitly specified here, then look in the global env for @@ -231,9 +231,9 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ return( list( - res = res, - dds = dds_name, - label = label + res=res, + dds=dds_name, + label=label ) ) } From 89621733039b3e72a473ab9f221c851afa0e11bb Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Thu, 13 Jun 2024 11:21:28 -0400 Subject: [PATCH 87/93] rnaseq.Rmd LRT make_dds and make_results examples --- workflows/rnaseq/downstream/rnaseq.Rmd | 27 +++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/workflows/rnaseq/downstream/rnaseq.Rmd b/workflows/rnaseq/downstream/rnaseq.Rmd index 1dcf7d91..769626fd 100644 --- a/workflows/rnaseq/downstream/rnaseq.Rmd +++ b/workflows/rnaseq/downstream/rnaseq.Rmd @@ -185,6 +185,13 @@ lst <- list( sampletable=colData, design=~group, kallisto=TRUE) + + # Example 5: use LRT + LRT=list( + sampletable=colData, + design=~group, + reduced_design=~1, + test='LRT') # ------------------------------------------------------------------ ) dds_list <- map(lst, lcdbwf:::make_dds, config=config, parallel=config$parallel$parallel) @@ -211,7 +218,7 @@ there are more tabs for the various output. # - create each list in a different chunk # - ensure chunk name starts with "results_" # - ensure list name starts with "contr_[index]_". The rest of the name will be used -# as a readable label for each constrast. [index] is an alphanumeric string +# as a readable label for each constrast. [index] is an alphanumeric string # (ex: contr_01_* or contr_2b_*) that will be used to sort contrasts for output files. 
# The index string must contain at least 1 character and cannot contain "_" # @@ -269,6 +276,24 @@ contr_03_kallisto <- lcdbwf:::make_results( ) ``` +```{r results_05, dependson='dds_list', cache=TRUE} +# Example 5: +# - Examples 1-4 use the default DESeq2 test, Wald. +# - Here, we use the nBinomLRT (LRT) test. +# NOTE: Use 'type=NULL' to skip LFC shrinkage as +# make_results sets all LRT LFC values to 0. +# 'contrast' and 'coef' should be omitted +# since LRT uses the full vs. reduced design. +# The 'test' argument can also be omitted as +# make_results detects the test type from the +# dds object in 'dds_name'. + +contr_05_lrt <- lcdbwf:::make_results( + dds_name="LRT", + type=NULL, + label='Using LRT' +) +``` ```{r assemble_variables, cache=TRUE, config=config$annotation, dependson=knitr::all_labels()[grepl("^results_", knitr::all_labels())]} res_list <- lcdbwf:::collect_objects("^contr_[^_]+_") From 07b47d97aa7227d578cfd0816cd88196aa64a441 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Thu, 13 Jun 2024 17:25:49 -0400 Subject: [PATCH 88/93] Added LRT examples to rnaseq.Rmd --- workflows/rnaseq/downstream/rnaseq.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/rnaseq/downstream/rnaseq.Rmd b/workflows/rnaseq/downstream/rnaseq.Rmd index 769626fd..c36c1a3f 100644 --- a/workflows/rnaseq/downstream/rnaseq.Rmd +++ b/workflows/rnaseq/downstream/rnaseq.Rmd @@ -285,8 +285,8 @@ contr_03_kallisto <- lcdbwf:::make_results( # 'contrast' and 'coef' should be omitted # since LRT uses the full vs. reduced design. # The 'test' argument can also be omitted as -# make_results detects the test type from the -# dds object in 'dds_name'. +# make_results detects the 'test' type from the +# dds object specified with 'dds_name'. 
contr_05_lrt <- lcdbwf:::make_results( dds_name="LRT", From 69b04c5793be8de97d05dc91f35c0f8940254ebe Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Thu, 13 Jun 2024 17:26:27 -0400 Subject: [PATCH 89/93] contrasts.R - added warning for all LFC set to 0 --- lib/lcdbwf/R/contrasts.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/lcdbwf/R/contrasts.R b/lib/lcdbwf/R/contrasts.R index c60cbe51..4d07a424 100644 --- a/lib/lcdbwf/R/contrasts.R +++ b/lib/lcdbwf/R/contrasts.R @@ -196,10 +196,10 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ res <- do.call(DESeq2::results, results_dots) # When make_results is called with 'test' set to 'LRT', - # or when make_results is called with 'test' missing but + # or when make_results is called with 'test' missing but the # DDS object contains the LRT, we convert all values in the log2FoldChange # column of the DESeqResults object to 0. LFC values only make sense to report for a single - # comparison of two sample groups. This applies to the Wald test only. + # comparison of two sample groups. This only applies to the Wald test. # LRT is instead performing a test of the removal of one or more factor(s) from the design formula. # DESeq2 reports log2FoldChange values for a single pair-wise comparison when test == 'LRT'. This # can be misleading and so this is our solution. @@ -207,6 +207,7 @@ make_results <- function(dds_name, label, dds_list=NULL, ...){ # Adjust log2FoldChange for LRT test if (!is.null(dots$test) && dots$test == 'LRT') { res$log2FoldChange <- 0 + warning("All log2FoldChange values in the DESeq2 results object have been set to 0. 
See https://github.com/lcdb/lcdb-wf/blob/master/docs/rnaseq-rmd.rst?plain=1#L269.") } # Checks for LRT test and non-NULL type From 2fd50ae6e75756e43daf2c4c835a417623715f20 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Thu, 13 Jun 2024 17:27:15 -0400 Subject: [PATCH 90/93] rnaseq-rmd.rst - added LRT description to results docs --- docs/rnaseq-rmd.rst | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/docs/rnaseq-rmd.rst b/docs/rnaseq-rmd.rst index 430ea383..01ec225a 100644 --- a/docs/rnaseq-rmd.rst +++ b/docs/rnaseq-rmd.rst @@ -208,7 +208,7 @@ Here is how the code above would look using this method: subset.counts=TRUE)) ) - dds.list <- map(lst, lcdbwf::make_dds, config=config, parallel=config$parallel$parallel) + dds_list <- map(lst, lcdbwf::make_dds, config=config, parallel=config$parallel$parallel) That is, first we create a list of lists (``lst``), and then we used ``map()`` to apply the ``make_dds`` function to all items in the list. The collapsing of @@ -260,6 +260,23 @@ adds some extra convenience when working with lists of dds objects, including the detection of parallelization as set up in the config object. See the help for ``lcdbwf::make_results()`` for more details. +By default, if no test argument is specified in the parameters for +``lcdbwf::make_dds`` (examples 1-4, rnaseq.Rmd, lines 164-187), the Wald test is +performed. When ``lcdbwf::make_results`` processes a Wald test dds object, it +detects the Wald test and expects a ``contrast`` or ``coef`` argument to specify which +p-values and log2FoldChange values to report. + +DESeq2 also supports the nBinomLRT (LRT). Example 5 (rnaseq.Rmd, line 189) +demonstrates how to create a dds object with LRT data. Since the LRT tests +the removal of one or more terms from the design formula, a single +log2FoldChange column doesn't reflect the test's complexity. 
DESeq2's results +object is optimized for the Wald test, and when storing LRT results, it +maintains consistency in datastructure by choosing a single pair-wise comparison for +log2FoldChange values. To avoid confusion, we set all log2FoldChange values to +0 for LRT results. + +For more details, see the DESeq2 documentation: `DESeq2 Likelihood Ratio Test `_. + .. _rules: To take advantage of this infrastructure, we put each of those contrasts into From 3623f04be3d3225d54704efc70adc7cde31ff35c Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Mon, 17 Jun 2024 09:23:05 -0400 Subject: [PATCH 91/93] missing comma in rnaseq.Rmd --- workflows/rnaseq/downstream/rnaseq.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/rnaseq/downstream/rnaseq.Rmd b/workflows/rnaseq/downstream/rnaseq.Rmd index c36c1a3f..016f5881 100644 --- a/workflows/rnaseq/downstream/rnaseq.Rmd +++ b/workflows/rnaseq/downstream/rnaseq.Rmd @@ -184,7 +184,7 @@ lst <- list( kallisto=list( sampletable=colData, design=~group, - kallisto=TRUE) + kallisto=TRUE), # Example 5: use LRT LRT=list( From 3f9fe76659ee16fecbd4776dd0b6350f48d11d49 Mon Sep 17 00:00:00 2001 From: menoldmt <129203894+menoldmt@users.noreply.github.com> Date: Mon, 1 Jul 2024 11:39:09 -0400 Subject: [PATCH 92/93] Update docs/rnaseq-rmd.rst emphasize LFC for LRT is set to 0 Co-authored-by: Ryan Dale --- docs/rnaseq-rmd.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/rnaseq-rmd.rst b/docs/rnaseq-rmd.rst index 01ec225a..1d903fae 100644 --- a/docs/rnaseq-rmd.rst +++ b/docs/rnaseq-rmd.rst @@ -272,8 +272,8 @@ the removal of one or more terms from the design formula, a single log2FoldChange column doesn't reflect the test's complexity. DESeq2's results object is optimized for the Wald test, and when storing LRT results, it maintains consistency in datastructure by choosing a single pair-wise comparison for -log2FoldChange values. 
To avoid confusion, we set all log2FoldChange values to -0 for LRT results. +log2FoldChange values. To avoid confusion, ***we set all log2FoldChange values to +0 for LRT results***. For more details, see the DESeq2 documentation: `DESeq2 Likelihood Ratio Test `_. From c19c2712038068200c79b5ece5636f79693d4ae0 Mon Sep 17 00:00:00 2001 From: Matthew Menold Date: Mon, 1 Jul 2024 13:30:49 -0400 Subject: [PATCH 93/93] static line number references replaced with permalinks to relevant lines on github --- docs/rnaseq-rmd.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/rnaseq-rmd.rst b/docs/rnaseq-rmd.rst index 1d903fae..e28411ed 100644 --- a/docs/rnaseq-rmd.rst +++ b/docs/rnaseq-rmd.rst @@ -261,12 +261,12 @@ the detection of parallelization as set up in the config object. See the help for ``lcdbwf::make_results()`` for more details. By default, if no test argument is specified in the parameters for -``lcdbwf::make_dds`` (examples 1-4, rnaseq.Rmd, lines 164-187), the Wald test is -performed. When ``lcdbwf::make_results`` processes a Wald test dds object, it +``lcdbwf::make_dds`` found here in `examples 1-4, `_ +the Wald test is performed. When ``lcdbwf::make_results`` processes a Wald test dds object, it detects the Wald test and expects a ``contrast`` or ``coef`` argument to specify which p-values and log2FoldChange values to report. -DESeq2 also supports the nBinomLRT (LRT). Example 5 (rnaseq.Rmd, line 189) +DESeq2 also supports the nBinomLRT (LRT). `Example 5 `_ demonstrates how to create a dds object with LRT data. Since the LRT tests the removal of one or more terms from the design formula, a single log2FoldChange column doesn't reflect the test's complexity. DESeq2's results @@ -275,7 +275,8 @@ maintains consistency in datastructure by choosing a single pair-wise comparison log2FoldChange values. To avoid confusion, ***we set all log2FoldChange values to 0 for LRT results***. 
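-For more details, see the DESeq2 documentation: +For more details, see the DESeq2 documentation: +`DESeq2 Likelihood Ratio Test <https://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#i-ran-a-likelihood-ratio-test-but-results-only-gives-me-one-comparison.>`_. .. _rules: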