nf-core · KamilMaliszArdigen · Oct 8, 2024 · Oct 8, 2024 · Oct 8, 2024 · Oct 14, 2024
diff --git a/modules/nf-core/limma/differential/environment.yml b/modules/nf-core/limma/differential/environment.yml
@@ -2,4 +2,8 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - bioconda::bioconductor-limma=3.54.0
+  - bioconda::bioconductor-edger=4.0.16
+  - bioconda::bioconductor-ihw=1.28.0
+  - bioconda::bioconductor-limma=3.58.1
+  - conda-forge::r-dplyr=1.1.4
+  - conda-forge::r-readr=2.1.5
diff --git a/modules/nf-core/limma/differential/main.nf b/modules/nf-core/limma/differential/main.nf
@@ -17,6 +17,7 @@ process LIMMA_DIFFERENTIAL {
     tuple val(meta), path("*.MArrayLM.limma.rds")         , emit: rdata
     tuple val(meta), path("*.limma.model.txt")            , emit: model
     tuple val(meta), path("*.R_sessionInfo.log")          , emit: session_info
+    tuple val(meta), path("*.normalised_counts.tsv")      , emit: normalised_counts, optional: true
     path "versions.yml"                                   , emit: versions
 
     when:

diff --git a/modules/nf-core/limma/differential/templates/limma_de.R b/modules/nf-core/limma/differential/templates/limma_de.R
@@ -77,6 +77,9 @@ opt <- list(
     subset_to_contrast_samples = FALSE,
     exclude_samples_col = NULL,
     exclude_samples_values = NULL,
+    use_voom = TRUE,
+    IHW_correction = TRUE,
+    number = Inf,
     ndups = NULL,                # lmFit
     spacing = NULL,              # lmFit
     block = NULL,                # lmFit
@@ -142,6 +145,11 @@ opt[vector_opt] = lapply(strsplit(unlist(opt[vector_opt]), ','), as.numeric)
 ################################################
 
 library(limma)
+library(edgeR)
+library(readr)
+library(dplyr)
+library(tibble)
+library(IHW)
 
 ################################################
 ################################################
@@ -287,11 +295,38 @@ colnames(design) <- sub(
     paste0(contrast_variable, '.'), colnames(design)
 )
 
+# Perform voom normalisation for RNA-seq data
+if (!is.null(opt\$use_voom) && opt\$use_voom) {
+    # Create a DGEList object for RNA-seq data
+    dge <- DGEList(counts = intensities.table)
+
+    # Normalize counts using TMM
+    dge <- calcNormFactors(dge, method = "TMM")
+
+    # Run voom to transform the data
+    voom_result <- voom(dge, design)
+    data_for_fit <- voom_result
+
+    # Write the normalized counts matrix to a TSV file
+    normalized_counts <- voom_result\$E
+    normalized_counts_with_genes <- data.frame(Gene = rownames(normalized_counts), normalized_counts, row.names = NULL)
+    colnames(normalized_counts_with_genes)[1] <- opt\$probe_id_col
+    write.table(normalized_counts_with_genes,
+        file = paste(opt\$output_prefix, "normalised_counts.tsv", sep = '.'),
+        sep = "\t",
+        quote = FALSE,
+        row.names = FALSE)
+
+} else {
+    # Use as.matrix for regular microarray analysis
+    data_for_fit <- as.matrix(intensities.table)
+}
+
 # Prepare for and run lmFit()
 
 lmfit_args = c(
     list(
-        object = as.matrix(intensities.table),
+        object = data_for_fit,
         design = design
     ),
     opt[c('ndups', 'spacing', 'block', 'method')]
@@ -322,18 +357,54 @@ ebayes_args = c(
 
 fit2 <- do.call(eBayes, ebayes_args)
 
-# Run topTable() to generate a results data frame
+if ((!is.null(opt\$use_voom) && opt\$use_voom) && (!is.null(opt\$IHW_correction) && opt\$IHW_correction)) {
 
-toptable_args = c(
-    list(
-        fit = fit2,
-        sort.by = 'none',
-        number = nrow(intensities.table)
-    ),
-    opt[c('adjust.method', 'p.value', 'lfc', 'confint')]
-)
+    # Define the column name for the group comparison
+    name_con_ref <- glue::glue("{contrast_variable}.{opt\$target_level}-{contrast_variable}.{opt\$reference_level}")
+
+    # Calculate the median gene expression
+    median_expression <- apply(dge\$counts, 1, median)
+    # Apply IHW for multiple testing correction using median expression as covariate
+    ihw_result <- ihw(fit2\$p.value[, name_con_ref] ~ median_expression, alpha = 0.05)
+    ihw_result@df <- as_tibble(ihw_result@df, rownames = opt\$probe_id_col)
+
+    # Convert fit\$p.value to a tibble, including rownames as a new column 'Gene'
+    fit_df <- as_tibble(fit2\$p.value, rownames = opt\$probe_id_col)
+
+    fit_df <- fit_df %>%
+        left_join(ihw_result@df %>% select(!!sym(opt\$probe_id_col),adj_pvalue),by=opt\$probe_id_col) %>%
+        rename(ihw.adj.p.value = adj_pvalue)
 
-comp.results <- do.call(topTable, toptable_args)[rownames(intensities.table),]
+    # Convert back to a matrix if necessary (not needed for merging, but for consistency later)
+    fit\$p.value <- as.matrix(fit_df %>% select(-!!sym(opt\$probe_id_col))) # Keep the original structure without probe_id_col column
+
+    # Generate the topTable including the new adjusted p-values
+    toptable_args <- list(fit = fit2, coef = name_con_ref, number = opt\$number, adjust.method = opt\$adjust.method,
+        p.value = opt\$p.value, lfc = opt\$lfc, confint = opt\$confint)
+
+    comp.results <- do.call(topTable, toptable_args)
+    # Add the IHW-adjusted p-values to the topTable results by merging based on probe_id_col
+    comp.results <- comp.results %>%
+        rownames_to_column(opt\$probe_id_col) %>%    # Ensure probe_id_col column exists in comp.results
+        left_join(fit_df, by = opt\$probe_id_col) %>%    # Merge based on probe_id_col column
+        column_to_rownames(var=opt\$probe_id_col) %>%
+        select(-sym(name_con_ref))
+
+} else {
+
+    # Run topTable() to generate a results data frame
+
+    toptable_args = c(
+        list(
+            fit = fit2,
+            sort.by = 'none',
+            number = nrow(intensities.table)
+        ),
+        opt[c('adjust.method', 'p.value', 'lfc', 'confint')]
+    )
+
+    comp.results <- do.call(topTable, toptable_args)[rownames(intensities.table),]
+}
 
 ################################################
 ################################################

diff --git a/modules/nf-core/limma/differentialvoom/environment.yml b/modules/nf-core/limma/differentialvoom/environment.yml
@@ -0,0 +1,9 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::bioconductor-edger=4.0.16
+  - bioconda::bioconductor-ihw=1.28.0
+  - bioconda::bioconductor-limma=3.58.1
+  - conda-forge::r-dplyr=1.1.4
+  - conda-forge::r-readr=2.1.5
diff --git a/modules/nf-core/limma/differentialvoom/main.nf b/modules/nf-core/limma/differentialvoom/main.nf
@@ -0,0 +1,61 @@
+process LIMMA_DIFFERENTIALVOOM {
+    tag "$meta"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'oras://community.wave.seqera.io/library/bioconductor-edger_bioconductor-ihw_bioconductor-limma_r-dplyr_r-readr:7fc48564d286c1c6' :
+        'community.wave.seqera.io/library/bioconductor-edger_bioconductor-ihw_bioconductor-limma_r-dplyr_r-readr:edea0f9fbaeba3a0' }"
+
+    input:
+    tuple val(meta), val(contrast_variable), val(reference), val(target)
+    tuple val(meta2), path(samplesheet), path(intensities)
+
+    output:
+    tuple val(meta), path("*.limma.results.tsv")          , emit: results
+    tuple val(meta), path("*.limma.mean_difference.png")  , emit: md_plot
+    tuple val(meta), path("*.MArrayLM.limma.rds")         , emit: rdata
+    tuple val(meta), path("*.normalised_counts.tsv")      , emit: normalised_counts, optional: true
+    tuple val(meta), path("*.limma.model.txt")            , emit: model
+    tuple val(meta), path("*.R_sessionInfo.log")          , emit: session_info
+    path "versions.yml"                                   , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    template 'limma_de.R'
+
+    stub:
+    prefix              = task.ext.prefix   ?: "${meta.id}"
+    """
+    #!/usr/bin/env Rscript
+    library(limma)
+
+    a <- file("${prefix}.limma.results.tsv", "w")
+    close(a)
+    a <- file("${prefix}.limma.mean_difference.png", "w")
+    close(a)
+    a <- file("${prefix}.MArrayLM.limma.rds", "w")
+    close(a)
+    a <- file("${prefix}.normalised_counts.tsv", "w")
+    close(a)
+    a <- file("${prefix}.limma.model.txt", "w")
+    close(a)
+    a <- file("${prefix}.R_sessionInfo.log", "w")
+    close(a)
+
+    ## VERSIONS FILE
+    r.version <- strsplit(version[['version.string']], ' ')[[1]][3]
+    limma.version <- as.character(packageVersion('limma'))
+
+    writeLines(
+        c(
+            '"task.process":',
+            paste('    r-base:', r.version),
+            paste('    bioconductor-limma:', limma.version)
+        ),
+        'versions.yml'
+    )
+    """
+}
diff --git a/modules/nf-core/limma/differentialvoom/meta.yml b/modules/nf-core/limma/differentialvoom/meta.yml
@@ -0,0 +1,86 @@
+name: "limma_differential"
+description: runs a differential expression analysis with Limma
+keywords:
+  - differential
+  - expression
+  - microarray
+  - limma
+tools:
+  - "limma":
+      description: "Linear Models for Microarray Data"
+      homepage: "https://bioconductor.org/packages/release/bioc/html/limma.html"
+      documentation: "https://bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf"
+      tool_dev_url: https://github.com/cran/limma""
+      doi: "10.18129/B9.bioc.limma"
+      licence: ["LGPL >=3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing contrast information. This can be used at the
+        workflow level to pass optional parameters to the module, e.g.
+        [ id:'contrast1', blocking:'patient' ] passed in as ext.args like:
+        '--blocking_variable $meta.blocking'.
+  - contrast_variable:
+      type: string
+      description: |
+        The column in the sample sheet that should be used to define groups for
+        comparison
+  - reference:
+      type: string
+      description: |
+        The value within the contrast_variable column of the sample sheet that
+        should be used to derive the reference samples
+  - target:
+      type: string
+      description: |
+        The value within the contrast_variable column of the sample sheet that
+        should be used to derive the target samples
+  - meta2:
+      type: map
+      description: |
+        Groovy map containing study-wide metadata related to the sample sheet
+        and matrix
+  - samplesheeet:
+      type: file
+      description: |
+        CSV or TSV format sample sheet with sample metadata
+  - intensities:
+      type: file
+      description: |
+        Raw TSV or CSV format expression matrix with probes by row and samples
+        by column
+  - type:
+      type: string
+      description: |
+        Analysis type to be performed determines template which should be used for analysis
+
+output:
+  - results:
+      type: file
+      description: TSV-format table of differential expression information as output by Limma
+      pattern: "*.limma.results.tsv"
+  - md_plot:
+      type: file
+      description: Limma mean difference plot
+      pattern: "*.mean_difference.png"
+  - rdata:
+      type: file
+      description: Serialised MArrayLM object
+      pattern: "*.MArrayLM.limma.rds"
+  - model:
+      type: file
+      description: TXT-format limma model
+      pattern: "*.limma.model.tsv"
+  - session_info:
+      type: file
+      description: dump of R SessionInfo
+      pattern: "*.log"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@KamilMaliszArdigen"
+maintainers:
+  - "@KamilMaliszArdigen"