From 46f89021b5102516cfd421309b987d233a8d382e Mon Sep 17 00:00:00 2001 From: WackerO Date: Fri, 2 Aug 2024 13:43:44 +0200 Subject: [PATCH 01/27] fixed contrast pairs, updated changelog --- CHANGELOG.md | 10 ++++++++++ assets/RNAseq_report.Rmd | 14 ++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c2a9b52..b228ff96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## dev + +### Added + +- [#250](https://github.com/qbic-pipelines/rnadeseq/pull/250) Added clearer error for incorrect contrast_pairs + +### Fixed + +- [#250](https://github.com/qbic-pipelines/rnadeseq/pull/250) Fixed incorrect reading and indexing of contrast_pairs + ## 2.4 - A Pair of Shoes ### Added diff --git a/assets/RNAseq_report.Rmd b/assets/RNAseq_report.Rmd index 6b78749a..45d1b380 100644 --- a/assets/RNAseq_report.Rmd +++ b/assets/RNAseq_report.Rmd @@ -1526,19 +1526,21 @@ if (isProvided(params$path_contrast_list)) { contrast_names <- append(contrast_names, contname) } } - if (isProvided(params$path_contrast_pairs)) { - contrasts <- read.table(path_contrast_pairs, sep="\t", header = T, colClasses = "character") + contrasts <- read.table(params$path_contrast_pairs, sep="\t", header = T, colClasses = "character") write.table(contrasts, file="differential_gene_expression/metadata/contrast_pairs.tsv", sep="\t", quote=F, col.names = T, row.names = F) # Contrast calculation for contrast pairs for (i in c(1:nrow(contrasts))) { cont <- as.character(contrasts[i,]) - contname <- cont[0] - if (!(cont[2] %in% coefficients & cont[3] %in% coefficients)){ - stop(paste("Provided contrast name is invalid, it needs to be contained in", coefficients)) + contname <- cont[1] + if (!(cont[2] %in% coefficients)){ + stop(paste0("Provided contrast name ", cont[2], " is invalid, it 
needs to be contained in ", paste(coefficients, collapse=", "))) + } + if (!(cont[3] %in% coefficients)){ + stop(paste0("Provided contrast name ", cont[3], " is invalid, it needs to be contained in ", paste(coefficients, collapse=", "))) } - results_DEseq_contrast <- results(cds, contrast=list(cont[1],cont[2])) + results_DEseq_contrast <- results(cds, contrast=list(cont[2],cont[3])) results_DEseq_contrast <- as.data.frame(results_DEseq_contrast) print("Analyzing contrast:") print(contname) From 87304f072ed6f9c6e49d73d8c4f9a934d9bd60f5 Mon Sep 17 00:00:00 2001 From: WackerO <43847497+WackerO@users.noreply.github.com> Date: Fri, 2 Aug 2024 14:26:04 +0200 Subject: [PATCH 02/27] Update CHANGELOG.md Co-authored-by: Sabrina Krakau --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b228ff96..d981cfcc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- [#250](https://github.com/qbic-pipelines/rnadeseq/pull/250) Added clearer error for incorrect contrast_pairs +- [#250](https://github.com/qbic-pipelines/rnadeseq/pull/250) Added clearer error message for incorrect contrast_pairs ### Fixed From 02fc8ffb53268a6bca8d7ca60b2bb01c03fe1939 Mon Sep 17 00:00:00 2001 From: WackerO Date: Thu, 24 Oct 2024 14:37:35 +0200 Subject: [PATCH 03/27] Added separate param for setting qval threshold for pathway analysis --- assets/RNAseq_report.Rmd | 19 +++++++++++-------- bin/Execute_report.R | 2 ++ modules/local/report.nf | 2 ++ nextflow.config | 1 + nextflow_schema.json | 5 +++++ 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/assets/RNAseq_report.Rmd b/assets/RNAseq_report.Rmd index 6b78749a..906fcd41 100644 --- a/assets/RNAseq_report.Rmd +++ b/assets/RNAseq_report.Rmd @@ -46,6 +46,7 @@ params: datasources: '' heatmaps_cluster_rows: '' heatmaps_cluster_cols: '' + pathway_pval_threshold: '' #Additional args for 
the report path_proj_summary: '' @@ -2056,6 +2057,8 @@ if (!isProvided(params$datasources)) { # ------------------ # Set default params # ------------------ +pathway_pval_threshold <- ifelse(params$pathway_pval_threshold == -1, params$adj_pval_threshold, params$pathway_pval_threshold) +pathway_pval_text <- as.character(pathway_pval_threshold) # Set theme for graphs theme_set(theme_classic()) @@ -2094,7 +2097,7 @@ for (file in contrast_files){ correction_method="fdr", sources=datasources, evcodes=TRUE, - user_threshold=params$adj_pval_threshold, + user_threshold=pathway_pval_threshold, custom_bg=custom_background, domain_scope="custom_annotated" ) @@ -2110,7 +2113,7 @@ for (file in contrast_files){ correction_method="fdr", sources=datasources, evcodes=TRUE, - user_threshold=params$adj_pval_threshold, + user_threshold=pathway_pval_threshold, domain_scope="annotated" ) pathway_gostres_nobg <- gostres_nobg$result @@ -2124,7 +2127,7 @@ for (file in contrast_files){ correction_method="fdr", sources=datasources, evcodes=TRUE, - user_threshold=params$adj_pval_threshold, + user_threshold=pathway_pval_threshold, domain_scope="annotated" ) } @@ -2411,7 +2414,7 @@ gost_text, "\n ## Enriched pathways -The plot below summarizes the pathways that were found significantly enriched in DE genes for each contrast (padj value <= ", pval_text, "). +The plot below summarizes the pathways that were found significantly enriched in DE genes for each contrast (padj value <= ", pathway_pval_text, "). Only contrasts for which an enriched pathway was found are shown. Hover over the dots to reveal the pathway names. 
The table below provides more detail on all enriched pathways.")) ``` @@ -2447,7 +2450,7 @@ if (length(q_list) > 0) { significant=T, correction_method="fdr", sources=datasources, - user_threshold=params$adj_pval_threshold, + user_threshold=pathway_pval_threshold, custom_bg=custom_background, domain_scope="custom_annotated" ) @@ -2457,7 +2460,7 @@ if (length(q_list) > 0) { significant=T, correction_method="fdr", sources=datasources, - user_threshold=params$adj_pval_threshold, + user_threshold=pathway_pval_threshold, domain_scope="annotated" ) } @@ -2470,7 +2473,7 @@ if (length(q_list) > 0) { if (nrow(path_enrich) > 0){ pg2 <- gostplot(gostres, capped=T, interactive=T) - pg2[['x']][['layout']][['annotations']][[1]][['x']] <- -params$adj_pval_threshold + pg2[['x']][['layout']][['annotations']][[1]][['x']] <- -pathway_pval_threshold # limit gostplot y maximum dynamically for all subplots for (counter in c(1:length(contrast_files))) { @@ -2691,7 +2694,7 @@ For pathway analysis, the R packages `gprofiler2 v", version_gprofiler2, " `, `AnnotationDbi v", version_annotation, "` and `", name_species, " v", version_annotation, "` were used. ", database_string, ".\n", -"Pathways were classified as enriched for those genes with an adjusted p-value <= ", pval_text, "." +"Pathways were classified as enriched for those genes with an adjusted p-value <= ", pathway_pval_text, "." 
)) ``` diff --git a/bin/Execute_report.R b/bin/Execute_report.R index 21d2ad93..8bb51fe8 100755 --- a/bin/Execute_report.R +++ b/bin/Execute_report.R @@ -37,6 +37,7 @@ option_list = list( make_option("--datasources", type="character", default=NULL, help="Which datasources to use for pathway analysis.", metavar="character"), make_option("--heatmaps_cluster_rows", action="store_true", default=FALSE, help="Whether to activate row clustering when generating heatmaps of gene expression in enriched pathways."), make_option("--heatmaps_cluster_cols", action="store_true", default=FALSE, help="Whether to activate column clustering when generating heatmaps of gene expression in enriched pathways."), + make_option("--pathway_pval_threshold", type="double", default=-1, help="Which p value threshold to use for pathway analysis."), make_option(c("-s", "--proj_summary"), type="character", default=NULL, help="Project summary file", metavar="character"), make_option(c("--path_quote"), type="character", default=NULL, help="Path to the quote PDF", metavar="character"), @@ -89,6 +90,7 @@ rmarkdown::render(opt$report, output_file = opt$output, knit_root_dir = wd, outp datasources = opt$datasources, heatmaps_cluster_rows = opt$heatmaps_cluster_rows, heatmaps_cluster_cols = opt$heatmaps_cluster_cols, + pathway_pval_threshold = opt$pathway_pval_threshold, path_proj_summary = opt$proj_summary, path_quote = opt$path_quote, diff --git a/modules/local/report.nf b/modules/local/report.nf index 6164a8ff..f714d5a3 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -44,6 +44,7 @@ process REPORT { def datasources_opt = params.datasources ? "--datasources $params.datasources" : '' def heatmaps_cluster_rows_opt = params.heatmaps_cluster_rows ? "--heatmaps_cluster_rows TRUE" : '' def heatmaps_cluster_cols_opt = params.heatmaps_cluster_cols ? "--heatmaps_cluster_cols TRUE" : '' + def pathway_pval_threshold_opt = params.pathway_pval_threshold == -1 ? 
"--pathway_pval_threshold $params.adj_pval_threshold" : "--pathway_pval_threshold $params.pathway_pval_threshold" def quote_opt = params.quote != 'NO_FILE5' ? "--path_quote $params.quote" : '' @@ -87,6 +88,7 @@ process REPORT { $datasources_opt \ $heatmaps_cluster_rows_opt \ $heatmaps_cluster_cols_opt \ + $pathway_pval_threshold_opt \ $quote_opt \ $software_versions_opt \ --proj_summary $proj_summary \ diff --git a/nextflow.config b/nextflow.config index 175a48af..eea21636 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,6 +42,7 @@ params { datasources = null heatmaps_cluster_rows = true heatmaps_cluster_cols = false + pathway_pval_threshold = -1 // Additional args for the report project_summary = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 33f48902..672fcf5c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -156,6 +156,11 @@ "type": "boolean", "default": false, "description": "Whether to activate column clustering when generating heatmaps of gene expression in enriched pathways." + }, + "pathway_pval_threshold": { + "type": "number", + "default": -1, + "description": "p value (float) to use as threshold for pathway analysis. If omitted, will use the value of the parameter adj_pval_threshold (default 0.05)." 
} } }, From bf35b0f4eec694991d4bc1aaa9408376282c690f Mon Sep 17 00:00:00 2001 From: qbicStefanC Date: Tue, 12 Nov 2024 11:03:17 +0100 Subject: [PATCH 04/27] Get raw gene count tables from either Salmon and RSEM analysis Update RNAseq_report.Rmd --- assets/RNAseq_report.Rmd | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/assets/RNAseq_report.Rmd b/assets/RNAseq_report.Rmd index 6b78749a..3112d439 100644 --- a/assets/RNAseq_report.Rmd +++ b/assets/RNAseq_report.Rmd @@ -591,6 +591,9 @@ if (params$input_type %in% c("featurecounts", "smrnaseq")) { #dds from SummarizedExperiment , then run DESeq cds <- DESeqDataSet(se, design = as.formula(eval(parse(text=as.character(design[[1]]))))) + # get raw counts + count_table_names <- merge(x=gene_names, y=assay(cds), by.x = "Ensembl_ID", by.y="row.names") + write.table(count_table_names, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv",sep=""), append = FALSE, quote = FALSE, sep = "\t",eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double")) # Load salmon count files } else if (params$input_type == "salmon") { @@ -624,6 +627,9 @@ if (params$input_type %in% c("featurecounts", "smrnaseq")) { coldata$combfactor <- metadata$combfactor rownames(coldata) <- qbicCodes cds <- DESeqDataSetFromTximport(txi=txi.salmon, colData =coldata, design = eval(parse(text=as.character(design[[1]])))) + # get raw counts + count_table_names <- merge(x=gene_names, y=assay(cds), by.x = "Ensembl_ID", by.y="row.names") + write.table(count_table_names, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv",sep=""), append = FALSE, quote = FALSE, sep = "\t",eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double")) } } else { stop(paste0("Invalid input type: ", params$input_type, "! 
Input type must be one of: featurecounts, rsem, salmon, smrnaseq!")) From 750278b7002f242d8298103a3390a726133f6c07 Mon Sep 17 00:00:00 2001 From: qbicStefanC Date: Tue, 12 Nov 2024 18:48:17 +0100 Subject: [PATCH 05/27] Update RNAseq_report.Rmd --- assets/RNAseq_report.Rmd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/assets/RNAseq_report.Rmd b/assets/RNAseq_report.Rmd index 3112d439..f05f4b81 100644 --- a/assets/RNAseq_report.Rmd +++ b/assets/RNAseq_report.Rmd @@ -593,6 +593,7 @@ if (params$input_type %in% c("featurecounts", "smrnaseq")) { cds <- DESeqDataSet(se, design = as.formula(eval(parse(text=as.character(design[[1]]))))) # get raw counts count_table_names <- merge(x=gene_names, y=assay(cds), by.x = "Ensembl_ID", by.y="row.names") + count_table_names <- count_table_names[order(count_table_names$Ensembl_ID),] write.table(count_table_names, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv",sep=""), append = FALSE, quote = FALSE, sep = "\t",eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double")) # Load salmon count files @@ -629,6 +630,7 @@ if (params$input_type %in% c("featurecounts", "smrnaseq")) { cds <- DESeqDataSetFromTximport(txi=txi.salmon, colData =coldata, design = eval(parse(text=as.character(design[[1]])))) # get raw counts count_table_names <- merge(x=gene_names, y=assay(cds), by.x = "Ensembl_ID", by.y="row.names") + count_table_names <- count_table_names[order(count_table_names$Ensembl_ID),] write.table(count_table_names, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv",sep=""), append = FALSE, quote = FALSE, sep = "\t",eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double")) } } else { From a601e2274bc709a7b469ae5d59e0384fbde025f4 Mon Sep 17 00:00:00 2001 From: WackerO Date: Thu, 14 Nov 2024 13:22:08 +0100 Subject: [PATCH 06/27] Fixed bug with github actions --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ef2251ec..ddc61109 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -129,7 +129,7 @@ jobs: - name: Upload logs on failure if: failure() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: logs-${{ matrix.profile }} path: | From 3efd68743d0acd4a3a0e0436fb2779fd6b273ccf Mon Sep 17 00:00:00 2001 From: WackerO Date: Tue, 19 Nov 2024 15:03:22 +0100 Subject: [PATCH 07/27] update changelog --- CHANGELOG.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c2a9b52..67dbce22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## dev + +### Added + +### Changed + +### Fixed + +- [#252](https://github.com/qbic-pipelines/rnadeseq/pull/252) Fixed github CI bug by updating actions/upload-artifact + ## 2.4 - A Pair of Shoes ### Added From 5f23f5468b88006978c9383c21705a15bf0b94ab Mon Sep 17 00:00:00 2001 From: WackerO Date: Wed, 20 Nov 2024 08:17:34 +0100 Subject: [PATCH 08/27] Fixed pval format in report, added test --- assets/RNAseq_report.Rmd | 2 +- conf/test.config | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/assets/RNAseq_report.Rmd b/assets/RNAseq_report.Rmd index 906fcd41..3f1d849c 100644 --- a/assets/RNAseq_report.Rmd +++ b/assets/RNAseq_report.Rmd @@ -2058,7 +2058,7 @@ if (!isProvided(params$datasources)) { # Set default params # ------------------ pathway_pval_threshold <- ifelse(params$pathway_pval_threshold == -1, params$adj_pval_threshold, params$pathway_pval_threshold) -pathway_pval_text <- as.character(pathway_pval_threshold) +pathway_pval_text <- format(pathway_pval_threshold, scientific=F) # Set theme for graphs theme_set(theme_classic()) diff --git a/conf/test.config b/conf/test.config index 
5280f668..72b43194 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,6 +28,7 @@ params { software_versions = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/software_versions.csv' multiqc = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/MultiQC.zip' run_pathway_analysis = true + pathway_pval_threshold = 0.0003 datasources = 'KEGG,REAC' genome = 'GRCm38' quote = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/offer_example.pdf' From 4c5788210b3de5f7dafbed89488562b087055b9f Mon Sep 17 00:00:00 2001 From: WackerO Date: Wed, 20 Nov 2024 09:01:47 +0100 Subject: [PATCH 09/27] updated test yml --- tests/test.yml | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/tests/test.yml b/tests/test.yml index 11489eb2..78b974a3 100644 --- a/tests/test.yml +++ b/tests/test.yml @@ -68,28 +68,16 @@ - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_gostplot.png - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_gostplot.svg - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/pathway_enrichment_results.tsv - md5sum: 92736a4c802ebd4e644682308ecf46dc + md5sum: f0832e1cbdd4cfb4af84740d82a877a3 - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathway_enrichment_results.tsv - md5sum: 92115e662e0e2489ad05d28f3981fa3a - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/REAC_pathway_enrichment_results.tsv - md5sum: 9077c63139668fd01d3f4086e6acb7bc + md5sum: 29b3ba7ab3ebf9eacc7a021a9b67cdc1 - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/KEGG_pathway_enrichment_plot.pdf - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/KEGG_pathway_enrichment_plot.png - path: 
results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/REAC_pathway_enrichment_plot.pdf - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/REAC_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04360.DE_contrast_condition_genotype_WT_vs_KO.png - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04360.png - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04360.xml - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04610.DE_contrast_condition_genotype_WT_vs_KO.png - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04610.png - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04610.xml - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/pathway_enrichment_results.tsv - md5sum: 0a425c10e229bb775597be176c414705 - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/REAC_pathway_enrichment_results.tsv - md5sum: 6594fdb8c0eb591fa131982a8be00877 - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/gost_pathway_venn_diagram.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/gost_pathway_venn_diagram.png - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/gost_pathway_venn_diagram.svg - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/enrichment_plots/REAC_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/enrichment_plots/REAC_pathway_enrichment_plot.png + md5sum: 68b329da9893e34099c7d8ad5cb9c940 - path: 
results_test/RNAseq_report.html From 1e8f63bfcf51bcc4d8c8844f39acf069d3ed5af7 Mon Sep 17 00:00:00 2001 From: WackerO Date: Wed, 20 Nov 2024 10:19:40 +0100 Subject: [PATCH 10/27] changed test threshold --- conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index 72b43194..1761f7d8 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,7 +28,7 @@ params { software_versions = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/software_versions.csv' multiqc = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/MultiQC.zip' run_pathway_analysis = true - pathway_pval_threshold = 0.0003 + pathway_pval_threshold = 0.0004 datasources = 'KEGG,REAC' genome = 'GRCm38' quote = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/offer_example.pdf' From 90651881e84f484e97711323536c3ad3a8908d04 Mon Sep 17 00:00:00 2001 From: WackerO Date: Wed, 20 Nov 2024 10:25:48 +0100 Subject: [PATCH 11/27] updated test.yml --- tests/test.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test.yml b/tests/test.yml index 78b974a3..de207247 100644 --- a/tests/test.yml +++ b/tests/test.yml @@ -73,8 +73,6 @@ md5sum: 29b3ba7ab3ebf9eacc7a021a9b67cdc1 - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/KEGG_pathway_enrichment_plot.pdf - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/KEGG_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/REAC_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/REAC_pathway_enrichment_plot.png - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04610.DE_contrast_condition_genotype_WT_vs_KO.png - path: 
results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04610.png - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04610.xml From f6e4db0461f55090abe8a9a39c4b1dfaf432180b Mon Sep 17 00:00:00 2001 From: WackerO Date: Mon, 2 Dec 2024 08:22:54 +0100 Subject: [PATCH 12/27] renamed param, updated threshold --- CHANGELOG.md | 1 + assets/RNAseq_report.Rmd | 18 +++++++++--------- bin/Execute_report.R | 4 ++-- conf/test.config | 2 +- modules/local/report.nf | 4 ++-- nextflow.config | 2 +- nextflow_schema.json | 4 ++-- 7 files changed, 18 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c2a9b52..eb67d5f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- [#253](https://github.com/qbic-pipelines/rnadeseq/pull/253) Added separate param for adjusted p-value threshold for gprofiler - [#245](https://github.com/qbic-pipelines/rnadeseq/pull/245) Added background gene list to pathway analysis output ### Changed diff --git a/assets/RNAseq_report.Rmd b/assets/RNAseq_report.Rmd index 3f1d849c..a9305dfe 100644 --- a/assets/RNAseq_report.Rmd +++ b/assets/RNAseq_report.Rmd @@ -46,7 +46,7 @@ params: datasources: '' heatmaps_cluster_rows: '' heatmaps_cluster_cols: '' - pathway_pval_threshold: '' + pathway_adj_pval_threshold: '' #Additional args for the report path_proj_summary: '' @@ -2057,8 +2057,8 @@ if (!isProvided(params$datasources)) { # ------------------ # Set default params # ------------------ -pathway_pval_threshold <- ifelse(params$pathway_pval_threshold == -1, params$adj_pval_threshold, params$pathway_pval_threshold) -pathway_pval_text <- format(pathway_pval_threshold, scientific=F) +pathway_adj_pval_threshold <- ifelse(params$pathway_adj_pval_threshold == -1, params$adj_pval_threshold, params$pathway_adj_pval_threshold) +pathway_pval_text <- 
format(pathway_adj_pval_threshold, scientific=F) # Set theme for graphs theme_set(theme_classic()) @@ -2097,7 +2097,7 @@ for (file in contrast_files){ correction_method="fdr", sources=datasources, evcodes=TRUE, - user_threshold=pathway_pval_threshold, + user_threshold=pathway_adj_pval_threshold, custom_bg=custom_background, domain_scope="custom_annotated" ) @@ -2113,7 +2113,7 @@ for (file in contrast_files){ correction_method="fdr", sources=datasources, evcodes=TRUE, - user_threshold=pathway_pval_threshold, + user_threshold=pathway_adj_pval_threshold, domain_scope="annotated" ) pathway_gostres_nobg <- gostres_nobg$result @@ -2127,7 +2127,7 @@ for (file in contrast_files){ correction_method="fdr", sources=datasources, evcodes=TRUE, - user_threshold=pathway_pval_threshold, + user_threshold=pathway_adj_pval_threshold, domain_scope="annotated" ) } @@ -2450,7 +2450,7 @@ if (length(q_list) > 0) { significant=T, correction_method="fdr", sources=datasources, - user_threshold=pathway_pval_threshold, + user_threshold=pathway_adj_pval_threshold, custom_bg=custom_background, domain_scope="custom_annotated" ) @@ -2460,7 +2460,7 @@ if (length(q_list) > 0) { significant=T, correction_method="fdr", sources=datasources, - user_threshold=pathway_pval_threshold, + user_threshold=pathway_adj_pval_threshold, domain_scope="annotated" ) } @@ -2473,7 +2473,7 @@ if (length(q_list) > 0) { if (nrow(path_enrich) > 0){ pg2 <- gostplot(gostres, capped=T, interactive=T) - pg2[['x']][['layout']][['annotations']][[1]][['x']] <- -pathway_pval_threshold + pg2[['x']][['layout']][['annotations']][[1]][['x']] <- -pathway_adj_pval_threshold # limit gostplot y maximum dynamically for all subplots for (counter in c(1:length(contrast_files))) { diff --git a/bin/Execute_report.R b/bin/Execute_report.R index 8bb51fe8..7d313bff 100755 --- a/bin/Execute_report.R +++ b/bin/Execute_report.R @@ -37,7 +37,7 @@ option_list = list( make_option("--datasources", type="character", default=NULL, help="Which datasources 
to use for pathway analysis.", metavar="character"), make_option("--heatmaps_cluster_rows", action="store_true", default=FALSE, help="Whether to activate row clustering when generating heatmaps of gene expression in enriched pathways."), make_option("--heatmaps_cluster_cols", action="store_true", default=FALSE, help="Whether to activate column clustering when generating heatmaps of gene expression in enriched pathways."), - make_option("--pathway_pval_threshold", type="double", default=-1, help="Which p value threshold to use for pathway analysis."), + make_option("--pathway_adj_pval_threshold", type="double", default=-1, help="Which adjusted p value threshold to use for pathway analysis."), make_option(c("-s", "--proj_summary"), type="character", default=NULL, help="Project summary file", metavar="character"), make_option(c("--path_quote"), type="character", default=NULL, help="Path to the quote PDF", metavar="character"), @@ -90,7 +90,7 @@ rmarkdown::render(opt$report, output_file = opt$output, knit_root_dir = wd, outp datasources = opt$datasources, heatmaps_cluster_rows = opt$heatmaps_cluster_rows, heatmaps_cluster_cols = opt$heatmaps_cluster_cols, - pathway_pval_threshold = opt$pathway_pval_threshold, + pathway_adj_pval_threshold = opt$pathway_adj_pval_threshold, path_proj_summary = opt$proj_summary, path_quote = opt$path_quote, diff --git a/conf/test.config b/conf/test.config index 1761f7d8..61abb7b6 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,7 +28,7 @@ params { software_versions = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/software_versions.csv' multiqc = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/MultiQC.zip' run_pathway_analysis = true - pathway_pval_threshold = 0.0004 + pathway_adj_pval_threshold = 0.0004 datasources = 'KEGG,REAC' genome = 'GRCm38' quote = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/offer_example.pdf' diff --git a/modules/local/report.nf 
b/modules/local/report.nf index f714d5a3..a67227f9 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -44,7 +44,7 @@ process REPORT { def datasources_opt = params.datasources ? "--datasources $params.datasources" : '' def heatmaps_cluster_rows_opt = params.heatmaps_cluster_rows ? "--heatmaps_cluster_rows TRUE" : '' def heatmaps_cluster_cols_opt = params.heatmaps_cluster_cols ? "--heatmaps_cluster_cols TRUE" : '' - def pathway_pval_threshold_opt = params.pathway_pval_threshold == -1 ? "--pathway_pval_threshold $params.adj_pval_threshold" : "--pathway_pval_threshold $params.pathway_pval_threshold" + def pathway_adj_pval_threshold_opt = params.pathway_adj_pval_threshold == -1 ? "--pathway_adj_pval_threshold $params.adj_pval_threshold" : "--pathway_adj_pval_threshold $params.pathway_adj_pval_threshold" def quote_opt = params.quote != 'NO_FILE5' ? "--path_quote $params.quote" : '' @@ -88,7 +88,7 @@ process REPORT { $datasources_opt \ $heatmaps_cluster_rows_opt \ $heatmaps_cluster_cols_opt \ - $pathway_pval_threshold_opt \ + $pathway_adj_pval_threshold_opt \ $quote_opt \ $software_versions_opt \ --proj_summary $proj_summary \ diff --git a/nextflow.config b/nextflow.config index eea21636..b5a13834 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,7 +42,7 @@ params { datasources = null heatmaps_cluster_rows = true heatmaps_cluster_cols = false - pathway_pval_threshold = -1 + pathway_adj_pval_threshold = -1 // Additional args for the report project_summary = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 672fcf5c..3ee4f42d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -157,10 +157,10 @@ "default": false, "description": "Whether to activate column clustering when generating heatmaps of gene expression in enriched pathways." }, - "pathway_pval_threshold": { + "pathway_adj_pval_threshold": { "type": "number", "default": -1, - "description": "p value (float) to use as threshold for pathway analysis. 
If omitted, will use the value of the parameter adj_pval_threshold (default 0.05)." + "description": "Adjusted p value (float) to use as threshold for pathway analysis. If omitted, will use the value of the parameter adj_pval_threshold (default 0.05)." } } }, From 9e1c23e037f25a2e7f71aed0902e92e263f5a4f1 Mon Sep 17 00:00:00 2001 From: qbicStefanC Date: Mon, 2 Dec 2024 13:00:22 +0100 Subject: [PATCH 13/27] Update test_star_salmon.yml Added file path and checksum for raw_gene_counts.tsv file --- tests/test_star_salmon.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_star_salmon.yml b/tests/test_star_salmon.yml index 2b111164..e3bf0e5a 100644 --- a/tests/test_star_salmon.yml +++ b/tests/test_star_salmon.yml @@ -16,6 +16,8 @@ - path: results_test/differential_gene_expression/gene_counts_tables/deseq2_library_scaled_gene_counts.tsv - path: results_test/differential_gene_expression/gene_counts_tables/rlog_transformed_gene_counts.tsv - path: results_test/differential_gene_expression/gene_counts_tables/sizeFactor_libraries.tsv + - path: results_test/differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv + md5sum: 8ce16900c36615ae3d3b5e0bf0c856f0 - path: results_test/differential_gene_expression/plots/boxplots_requested_genes/IL10_ENSG00000136634.pdf - path: results_test/differential_gene_expression/plots/boxplots_requested_genes/IL10_ENSG00000136634.png - path: results_test/differential_gene_expression/plots/boxplots_requested_genes/IL10_ENSG00000136634.svg From 4fc1d15e7c6526aec13089cb08336c47ca2ba9e3 Mon Sep 17 00:00:00 2001 From: qbicStefanC Date: Mon, 2 Dec 2024 13:03:52 +0100 Subject: [PATCH 14/27] Update test_star_rsem.yml Added file path and checksum for raw_gene_counts.tsv file --- tests/test_star_rsem.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_star_rsem.yml b/tests/test_star_rsem.yml index 110848e8..1b130f8d 100644 --- a/tests/test_star_rsem.yml +++ b/tests/test_star_rsem.yml @@ -13,6 +13,8 @@ # md5sum: 
ea4b16a379f09fa44c7c541ccab64ce6 - path: results_test/differential_gene_expression/final_gene_table/final_DE_gene_list.tsv md5sum: f1825ed59878115a279db7151b0ed0a1 + - path: results_test/differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv + md5sum: f0e6bb8eab1e6e08008a5a06934ef6f3 - path: results_test/differential_gene_expression/plots/boxplots_requested_genes/CXCL2_ENSG00000081041.pdf - path: results_test/differential_gene_expression/plots/boxplots_requested_genes/CXCL2_ENSG00000081041.png - path: results_test/differential_gene_expression/plots/boxplots_requested_genes/CXCL2_ENSG00000081041.svg From 322d388604d87257da8467b7e16b64c41c9cdb13 Mon Sep 17 00:00:00 2001 From: qbicStefanC Date: Mon, 2 Dec 2024 13:14:39 +0100 Subject: [PATCH 15/27] Update CHANGELOG.md added PR #251 Get raw gene count tables from either Salmon and RSEM analysis --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c2a9b52..1723fb32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## dev + +### Added + +- [#251](https://github.com/qbic-pipelines/rnadeseq/pull/251) Get raw gene count tables from either Salmon and RSEM analysis paths + + ## 2.4 - A Pair of Shoes ### Added From 4e2cdba46a006ce8b4769dcd0384240ae1358476 Mon Sep 17 00:00:00 2001 From: qbicStefanC Date: Mon, 2 Dec 2024 16:12:38 +0100 Subject: [PATCH 16/27] Update CHANGELOG.md --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1723fb32..1672174f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#251](https://github.com/qbic-pipelines/rnadeseq/pull/251) Get raw gene count tables from either Salmon and RSEM analysis paths +### Changed + +### Removed ## 2.4 - A Pair of Shoes From 18d1684cbf2f9d12bdbff02355d845639487c0e7 Mon Sep 17 00:00:00 2001 From: qbicStefanC Date: Mon, 2 Dec 2024 16:14:44 +0100 Subject: [PATCH 17/27] Update CHANGELOG.md --- CHANGELOG.md | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1672174f..d973302d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,21 +13,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed -## 2.4 - A Pair of Shoes - -### Added - -- [#245](https://github.com/qbic-pipelines/rnadeseq/pull/245) Added background gene list to pathway analysis output - -### Changed - -- [#249](https://github.com/qbic-pipelines/rnadeseq/pull/249) Release 2.4 -- [#248](https://github.com/qbic-pipelines/rnadeseq/pull/248) Version bumps for release 2.4 -- [#247](https://github.com/qbic-pipelines/rnadeseq/pull/247) Changed the report text that shows when the gprofiler GMT file could not be downloaded - -### Removed - -- [#247](https://github.com/qbic-pipelines/rnadeseq/pull/247) Removed hard-coded comment about usage of gprofiler databases KEGG and REAC in the report ## 2.3 - Flowering Orchards From 
71f1c381d492ad031a3c1eeecb4fecc26f8770af Mon Sep 17 00:00:00 2001 From: qbicStefanC Date: Mon, 2 Dec 2024 16:17:20 +0100 Subject: [PATCH 18/27] Update CHANGELOG.md --- CHANGELOG.md | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d973302d..71c4c64f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,14 +6,33 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## dev ### Added +- [#251](https://github.com/qbic-pipelines/rnadeseq/pull/251) Get raw gene count tables from either Salmon and RSEM analysis +- [#250](https://github.com/qbic-pipelines/rnadeseq/pull/250) Added clearer error message for incorrect contrast_pairs -- [#251](https://github.com/qbic-pipelines/rnadeseq/pull/251) Get raw gene count tables from either Salmon and RSEM analysis paths +### Changed + +### Fixed + +- [#252](https://github.com/qbic-pipelines/rnadeseq/pull/252) Fixed github CI bug by updating actions/upload-artifact +- [#250](https://github.com/qbic-pipelines/rnadeseq/pull/250) Fixed incorrect reading and indexing of contrast_pairs + +## 2.4 - A Pair of Shoes + +### Added + +- [#253](https://github.com/qbic-pipelines/rnadeseq/pull/253) Added separate param for adjusted p-value threshold for gprofiler +- [#245](https://github.com/qbic-pipelines/rnadeseq/pull/245) Added background gene list to pathway analysis output ### Changed -### Removed +- [#249](https://github.com/qbic-pipelines/rnadeseq/pull/249) Release 2.4 +- [#248](https://github.com/qbic-pipelines/rnadeseq/pull/248) Version bumps for release 2.4 +- [#247](https://github.com/qbic-pipelines/rnadeseq/pull/247) Changed the report text that shows when the gprofiler GMT file could not be downloaded +### Removed +- [#247](https://github.com/qbic-pipelines/rnadeseq/pull/247) Removed hard-coded comment about usage of gprofiler databases KEGG and REAC in the report + ## 2.3 - Flowering Orchards ### Added From 
c82108d336e46457db22dca89cf300bdaccde4de Mon Sep 17 00:00:00 2001 From: WackerO <43847497+WackerO@users.noreply.github.com> Date: Tue, 3 Dec 2024 15:46:20 +0100 Subject: [PATCH 19/27] Update assets/RNAseq_report.Rmd --- assets/RNAseq_report.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/RNAseq_report.Rmd b/assets/RNAseq_report.Rmd index e3fc6961..fa284699 100644 --- a/assets/RNAseq_report.Rmd +++ b/assets/RNAseq_report.Rmd @@ -629,7 +629,7 @@ if (params$input_type %in% c("featurecounts", "smrnaseq")) { coldata$combfactor <- metadata$combfactor rownames(coldata) <- qbicCodes cds <- DESeqDataSetFromTximport(txi=txi.salmon, colData =coldata, design = eval(parse(text=as.character(design[[1]])))) - # get raw counts + # get raw counts count_table_names <- merge(x=gene_names, y=assay(cds), by.x = "Ensembl_ID", by.y="row.names") count_table_names <- count_table_names[order(count_table_names$Ensembl_ID),] write.table(count_table_names, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv",sep=""), append = FALSE, quote = FALSE, sep = "\t",eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double")) From 0e1f27ed47c6c33e979c3cc4f3d58865fbb7918e Mon Sep 17 00:00:00 2001 From: WackerO <43847497+WackerO@users.noreply.github.com> Date: Tue, 3 Dec 2024 15:46:34 +0100 Subject: [PATCH 20/27] Update assets/RNAseq_report.Rmd --- assets/RNAseq_report.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/RNAseq_report.Rmd b/assets/RNAseq_report.Rmd index fa284699..5995229f 100644 --- a/assets/RNAseq_report.Rmd +++ b/assets/RNAseq_report.Rmd @@ -592,7 +592,7 @@ if (params$input_type %in% c("featurecounts", "smrnaseq")) { #dds from SummarizedExperiment , then run DESeq cds <- DESeqDataSet(se, design = as.formula(eval(parse(text=as.character(design[[1]]))))) - # get raw counts + # get raw counts count_table_names <- merge(x=gene_names, y=assay(cds), by.x = "Ensembl_ID", by.y="row.names") 
count_table_names <- count_table_names[order(count_table_names$Ensembl_ID),] write.table(count_table_names, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv",sep=""), append = FALSE, quote = FALSE, sep = "\t",eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double")) From e20c2b94bd2451cd7a8595ffbdcde98745bd6a89 Mon Sep 17 00:00:00 2001 From: WackerO <43847497+WackerO@users.noreply.github.com> Date: Tue, 3 Dec 2024 15:50:35 +0100 Subject: [PATCH 21/27] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e6e1684c..3d551f0c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,7 +33,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed - [#247](https://github.com/qbic-pipelines/rnadeseq/pull/247) Removed hard-coded comment about usage of gprofiler databases KEGG and REAC in the report - ## 2.3 - Flowering Orchards ### Added From cb003058bbae4462df6221afa14da25217d3c4ee Mon Sep 17 00:00:00 2001 From: WackerO Date: Fri, 6 Dec 2024 14:59:56 +0100 Subject: [PATCH 22/27] Updated usage docs with missing params --- CHANGELOG.md | 1 + docs/usage.md | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d551f0c..7f76e624 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- [#255](https://github.com/qbic-pipelines/rnadeseq/pull/255) Add usage docu for datasources, heatmaps_cluster_rows/cols and pathway_adj_pval_threshold params - [#251](https://github.com/qbic-pipelines/rnadeseq/pull/251) Get raw gene count tables from either Salmon and RSEM analysis - [#250](https://github.com/qbic-pipelines/rnadeseq/pull/250) Added clearer error message for incorrect contrast_pairs diff --git a/docs/usage.md b/docs/usage.md index 9e5da48f..818bc9cb 100644 --- a/docs/usage.md +++ 
b/docs/usage.md @@ -34,6 +34,9 @@ - [`--vst_genes_number`](#--vst_genes_number) - [`--round_DE`](#--round_DE) - [`--run_pathway_analysis`](#--run_pathway_analysis) + - [`--pathway_adj_pval_threshold`](#--pathway_adj_pval_threshold) + - [`--heatmaps_cluster_rows`](#--heatmaps_cluster_rows) + - [`--heatmaps_cluster_cols`](#--heatmaps_cluster_cols) - [`--input_type`](#--input_type) - [`--multiqc`](#--multiqc) - [`--project_summary`](#--project_summary) @@ -48,6 +51,7 @@ - [`--custom_gmt`](#--custom_gmt) - [`--set_background`](#--set_background) - [`--custom_background`](#--custom_background) + - [`--datasources`](#--datasources) - [`--igenomes_base`](#--igenomes_base) - [`--igenomes_ignore`](#--igenomes_ignore) - [Job resources](#job-resources) @@ -373,6 +377,18 @@ Integer indicating to how many decimals to round the DE results (default: -1, in Set this flag to run pathway analysis, otherwise, this step will be skipped. +### `--pathway_adj_pval_threshold` + +Use this param to specifically set the adjusted p value threshold for pathway enrichment analysis (will otherwise use the same adjusted p value threshold as for the DE analysis, as set with the param `--adj_pval_threshold`). + +### `--heatmaps_cluster_rows` + +Use this flag to set whether the heatmaps of gene expression in enriched pathways should be clustered row-wise (default true). + +### `--heatmaps_cluster_cols` + +Use this flag to set whether the heatmaps of gene expression in enriched pathways should be clustered column-wise (default false). + ### `--input_type` This tells the pipeline which type of input dataset is provided. Must be one of 'featurecounts', 'rsem', 'salmon', 'smrnaseq', default: featurecounts. @@ -450,6 +466,10 @@ Whether to restrict pathway analysis to a background gene list (default: true, w Path to custom background TXT file with one gene ID per line to use as background genes, not necessary if `--run_pathway_analysis = false` or `--set_background = false`.
+### `--datasources` + +Which datasources to use for pathway analysis, comma-separated string like 'KEGG,REAC'. See param 'sources' on https://rdrr.io/cran/gprofiler2/man/gost.html for a list of available sources. If not set, will use all sources. If set while a --custom_gmt is provided, will filter the GMT for these datasources (will not filter for the GO subtypes like GO:BP, just for GO). + ## Job resources ### Automatic resubmission From 8bc2a7757c6e680359d9b09fdd93bb4d7ee44e50 Mon Sep 17 00:00:00 2001 From: WackerO Date: Mon, 9 Dec 2024 11:38:47 +0100 Subject: [PATCH 23/27] Added trycatch to enrichment plots --- CHANGELOG.md | 1 + assets/RNAseq_report.Rmd | 31 +++++++++++++++++++++++++++---- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f76e624..9cb18a29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- [#256](https://github.com/qbic-pipelines/rnadeseq/pull/256) Add trycatch to pathway enrichment plots so they are skipped when too large instead of throwing an error - [#255](https://github.com/qbic-pipelines/rnadeseq/pull/255) Add usage docu for datasources, heatmaps_cluster_rows/cols and pathway_adj_pval_threshold params - [#251](https://github.com/qbic-pipelines/rnadeseq/pull/251) Get raw gene count tables from either Salmon and RSEM analysis - [#250](https://github.com/qbic-pipelines/rnadeseq/pull/250) Added clearer error message for incorrect contrast_pairs diff --git a/assets/RNAseq_report.Rmd b/assets/RNAseq_report.Rmd index 5995229f..d1ed04d0 100644 --- a/assets/RNAseq_report.Rmd +++ b/assets/RNAseq_report.Rmd @@ -2227,9 +2227,32 @@ for (file in contrast_files){ scale_fill_continuous(high = "#132B43", low = "#56B1F7") + ggtitle("Enriched pathways") + xlab("") + ylab("Gene fraction (DE genes / Pathway size)") - ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/enrichment_plots/", 
make.names(db_source), "_pathway_enrichment_plot.pdf"), device = "pdf", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F) - ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.png"), device = "png", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F) - ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.svg"), device = "svg", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F) + + # If the plots are huge ggsave will throw an error even if limitsize=T, so I'm leaving limitsize=F and instead using trycatch + tryCatch( + { + ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.pdf"), device = "pdf", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F) + }, + error=function(e) { + print(paste0("Could not save pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.pdf because of the following error:\n", e)) + } + ) + tryCatch( + { + ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.png"), device = "png", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F) + }, + error=function(e) { + print(paste0("Could not save pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.png because of the following error:\n", e)) + } + ) + tryCatch( + { + ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.svg"), device = "svg", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F) + }, + error=function(e) { + print(paste0("Could not save pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.svg because of the following 
error:\n", e)) + } + ) # Plotting heatmaps and KEGG pathways for all pathways print("Plotting heatmaps...") @@ -2412,7 +2435,7 @@ Inside the pathway analysis results folder, a subfolder for each contrast used f - `*_gost_pathway_venn_diagram.pdf/png` - Venn diagrams showing the numbers of enriched pathways when using a background gene list vs when not using a bg list. - `enrichment_plots/*_pathway_enrichment_plot.{pdf/png/svg}` - - Barplots showing the proportion of differentially expressed genes in the pathway for a certain pathway database. + - Barplots showing the proportion of differentially expressed genes in the pathway for a certain pathway database (might be missing if too many pathways were enriched for fitting into a plot). - `gost_pathway_gostplot.{pdf/png/svg}` - Manhattan plots displaying all enriched pathways. - `KEGG_pathways/` From 0c6a3cd282c5159522e67caab13462bba4d91672 Mon Sep 17 00:00:00 2001 From: WackerO Date: Mon, 9 Dec 2024 11:39:59 +0100 Subject: [PATCH 24/27] prettier --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9cb18a29..14d87c82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed - [#247](https://github.com/qbic-pipelines/rnadeseq/pull/247) Removed hard-coded comment about usage of gprofiler databases KEGG and REAC in the report + ## 2.3 - Flowering Orchards ### Added From 8a994adfa966669afce73182b139064b41c6adfc Mon Sep 17 00:00:00 2001 From: WackerO Date: Mon, 9 Dec 2024 11:42:23 +0100 Subject: [PATCH 25/27] prettier --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f76e624..4e28065d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed - [#247](https://github.com/qbic-pipelines/rnadeseq/pull/247) Removed hard-coded comment 
about usage of gprofiler databases KEGG and REAC in the report + ## 2.3 - Flowering Orchards ### Added From 9f9d30ed28c67a326328bff86c6c88d0df48e9af Mon Sep 17 00:00:00 2001 From: WackerO Date: Fri, 13 Dec 2024 09:17:17 +0100 Subject: [PATCH 26/27] fix stuff before release (add default description of pathway_adj_pval_threshold, remove excessive logic of the param --- assets/RNAseq_report.Rmd | 21 ++++++++++----------- bin/Execute_report.R | 2 +- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/assets/RNAseq_report.Rmd b/assets/RNAseq_report.Rmd index d1ed04d0..c66154db 100644 --- a/assets/RNAseq_report.Rmd +++ b/assets/RNAseq_report.Rmd @@ -457,7 +457,7 @@ if (params$input_type %in% c("featurecounts", "smrnaseq")) { # Write raw counts to file count_table_names <- merge(x=gene_names, y=count.table, by.x = "Ensembl_ID", by.y="row.names") - write.table(count_table_names, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv",sep=""), append = FALSE, quote = FALSE, sep = "\t",eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double")) + write.table(count_table_names, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv", sep=""), append = FALSE, quote = FALSE, sep = "\t", eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double")) } # to get all possible pairwise comparisons, make a combined factor @@ -595,7 +595,7 @@ if (params$input_type %in% c("featurecounts", "smrnaseq")) { # get raw counts count_table_names <- merge(x=gene_names, y=assay(cds), by.x = "Ensembl_ID", by.y="row.names") count_table_names <- count_table_names[order(count_table_names$Ensembl_ID),] - write.table(count_table_names, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv",sep=""), append = FALSE, quote = FALSE, sep = "\t",eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double")) + write.table(count_table_names, 
paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv", sep=""), append = FALSE, quote = FALSE, sep = "\t", eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double")) # Load salmon count files } else if (params$input_type == "salmon") { @@ -632,7 +632,7 @@ if (params$input_type %in% c("featurecounts", "smrnaseq")) { # get raw counts count_table_names <- merge(x=gene_names, y=assay(cds), by.x = "Ensembl_ID", by.y="row.names") count_table_names <- count_table_names[order(count_table_names$Ensembl_ID),] - write.table(count_table_names, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv",sep=""), append = FALSE, quote = FALSE, sep = "\t",eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double")) + write.table(count_table_names, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv", sep=""), append = FALSE, quote = FALSE, sep = "\t", eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double")) } } else { stop(paste0("Invalid input type: ", params$input_type, "! 
Input type must be one of: featurecounts, rsem, salmon, smrnaseq!")) @@ -2067,8 +2067,7 @@ if (!isProvided(params$datasources)) { # ------------------ # Set default params # ------------------ -pathway_adj_pval_threshold <- ifelse(params$pathway_adj_pval_threshold == -1, params$adj_pval_threshold, params$pathway_adj_pval_threshold) -pathway_pval_text <- format(pathway_adj_pval_threshold, scientific=F) +pathway_pval_text <- format(params$pathway_adj_pval_threshold, scientific=F) # Set theme for graphs theme_set(theme_classic()) @@ -2107,7 +2106,7 @@ for (file in contrast_files){ correction_method="fdr", sources=datasources, evcodes=TRUE, - user_threshold=pathway_adj_pval_threshold, + user_threshold=params$pathway_adj_pval_threshold, custom_bg=custom_background, domain_scope="custom_annotated" ) @@ -2123,7 +2122,7 @@ for (file in contrast_files){ correction_method="fdr", sources=datasources, evcodes=TRUE, - user_threshold=pathway_adj_pval_threshold, + user_threshold=params$pathway_adj_pval_threshold, domain_scope="annotated" ) pathway_gostres_nobg <- gostres_nobg$result @@ -2137,7 +2136,7 @@ for (file in contrast_files){ correction_method="fdr", sources=datasources, evcodes=TRUE, - user_threshold=pathway_adj_pval_threshold, + user_threshold=params$pathway_adj_pval_threshold, domain_scope="annotated" ) } @@ -2483,7 +2482,7 @@ if (length(q_list) > 0) { significant=T, correction_method="fdr", sources=datasources, - user_threshold=pathway_adj_pval_threshold, + user_threshold=params$pathway_adj_pval_threshold, custom_bg=custom_background, domain_scope="custom_annotated" ) @@ -2493,7 +2492,7 @@ if (length(q_list) > 0) { significant=T, correction_method="fdr", sources=datasources, - user_threshold=pathway_adj_pval_threshold, + user_threshold=params$pathway_adj_pval_threshold, domain_scope="annotated" ) } @@ -2506,7 +2505,7 @@ if (length(q_list) > 0) { if (nrow(path_enrich) > 0){ pg2 <- gostplot(gostres, capped=T, interactive=T) - 
pg2[['x']][['layout']][['annotations']][[1]][['x']] <- -pathway_adj_pval_threshold + pg2[['x']][['layout']][['annotations']][[1]][['x']] <- -params$pathway_adj_pval_threshold # limit gostplot y maximum dynamically for all subplots for (counter in c(1:length(contrast_files))) { diff --git a/bin/Execute_report.R b/bin/Execute_report.R index 7d313bff..c8f8fdca 100755 --- a/bin/Execute_report.R +++ b/bin/Execute_report.R @@ -37,7 +37,7 @@ option_list = list( make_option("--datasources", type="character", default=NULL, help="Which datasources to use for pathway analysis.", metavar="character"), make_option("--heatmaps_cluster_rows", action="store_true", default=FALSE, help="Whether to activate row clustering when generating heatmaps of gene expression in enriched pathways."), make_option("--heatmaps_cluster_cols", action="store_true", default=FALSE, help="Whether to activate column clustering when generating heatmaps of gene expression in enriched pathways."), - make_option("--pathway_adj_pval_threshold", type="double", default=-1, help="Which adjusted p value threshold to use for pathway analysis."), + make_option("--pathway_adj_pval_threshold", type="double", default=-1, help="Which adjusted p value threshold to use for pathway analysis. 
Will by default use the same value as the value of --adj_pval_threshold (default 0.05)."), make_option(c("-s", "--proj_summary"), type="character", default=NULL, help="Project summary file", metavar="character"), make_option(c("--path_quote"), type="character", default=NULL, help="Path to the quote PDF", metavar="character"), From 0b1409ff1e4e1ac60ad5e2aed9c8a38d61aeb8da Mon Sep 17 00:00:00 2001 From: WackerO Date: Fri, 13 Dec 2024 12:46:53 +0100 Subject: [PATCH 27/27] Bump versions --- .github/workflows/ci.yml | 8 ++++---- CHANGELOG.md | 6 +++++- Dockerfile | 6 +++--- environment.yml | 2 +- modules/local/report.nf | 2 +- nextflow.config | 2 +- 6 files changed, 15 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ddc61109..d1c4fb42 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,14 +39,14 @@ jobs: environment.yml - name: Build new docker image if: env.MATCHED_FILES - run: docker build --no-cache . -t ghcr.io/qbic-pipelines/rnadeseq:2.4 + run: docker build --no-cache . -t ghcr.io/qbic-pipelines/rnadeseq:2.5 # Change the version above and the third version below before/after release - name: Pull docker image if: ${{ !env.MATCHED_FILES }} run: | docker pull ghcr.io/qbic-pipelines/rnadeseq:dev - docker tag ghcr.io/qbic-pipelines/rnadeseq:dev ghcr.io/qbic-pipelines/rnadeseq:2.4 + docker tag ghcr.io/qbic-pipelines/rnadeseq:dev ghcr.io/qbic-pipelines/rnadeseq:2.5 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 @@ -93,14 +93,14 @@ jobs: environment.yml - name: Build new docker image if: env.MATCHED_FILES - run: docker build --no-cache . -t ghcr.io/qbic-pipelines/rnadeseq:2.4 + run: docker build --no-cache . 
-t ghcr.io/qbic-pipelines/rnadeseq:2.5 # Change the version above and the third version below before/after release - name: Pull docker image if: ${{ !env.MATCHED_FILES }} run: | docker pull ghcr.io/qbic-pipelines/rnadeseq:dev - docker tag ghcr.io/qbic-pipelines/rnadeseq:dev ghcr.io/qbic-pipelines/rnadeseq:2.4 + docker tag ghcr.io/qbic-pipelines/rnadeseq:dev ghcr.io/qbic-pipelines/rnadeseq:2.5 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 14d87c82..1eeca23f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## dev +## 2.5 - The Potato Eaters ### Added @@ -14,8 +14,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- [#260](https://github.com/qbic-pipelines/rnadeseq/pull/260) Release 2.5 +- [#259](https://github.com/qbic-pipelines/rnadeseq/pull/259) Bump versions for release 2.5 + ### Fixed +- [#258](https://github.com/qbic-pipelines/rnadeseq/pull/258) Fixed some comments for release (removed excess checks for pathway_adj_pval_threshold, added default explanation of that param to Execute_report.R, fixed some whitespace) - [#252](https://github.com/qbic-pipelines/rnadeseq/pull/252) Fixed github CI bug by updating actions/upload-artifact - [#250](https://github.com/qbic-pipelines/rnadeseq/pull/250) Fixed incorrect reading and indexing of contrast_pairs diff --git a/Dockerfile b/Dockerfile index e1a58e5c..25f490cf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,14 +5,14 @@ LABEL org.opencontainers.image.authors="Gisela Gabernet, Alexander Peltzer, Oska LABEL org.opencontainers.image.licenses=MIT COPY environment.yml / #RUN conda install -c conda-forge mamba -RUN mamba env create --file /environment.yml -p /opt/conda/envs/qbic-pipelines-rnadeseq-2.4 && \ +RUN mamba env create --file 
/environment.yml -p /opt/conda/envs/qbic-pipelines-rnadeseq-2.5 && \ mamba clean --all --yes RUN apt-get update -qq && \ apt-get install -y zip procps ghostscript # Add conda installation dir to PATH -ENV PATH /opt/conda/envs/qbic-pipelines-rnadeseq-2.4/bin:$PATH +ENV PATH /opt/conda/envs/qbic-pipelines-rnadeseq-2.5/bin:$PATH # Dump the details of the installed packates to a file for posterity -RUN mamba env export --name qbic-pipelines-rnadeseq-2.4 > qbic-pipelines-rnadeseq-2.4.yml +RUN mamba env export --name qbic-pipelines-rnadeseq-2.5 > qbic-pipelines-rnadeseq-2.5.yml # Instruct R processes to use these empty files instead of clashing with a local config RUN touch .Rprofile RUN touch .Renviron diff --git a/environment.yml b/environment.yml index e3e8f268..45eccb99 100644 --- a/environment.yml +++ b/environment.yml @@ -1,7 +1,7 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml # use this to find packages: https://anaconda.org/ -name: qbic-pipelines-rnadeseq-2.4 +name: qbic-pipelines-rnadeseq-2.5 channels: - bioconda - conda-forge diff --git a/modules/local/report.nf b/modules/local/report.nf index a67227f9..a3bf33e2 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -1,6 +1,6 @@ process REPORT { - container 'ghcr.io/qbic-pipelines/rnadeseq:2.4' + container 'ghcr.io/qbic-pipelines/rnadeseq:2.5' input: path gene_counts diff --git a/nextflow.config b/nextflow.config index b5a13834..bb0f314a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -277,7 +277,7 @@ manifest { description = """Differential gene expression analysis and pathway analysis of RNAseq data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.4' + version = '2.5' doi = '' }