From 3bf48f40cac10b4f699e9f781ed91dbb60d3dfe7 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 13 Jun 2024 13:10:20 -0700 Subject: [PATCH 1/4] Add run_validate_PipeVal_with_metadata method --- modules/PipeVal/validate/main.nf | 43 ++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/modules/PipeVal/validate/main.nf b/modules/PipeVal/validate/main.nf index a80a82c..a6dfef7 100644 --- a/modules/PipeVal/validate/main.nf +++ b/modules/PipeVal/validate/main.nf @@ -41,3 +41,46 @@ process run_validate_PipeVal { validate ${file_to_validate} ${options.validate_extra_args} > 'validation.txt' """ } + +/** +* Nextflow module for validating files and directories. +* +* This variant accepts and emits a tuple so that the validated path can be +* associated with arbitrary metadata. +* +* @input file_to_validate path File or directory to validate +* @input metadata val Arbitrary metadata emitted unchanged alongside the validated path. +* +* @params log_output_dir path Directory for saving log files +* @params docker_image_version string Version of PipeVal image for validation +* @params main_process string (Optional) Name of main output directory +*/ +process run_validate_PipeVal_with_metadata { + container options.docker_image + label options.process_label + + publishDir path: { options.main_process ? 
+ "${options.log_output_dir}/process-log/${options.main_process}" : + "${options.log_output_dir}/process-log/PipeVal-${options.docker_image_version}" + }, + pattern: ".command.*", + mode: "copy", + saveAs: { "${task.process.split(':')[-1]}/${task.process.split(':')[-1]}-${task.index}/log${file(it).getName()}" } + + // This process uses the publishDir method to save the log files + ext capture_logs: false + + input: + tuple path(file_to_validate), val(metadata) + + output: + path(".command.*") + path("validation.txt"), emit: validation_result + tuple path(file_to_validate), val(metadata), emit: validated_file + + script: + """ + set -euo pipefail + validate ${file_to_validate} ${options.validate_extra_args} > 'validation.txt' + """ +} From c109ac88746c3717300ddc31f9f82dbe3de1c17c Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 13 Jun 2024 13:52:25 -0700 Subject: [PATCH 2/4] Update README --- README.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 47cfffc..36b05ce 100644 --- a/README.md +++ b/README.md @@ -116,23 +116,35 @@ Outputs: ##### Description -Module for validating files and directories using PipeVal +Module for validating files and directories using PipeVal. There are two nearly-identical processes in this module: `run_validate_PipeVal` and `run_validate_PipeVal_with_metadata`. Tools used: `PipeVal`. Inputs: - `file_to_validate`: path for file or directory to validate +Inputs: + - `run_validate_PipeVal`: + - `file_to_validate`: path for file or directory to validate + - `run_validate_PipeVal_with_metadata`: + - A tuple of: + - `file_to_validate`: path for file or directory to validate + - `metadata`: arbitrary `val` passed through to the output + Parameters: - `log_output_dir`: directory for storing log files - `docker_image_version`: PipeVal docker image version within which process will run. 
The default is: `4.0.0-rc.2` - `process_label`: assign Nextflow process label to process to control resource allocation. For specific CPU and memory allocation, include static allocations in node-specific config files - `main_process`: Set output directory to the specified main process instead of `PipeVal-4.0.0-rc.2` +Outputs: + - `validation_result`: path of file with validation output text + - `validated_file`: `file_to_validate` or tuple of (`file_to_validate`, `metadata`) + ##### How to use 1. Add this repository as a submodule in the pipeline of interest -2. Include the `run_validate_PipeVal` process from the module `main.nf` with a relative path +2. Include the `run_validate_PipeVal` or `run_validate_PipeVal_with_metadata` process from the module `main.nf` with a relative path 3. Use the `addParams` directive when importing to specify any params 4. Call the process with the inputs where needed 5. Aggregate and save the output validation files as needed From 7498d12e5983c29000b40a889ee07408ffb2bae6 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 13 Jun 2024 13:55:42 -0700 Subject: [PATCH 3/4] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c75b8b3..d1eb43d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Add options to handle compressed files with VCF indexing workflow - Add `bgzip` to `index_VCF_tabix` module - Add PipeVal generate-checksum module +- Add `run_validate_PipeVal_with_metadata` process ### Changed - Use `ghcr.io/uclahs-cds` as default registry From c4629318b6a45ef49e76079019ee8c7058ccc43f Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Fri, 14 Jun 2024 14:29:05 -0700 Subject: [PATCH 4/4] Support new pipeval entrypoint --- modules/PipeVal/validate/main.nf | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/PipeVal/validate/main.nf 
b/modules/PipeVal/validate/main.nf index 3045304..c7ae8e6 100644 --- a/modules/PipeVal/validate/main.nf +++ b/modules/PipeVal/validate/main.nf @@ -87,6 +87,12 @@ process run_validate_PipeVal_with_metadata { script: """ set -euo pipefail - validate ${file_to_validate} ${options.validate_extra_args} > 'validation.txt' + + if command -v pipeval &> /dev/null + then + pipeval validate ${file_to_validate} ${options.validate_extra_args} > 'validation.txt' + else + validate ${file_to_validate} ${options.validate_extra_args} > 'validation.txt' + fi """ }