uclahs-cds · nwiltsie · Jun 14, 2024 · Jun 13, 2024 · Jun 13, 2024 · Jun 13, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -19,6 +19,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 - Add options to handle compressed files with VCF indexing workflow
 - Add `bgzip` to `index_VCF_tabix` module
 - Add PipeVal generate-checksum module
+- Add `run_validate_PipeVal_with_metadata` method
 
 ### Changed
 - Use `ghcr.io/uclahs-cds` as default registry

diff --git a/README.md b/README.md
@@ -116,23 +116,35 @@ Outputs:
 
 ##### Description
 
-Module for validating files and directories using PipeVal
+Module for validating files and directories using PipeVal. There are two nearly-identical methods in this module: `run_validate_PipeVal` and `run_validate_PipeVal_with_metadata`.
 
 Tools used: `PipeVal`.
 
 Inputs:
   - `file_to_validate`: path for file or directory to validate
 
+Inputs:
+  - `run_validate_PipeVal`:
+    - `file_to_validate`: path for file or directory to validate
+  - `run_validate_PipeVal_with_metadata`:
+    - A tuple of:
+      - `file_to_validate`: path for file or directory to validate
+      - `metadata`: arbitrary `val` passed through to the output
+
 Parameters:
   - `log_output_dir`: directory for storing log files
   - `docker_image_version`: PipeVal docker image version within which process will run. The default is: `4.0.0-rc.2`
   - `process_label`: assign Nextflow process label to process to control resource allocation. For specific CPU and memory allocation, include static allocations in node-specific config files
   - `main_process`: Set output directory to the specified main process instead of `PipeVal-4.0.0-rc.2`
 
+Outputs:
+  - `validation_result`: path of file with validation output text
+  - `validated_file`: `file_to_validate` or tuple of (`file_to_validate`, `metadata`)
+
 ##### How to use
 
 1. Add this repository as a submodule in the pipeline of interest
-2. Include the `run_validate_PipeVal` process from the module `main.nf` with a relative path
+2. Include the `run_validate_PipeVal` or `run_validate_PipeVal_with_metadata` process from the module `main.nf` with a relative path
 3. Use the `addParams` directive when importing to specify any params
 4. Call the process with the inputs where needed
 5. Aggregate and save the output validation files as needed

diff --git a/modules/PipeVal/validate/main.nf b/modules/PipeVal/validate/main.nf
@@ -47,3 +47,52 @@ process run_validate_PipeVal {
     fi
     """
 }
+
+/**
+*   Nextflow module for validating files and directories.
+*
+*   This variant accepts and emits a tuple so that the validated path can be
+*   associated with arbitrary metadata.
+*
+*   @input  file_to_validate    path    File or directory to validate
+*   @input  metadata    val Arbitrary metadata paired with the path and emitted unchanged in the output tuple.
+*
+*   @params log_output_dir  path    Directory for saving log files
+*   @params docker_image_version    string  Version of PipeVal image for validation
+*   @params main_process    string  (Optional) Name of main output directory
+*/
+process run_validate_PipeVal_with_metadata {
+    container options.docker_image
+    label options.process_label
+
+    publishDir path: { options.main_process ?
+        "${options.log_output_dir}/process-log/${options.main_process}" :
+        "${options.log_output_dir}/process-log/PipeVal-${options.docker_image_version}"
+        },
+        pattern: ".command.*",
+        mode: "copy",
+        saveAs: { "${task.process.split(':')[-1]}/${task.process.split(':')[-1]}-${task.index}/log${file(it).getName()}" }
+
+    // Logs are published above as <process>/<process>-<task index>/log.command.* (last ':'-separated segment of the process name), so capture_logs is disabled; NOTE(review): the consumer of this ext flag is outside this file - confirm
+    ext capture_logs: false
+
+    input:
+        tuple path(file_to_validate), val(metadata)
+
+    output:
+        path(".command.*")  // unnamed output; these are the files matched by the publishDir pattern
+        path("validation.txt"), emit: validation_result  // stdout of the validate command below
+        tuple path(file_to_validate), val(metadata), emit: validated_file  // input tuple re-emitted as received
+
+    script:
+    """
+    set -euo pipefail
+
+    if command -v pipeval &> /dev/null
+    then
+        pipeval validate ${file_to_validate} ${options.validate_extra_args} > 'validation.txt'
+    else
+        validate ${file_to_validate} ${options.validate_extra_args} > 'validation.txt'
+    fi
+    """
+}