Merge pull request #21 from phac-nml/dev

Update to version 0.2.0
phac-nml · Sep 5, 2024 · e46c369 · e46c369
2 parents e7e73cf + 03b0d3a
commit e46c369
Show file tree

Hide file tree

Showing 19 changed files with 243 additions and 70 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,15 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.2.0] - 2024/09/05
+
+### `Changed`
+
+- Upgraded `locidex/merge` to version `0.2.3` and updated `input_assure.py` and test data for compatibility with the new `mlst.json` allele file format [PR20](https://github.com/phac-nml/gasnomenclature/pull/20)
+- Removed `quay.io` docker repository tags from modules [PR19](https://github.com/phac-nml/gasnomenclature/pull/19)
+
+This pipeline is now compatible only with output generated by [Locidex v0.2.3+](https://github.com/phac-nml/locidex) and [Mikrokondo v0.4.0+](https://github.com/phac-nml/mikrokondo/releases/tag/v0.4.0).
+
 ## [0.1.0] - 2024/06/28
 
 Initial release of the Genomic Address Nomenclature pipeline to be used to assign cluster addresses to samples based on existing cluster designations.
@@ -13,3 +22,4 @@ Initial release of the Genomic Address Nomenclature pipeline to be used to assig
 - Output of assigned cluster addresses for any **query** samples using [profile_dists](https://github.com/phac-nml/profile_dists) and [gas call](https://github.com/phac-nml/genomic_address_service).
 
 [0.1.0]: https://github.com/phac-nml/gasnomenclature/releases/tag/0.1.0
+[0.2.0]: https://github.com/phac-nml/gasnomenclature/releases/tag/0.2.0
diff --git a/bin/input_assure.py b/bin/input_assure.py
@@ -19,38 +19,43 @@ def check_inputs(json_file, sample_id, address, output_error_file, output_json_f
     with open_file(json_file, "rt") as f:
         json_data = json.load(f)
 
+    # Extract the profile from the json_data
+    profile = json_data.get("data", {}).get("profile", {})
+    # Check for multiple keys in the JSON file and define error message
+    keys = sorted(profile.keys())
+    original_key = keys[0] if keys else None
+
     # Define a variable to store the match_status (True or False)
-    match_status = sample_id in json_data
+    match_status = sample_id in profile
 
     # Initialize the error message
     error_message = None
 
-    # Check for multiple keys in the JSON file and define error message
-    keys = list(json_data.keys())
-    original_key = keys[0] if keys else None
-
-    if len(keys) == 0:
-        error_message = f"{json_file} is completely empty!"
+    if not keys:
+        error_message = (
+            f"{json_file} is missing the 'profile' section or is completely empty!"
+        )
         print(error_message)
         sys.exit(1)
     elif len(keys) > 1:
         # Check if sample_id matches any key
         if not match_status:
             error_message = f"No key in the MLST JSON file ({json_file}) matches the specified sample ID '{sample_id}'. The first key '{original_key}' has been forcefully changed to '{sample_id}' and all other keys have been removed."
             # Retain only the specified sample ID
-            json_data = {sample_id: json_data.pop(original_key)}
+            json_data["data"]["profile"] = {sample_id: profile.pop(original_key)}
         else:
             error_message = f"MLST JSON file ({json_file}) contains multiple keys: {keys}. The MLST JSON file has been modified to retain only the '{sample_id}' entry"
-            # Remove all keys expect the one matching sample_id
-            json_data = {sample_id: json_data[sample_id]}
+            # Retain only the specified sample_id in the profile
+            json_data["data"]["profile"] = {sample_id: profile[sample_id]}
     elif not match_status:
         # Define error message based on meta.address (query or reference)
         if address == "null":
             error_message = f"Query {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness."
         else:
             error_message = f"Reference {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness."
         # Update the JSON file with the new sample ID
-        json_data[sample_id] = json_data.pop(original_key)
+        json_data["data"]["profile"] = {sample_id: profile.pop(original_key)}
+        json_data["data"]["sample_name"] = sample_id
 
     # Write file containing relevant error messages
     if error_message:

diff --git a/modules/local/gas/call/main.nf b/modules/local/gas/call/main.nf
@@ -6,7 +6,7 @@ process GAS_CALL{
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/genomic_address_service%3A0.1.1--pyh7cba7a3_1' :
-        'quay.io/biocontainers/genomic_address_service:0.1.1--pyh7cba7a3_1' }"
+        'biocontainers/genomic_address_service:0.1.1--pyh7cba7a3_1' }"
 
 
     input:

diff --git a/modules/local/locidex/merge/main.nf b/modules/local/locidex/merge/main.nf
@@ -5,8 +5,9 @@ process LOCIDEX_MERGE {
     label 'process_medium'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-    'https://depot.galaxyproject.org/singularity/locidex:0.1.1--pyhdfd78af_0' :
-    'quay.io/biocontainers/locidex:0.1.1--pyhdfd78af_0' }"
+    "docker.io/mwells14/locidex:0.2.3" :
+    task.ext.override_configured_container_registry != false ? 'docker.io/mwells14/locidex:0.2.3' :
+    'mwells14/locidex:0.2.3' }"
 
     input:
     path input_values // [file(sample1), file(sample2), file(sample3), etc...]

diff --git a/modules/local/profile_dists/main.nf b/modules/local/profile_dists/main.nf
@@ -4,7 +4,7 @@ process PROFILE_DISTS{
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/profile_dists%3A1.0.0--pyh7cba7a3_0' :
-        'quay.io/biocontainers/profile_dists:1.0.0--pyh7cba7a3_0' }"
+        'biocontainers/profile_dists:1.0.0--pyh7cba7a3_0' }"
 
     input:
     path query

diff --git a/nextflow.config b/nextflow.config
@@ -168,6 +168,9 @@ docker.registry      = 'quay.io'
 podman.registry      = 'quay.io'
 singularity.registry = 'quay.io'
 
+// Override the default Docker registry when required
+process.ext.override_configured_container_registry = true
+
 // Nextflow plugins
 plugins {
     id '[email protected]' // Validation of pipeline parameters and creation of an input channel from a sample sheet
@@ -219,7 +222,7 @@ manifest {
     description     = """Gas Nomenclature assignment pipeline"""
     mainScript      = 'main.nf'
     nextflowVersion = '!>=23.04.0'
-    version         = '0.1.0'
+    version         = '0.2.0'
     doi             = ''
     defaultBranch   = 'main'
 }

diff --git a/tests/data/reports/sample1.mlst.json b/tests/data/reports/sample1.mlst.json
@@ -1,7 +1,21 @@
 {
-    "sample1": {
-        "l1": "1",
-        "l2": "1",
-        "l3": "1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample1",
+        "profile": {
+            "sample1": {
+                "l1": "1",
+                "l2": "1",
+                "l3": "1"
+            }
+        },
+        "seq_data": {}
     }
 }
diff --git a/tests/data/reports/sample1.mlst.json.gz b/tests/data/reports/sample1.mlst.json.gz
diff --git a/tests/data/reports/sample2.mlst.json b/tests/data/reports/sample2.mlst.json
@@ -1,7 +1,21 @@
 {
-    "sample2": {
-        "l1": "1",
-        "l2": "1",
-        "l3": "1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample2",
+        "profile": {
+            "sample2": {
+                "l1": "1",
+                "l2": "1",
+                "l3": "1"
+            }
+        },
+        "seq_data": {}
     }
 }
diff --git a/tests/data/reports/sample2_missing.mlst.json b/tests/data/reports/sample2_missing.mlst.json
@@ -1,7 +1,21 @@
 {
-    "sample2": {
-        "l1": "-",
-        "l2": "1",
-        "l3": "1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample2",
+        "profile": {
+            "sample2": {
+                "l1": "-",
+                "l2": "1",
+                "l3": "1"
+            }
+        },
+        "seq_data": {}
     }
 }
diff --git a/tests/data/reports/sample3.mlst.json b/tests/data/reports/sample3.mlst.json
@@ -1,7 +1,21 @@
 {
-    "sample3": {
-        "l1": "1",
-        "l2": "1",
-        "l3": "2"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample3",
+        "profile": {
+            "sample3": {
+                "l1": "1",
+                "l2": "1",
+                "l3": "2"
+            }
+        },
+        "seq_data": {}
     }
 }
diff --git a/tests/data/reports/sample3_missing.mlst.json b/tests/data/reports/sample3_missing.mlst.json
@@ -1,7 +1,21 @@
 {
-    "sample3": {
-        "l1": "-",
-        "l2": "1",
-        "l3": "2"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample3",
+        "profile": {
+            "sample3": {
+                "l1": "-",
+                "l2": "1",
+                "l3": "2"
+            }
+        },
+        "seq_data": {}
     }
 }
diff --git a/tests/data/reports/sample3_multiplekeys.mlst.json b/tests/data/reports/sample3_multiplekeys.mlst.json
@@ -1,12 +1,26 @@
 {
-    "extra_key": {
-        "l1": "1",
-        "l2": "1",
-        "l3": "2"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
     },
-    "sample3": {
-        "l1": "1",
-        "l2": "1",
-        "l3": "2"
+    "data": {
+        "sample_name": "sample3",
+        "profile": {
+            "extra_key": {
+                "l1": "1",
+                "l2": "1",
+                "l3": "2"
+            },
+            "sample3": {
+                "l1": "1",
+                "l2": "1",
+                "l3": "2"
+            }
+        },
+        "seq_data": {}
     }
 }
diff --git a/tests/data/reports/sample3_multiplekeys_nomatch.mlst.json b/tests/data/reports/sample3_multiplekeys_nomatch.mlst.json
@@ -1,12 +1,26 @@
 {
-    "sample4": {
-        "l1": "1",
-        "l2": "1",
-        "l3": "2"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
     },
-    "extra_key": {
-        "l1": "1",
-        "l2": "1",
-        "l3": "2"
+    "data": {
+        "sample_name": "sample4",
+        "profile": {
+            "sample4": {
+                "l1": "1",
+                "l2": "1",
+                "l3": "2"
+            },
+            "extra_key": {
+                "l1": "1",
+                "l2": "1",
+                "l3": "2"
+            }
+        },
+        "seq_data": {}
     }
 }
diff --git a/tests/data/reports/sample7.mlst.json b/tests/data/reports/sample7.mlst.json
@@ -1,7 +1,21 @@
 {
-    "sample7": {
-        "l1": "1",
-        "l2": "1",
-        "l3": "1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample7",
+        "profile": {
+            "sample7": {
+                "l1": "1",
+                "l2": "1",
+                "l3": "1"
+            }
+        },
+        "seq_data": {}
     }
 }
diff --git a/tests/data/reports/sampleF.mlst.json b/tests/data/reports/sampleF.mlst.json
@@ -1,7 +1,21 @@
 {
-    "sampleF": {
-        "l1": "1",
-        "l2": "2",
-        "l3": "1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sampleF",
+        "profile": {
+            "sampleF": {
+                "l1": "1",
+                "l2": "2",
+                "l3": "1"
+            }
+        },
+        "seq_data": {}
     }
 }