Skip to content

Commit

Permalink
Merge pull request #21 from phac-nml/dev
Browse files Browse the repository at this point in the history
Update to version 0.2.0
  • Loading branch information
kylacochrane authored Sep 5, 2024
2 parents e7e73cf + 03b0d3a commit e46c369
Show file tree
Hide file tree
Showing 19 changed files with 243 additions and 70 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.2.0] - 2024/09/05

### `Changed`

- Upgraded `locidex/merge` to version `0.2.3` and updated `input_assure.py` and test data for compatibility with the new `mlst.json` allele file format [PR20](https://github.com/phac-nml/gasnomenclature/pull/20)
- Removed `quay.io` docker repository tags from modules [PR19](https://github.com/phac-nml/gasnomenclature/pull/19)

This pipeline is now compatible only with output generated by [Locidex v0.2.3+](https://github.com/phac-nml/locidex) and [Mikrokondo v0.4.0+](https://github.com/phac-nml/mikrokondo/releases/tag/v0.4.0).

## [0.1.0] - 2024/06/28

Initial release of the Genomic Address Nomenclature pipeline to be used to assign cluster addresses to samples based on existing cluster designations.
Expand All @@ -13,3 +22,4 @@ Initial release of the Genomic Address Nomenclature pipeline to be used to assig
- Output of assigned cluster addresses for any **query** samples using [profile_dists](https://github.com/phac-nml/profile_dists) and [gas call](https://github.com/phac-nml/genomic_address_service).

[0.1.0]: https://github.com/phac-nml/gasnomenclature/releases/tag/0.1.0
[0.2.0]: https://github.com/phac-nml/gasnomenclature/releases/tag/0.2.0
27 changes: 16 additions & 11 deletions bin/input_assure.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,38 +19,43 @@ def check_inputs(json_file, sample_id, address, output_error_file, output_json_f
with open_file(json_file, "rt") as f:
json_data = json.load(f)

# Extract the profile from the json_data
profile = json_data.get("data", {}).get("profile", {})
# Check for multiple keys in the JSON file and define error message
keys = sorted(profile.keys())
original_key = keys[0] if keys else None

# Define a variable to store the match_status (True or False)
match_status = sample_id in json_data
match_status = sample_id in profile

# Initialize the error message
error_message = None

# Check for multiple keys in the JSON file and define error message
keys = list(json_data.keys())
original_key = keys[0] if keys else None

if len(keys) == 0:
error_message = f"{json_file} is completely empty!"
if not keys:
error_message = (
f"{json_file} is missing the 'profile' section or is completely empty!"
)
print(error_message)
sys.exit(1)
elif len(keys) > 1:
# Check if sample_id matches any key
if not match_status:
error_message = f"No key in the MLST JSON file ({json_file}) matches the specified sample ID '{sample_id}'. The first key '{original_key}' has been forcefully changed to '{sample_id}' and all other keys have been removed."
# Retain only the specified sample ID
json_data = {sample_id: json_data.pop(original_key)}
json_data["data"]["profile"] = {sample_id: profile.pop(original_key)}
else:
error_message = f"MLST JSON file ({json_file}) contains multiple keys: {keys}. The MLST JSON file has been modified to retain only the '{sample_id}' entry"
# Remove all keys expect the one matching sample_id
json_data = {sample_id: json_data[sample_id]}
# Retain only the specified sample_id in the profile
json_data["data"]["profile"] = {sample_id: profile[sample_id]}
elif not match_status:
# Define error message based on meta.address (query or reference)
if address == "null":
error_message = f"Query {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness."
else:
error_message = f"Reference {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness."
# Update the JSON file with the new sample ID
json_data[sample_id] = json_data.pop(original_key)
json_data["data"]["profile"] = {sample_id: profile.pop(original_key)}
json_data["data"]["sample_name"] = sample_id

# Write file containing relevant error messages
if error_message:
Expand Down
2 changes: 1 addition & 1 deletion modules/local/gas/call/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ process GAS_CALL{

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/genomic_address_service%3A0.1.1--pyh7cba7a3_1' :
'quay.io/biocontainers/genomic_address_service:0.1.1--pyh7cba7a3_1' }"
'biocontainers/genomic_address_service:0.1.1--pyh7cba7a3_1' }"


input:
Expand Down
5 changes: 3 additions & 2 deletions modules/local/locidex/merge/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ process LOCIDEX_MERGE {
label 'process_medium'

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/locidex:0.1.1--pyhdfd78af_0' :
'quay.io/biocontainers/locidex:0.1.1--pyhdfd78af_0' }"
"docker.io/mwells14/locidex:0.2.3" :
task.ext.override_configured_container_registry != false ? 'docker.io/mwells14/locidex:0.2.3' :
'mwells14/locidex:0.2.3' }"

input:
path input_values // [file(sample1), file(sample2), file(sample3), etc...]
Expand Down
2 changes: 1 addition & 1 deletion modules/local/profile_dists/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ process PROFILE_DISTS{

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/profile_dists%3A1.0.0--pyh7cba7a3_0' :
'quay.io/biocontainers/profile_dists:1.0.0--pyh7cba7a3_0' }"
'biocontainers/profile_dists:1.0.0--pyh7cba7a3_0' }"

input:
path query
Expand Down
5 changes: 4 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,9 @@ docker.registry = 'quay.io'
podman.registry = 'quay.io'
singularity.registry = 'quay.io'

// Override the default Docker registry when required
process.ext.override_configured_container_registry = true

// Nextflow plugins
plugins {
id '[email protected]' // Validation of pipeline parameters and creation of an input channel from a sample sheet
Expand Down Expand Up @@ -219,7 +222,7 @@ manifest {
description = """Gas Nomenclature assignment pipeline"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
version = '0.1.0'
version = '0.2.0'
doi = ''
defaultBranch = 'main'
}
Expand Down
22 changes: 18 additions & 4 deletions tests/data/reports/sample1.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
{
"sample1": {
"l1": "1",
"l2": "1",
"l3": "1"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"data": {
"sample_name": "sample1",
"profile": {
"sample1": {
"l1": "1",
"l2": "1",
"l3": "1"
}
},
"seq_data": {}
}
}
Binary file modified tests/data/reports/sample1.mlst.json.gz
Binary file not shown.
22 changes: 18 additions & 4 deletions tests/data/reports/sample2.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
{
"sample2": {
"l1": "1",
"l2": "1",
"l3": "1"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"data": {
"sample_name": "sample2",
"profile": {
"sample2": {
"l1": "1",
"l2": "1",
"l3": "1"
}
},
"seq_data": {}
}
}
22 changes: 18 additions & 4 deletions tests/data/reports/sample2_missing.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
{
"sample2": {
"l1": "-",
"l2": "1",
"l3": "1"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"data": {
"sample_name": "sample2",
"profile": {
"sample2": {
"l1": "-",
"l2": "1",
"l3": "1"
}
},
"seq_data": {}
}
}
22 changes: 18 additions & 4 deletions tests/data/reports/sample3.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
{
"sample3": {
"l1": "1",
"l2": "1",
"l3": "2"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"data": {
"sample_name": "sample3",
"profile": {
"sample3": {
"l1": "1",
"l2": "1",
"l3": "2"
}
},
"seq_data": {}
}
}
22 changes: 18 additions & 4 deletions tests/data/reports/sample3_missing.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
{
"sample3": {
"l1": "-",
"l2": "1",
"l3": "2"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"data": {
"sample_name": "sample3",
"profile": {
"sample3": {
"l1": "-",
"l2": "1",
"l3": "2"
}
},
"seq_data": {}
}
}
30 changes: 22 additions & 8 deletions tests/data/reports/sample3_multiplekeys.mlst.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,26 @@
{
"extra_key": {
"l1": "1",
"l2": "1",
"l3": "2"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"sample3": {
"l1": "1",
"l2": "1",
"l3": "2"
"data": {
"sample_name": "sample3",
"profile": {
"extra_key": {
"l1": "1",
"l2": "1",
"l3": "2"
},
"sample3": {
"l1": "1",
"l2": "1",
"l3": "2"
}
},
"seq_data": {}
}
}
30 changes: 22 additions & 8 deletions tests/data/reports/sample3_multiplekeys_nomatch.mlst.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,26 @@
{
"sample4": {
"l1": "1",
"l2": "1",
"l3": "2"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"extra_key": {
"l1": "1",
"l2": "1",
"l3": "2"
"data": {
"sample_name": "sample4",
"profile": {
"sample4": {
"l1": "1",
"l2": "1",
"l3": "2"
},
"extra_key": {
"l1": "1",
"l2": "1",
"l3": "2"
}
},
"seq_data": {}
}
}
22 changes: 18 additions & 4 deletions tests/data/reports/sample7.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
{
"sample7": {
"l1": "1",
"l2": "1",
"l3": "1"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"data": {
"sample_name": "sample7",
"profile": {
"sample7": {
"l1": "1",
"l2": "1",
"l3": "1"
}
},
"seq_data": {}
}
}
22 changes: 18 additions & 4 deletions tests/data/reports/sampleF.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
{
"sampleF": {
"l1": "1",
"l2": "2",
"l3": "1"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"data": {
"sample_name": "sampleF",
"profile": {
"sampleF": {
"l1": "1",
"l2": "2",
"l3": "1"
}
},
"seq_data": {}
}
}
Loading

0 comments on commit e46c369

Please sign in to comment.