-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #763 from umccr/enhancement/allow-cttsov2-ora-compression-inputs
Add ora compression to cttsov2 workflow
- Loading branch information
Showing
6 changed files
with
394 additions
and
121 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
30 changes: 30 additions & 0 deletions
30
...v2-pipeline-manager/lambdas/check_fastq_list_row_is_ora_py/check_fastq_list_row_is_ora.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#!/usr/bin/env python3 | ||
|
||
""" | ||
Check fastq list row is ora | ||
""" | ||
|
||
|
||
def handler(event, context):
    """
    Report whether both reads of a fastq list row are ORA-compressed.

    :param event: Mapping with a 'fastq_list_row' entry; that row must provide
                  'read1FileUri' and 'read2FileUri' as strings.
    :param context: Unused by this handler.
    :return: {"is_ora": True} when both URIs end with '.ora',
             {"is_ora": False} when both URIs end with '.gz'.
    :raises KeyError: if the event has no 'fastq_list_row' entry.
    :raises ValueError: if a read URI is missing, has an extension other than
                        '.ora' / '.gz', or the two reads are in different formats.
    """

    # Get the fastq list from the event
    fastq_list_row = event['fastq_list_row']

    # Classify each read URI by its extension, failing early with a message
    # that names the offending key rather than an AttributeError on None.
    formats = set()
    for uri_key in ("read1FileUri", "read2FileUri"):
        uri = fastq_list_row.get(uri_key)
        if not isinstance(uri, str):
            raise ValueError(f"The fastq list row has no usable '{uri_key}' value")
        if uri.endswith(".ora"):
            formats.add("ora")
        elif uri.endswith(".gz"):
            formats.add("gz")
        else:
            raise ValueError(
                f"The {uri_key} must end with '.ora' or '.gz', got '{uri}'"
            )

    # One read is ora and the other is gz
    if len(formats) > 1:
        raise ValueError("The read1FileUri and read2FileUri need to be in the same format")

    return {
        "is_ora": formats == {"ora"}
    }
58 changes: 58 additions & 0 deletions
58
...e-manager/lambdas/convert_ora_to_cache_uri_gz_path_py/convert_ora_to_cache_uri_gz_path.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#!/usr/bin/env python | ||
|
||
""" | ||
Given a fastq list row in ora format, a cache uri and a sample id, | ||
Determine the output gzip path for the fastq files | ||
Returns read_1_gz_output_uri and read_2_gz_output_uri | ||
""" | ||
|
||
from urllib.parse import (urlparse, urlunparse) | ||
from pathlib import Path | ||
|
||
def extend_url(url: str, path_ext: str) -> str:
    """
    Extend the path component of a url with path_ext.

    The scheme, netloc, params, query and fragment of the url are kept as-is;
    only the path gains the extra segment(s).

    :param url: The url to extend, e.g. 's3://bucket/cache/'
    :param path_ext: The relative path to append, e.g. 'sample_id'
    :return: The url with path_ext appended to its path
    """
    url_obj = urlparse(url)

    # A leading '/' would make the Path join treat path_ext as absolute and
    # throw away the existing url path, so always join it as a relative path.
    relative_ext = str(path_ext).lstrip("/")

    return str(
        urlunparse(
            (
                url_obj.scheme,
                url_obj.netloc,
                str(Path(url_obj.path) / relative_ext),
                url_obj.params,
                url_obj.query,
                url_obj.fragment
            )
        )
    )
|
||
|
||
def _ora_name_to_gz_name(file_name: str) -> str:
    """
    Swap a trailing '.ora' extension for '.gz'; other names are returned unchanged.
    """
    ora_suffix = '.ora'
    if file_name.endswith(ora_suffix):
        return file_name[:-len(ora_suffix)] + '.gz'
    return file_name


def handler(event, context):
    """
    Determine the gzip output uris for the ora fastq files of a fastq list row.

    The outputs are placed under <cache_uri>/<sample_id>/ and keep the input
    file names, with the trailing '.ora' extension swapped for '.gz'.

    :param event: Mapping with 'cache_uri', 'sample_id' and 'fastq_list_row';
                  the row must provide 'read1FileUri' and 'read2FileUri'.
    :param context: Unused by this handler.
    :return: Dict with 'read_1_gz_output_uri' and 'read_2_gz_output_uri'
    :raises KeyError: if any of the required keys is missing.
    """
    # Get the inputs from the event
    cache_uri = event['cache_uri']
    sample_id = event['sample_id']
    fastq_list_row = event['fastq_list_row']
    read_1_ora_file_uri = fastq_list_row['read1FileUri']
    read_2_ora_file_uri = fastq_list_row['read2FileUri']

    # Extend the cache uri to include the sample id
    sample_cache_uri = extend_url(cache_uri, sample_id)

    # Take the file name from each ora uri and swap only the trailing
    # extension; a blanket str.replace would also rewrite '.ora' occurring
    # earlier in the name (e.g. 'sample.orange_R1.fastq.ora').
    read_1_file_name = _ora_name_to_gz_name(Path(read_1_ora_file_uri).name)
    read_2_file_name = _ora_name_to_gz_name(Path(read_2_ora_file_uri).name)

    # Get the output uri for the gz files
    return {
        'read_1_gz_output_uri': extend_url(sample_cache_uri, read_1_file_name),
        'read_2_gz_output_uri': extend_url(sample_cache_uri, read_2_file_name)
    }
Oops, something went wrong.