Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add lane to bclconvert samplesheet if it doesn't exist #552

Merged
merged 2 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ def collect_analysis_objects(project_id: str, analysis_id: str) -> Dict:
logger.info("Reading in the samplesheet")
samplesheet_dict = read_v2_samplesheet(
project_id=project_id,
data_id=samplesheet_file_id
samplesheet_data_id=samplesheet_file_id,
runinfo_data_id=run_info_file_id
)

return {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .runinfo_helper import get_run_id_from_run_info
from .runinfo_helper import get_run_id_from_run_info, get_num_lanes_from_run_info

__all__ = [
'get_num_lanes_from_run_info',
'get_run_id_from_run_info'
]
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,12 @@ def get_run_id_from_run_info(project_id: str, data_id: str) -> str:
"""
return read_runinfo_xml(project_id, data_id)['RunInfo']['Run']['@Id']


def get_num_lanes_from_run_info(project_id: str, data_id: str) -> int:
    """
    Return the lane count recorded in a run info object.

    The run info is loaded through read_runinfo_xml and the value is taken
    from the '@LaneCount' entry under RunInfo > Run > FlowcellLayout.

    :param project_id: Forwarded unchanged to read_runinfo_xml
    :param data_id: Forwarded unchanged to read_runinfo_xml
    :return: The '@LaneCount' value converted to an int
    """
    run_info = read_runinfo_xml(project_id, data_id)
    flowcell_layout = run_info['RunInfo']['Run']['FlowcellLayout']
    return int(flowcell_layout['@LaneCount'])
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from io import StringIO
from typing import Dict

from more_itertools import flatten
# UMCCR Libraries
from v2_samplesheet_maker.functions.v2_samplesheet_reader import v2_samplesheet_reader
from wrapica.project_data import read_icav2_file_contents_to_string
Expand All @@ -19,12 +20,17 @@
logger.setLevel(logging.INFO)


def read_v2_samplesheet(project_id: str, data_id: str) -> Dict:
def read_v2_samplesheet(
project_id: str,
samplesheet_data_id: str,
runinfo_data_id: str
) -> Dict:
"""
Given a v2 samplesheet path, read in the file as a v2 samplesheet (we first convert to json)

:param project_id:
:param data_id:
:param samplesheet_data_id:
:param runinfo_data_id:

:return: A dictionary

Expand Down Expand Up @@ -90,12 +96,43 @@ def read_v2_samplesheet(project_id: str, data_id: str) -> Dict:
]
}
"""
from ..runinfo import get_num_lanes_from_run_info

return v2_samplesheet_reader(
v2_samplesheet_dict = v2_samplesheet_reader(
StringIO(
read_icav2_file_contents_to_string(
project_id=project_id,
data_id=data_id
data_id=samplesheet_data_id
)
)
)

# Get bclconvert data from the v2 samplesheet dict
# And confirm that the lane column is present
if 'lane' in v2_samplesheet_dict['bclconvert_data'][0].keys():
# Return the samplesheet as is
return v2_samplesheet_dict

# Otherwise we read the runinfo file
num_lanes = get_num_lanes_from_run_info(
project_id=project_id,
data_id=runinfo_data_id
)

# And now append the lane attribute to every bclconvert_data row (one copy of the row per lane)
v2_samplesheet_dict['bclconvert_data'] = flatten(
map(
lambda bclconvert_data_row_iter: list(
map(
lambda lane_iter: {
**bclconvert_data_row_iter,
**{"lane": lane_iter + 1}
},
range(num_lanes)
)
),
v2_samplesheet_dict['bclconvert_data']
),
)

return v2_samplesheet_dict
Original file line number Diff line number Diff line change
Expand Up @@ -499,3 +499,5 @@ def parse_event_code(event_code):
# # "statusCode": 200,
# # "body": "\"Internal event sent to the event bus and both msg stored in the DynamoDB table.\""
# # }


Loading