Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v1.9.0 to develop #81

Merged
merged 12 commits into from
Mar 14, 2024
4 changes: 2 additions & 2 deletions clarity_epp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def get_sample_artifacts_from_pool(lims, pool_artifact):
# Check if sample_artifact with 2 samples are from the same person
if len(sample_artifact.samples) == 2:
if (
'Dx Persoons ID' in sample_artifact.samples[0].udf or
'Dx Persoons ID' in sample_artifact.samples[1].udf or
'Dx Persoons ID' in sample_artifact.samples[0].udf and
'Dx Persoons ID' in sample_artifact.samples[1].udf and
sample_artifact.samples[0].udf['Dx Persoons ID'] == sample_artifact.samples[1].udf['Dx Persoons ID']
):
sample_artifacts.append(sample_artifact)
Expand Down
29 changes: 16 additions & 13 deletions clarity_epp/export/illumina.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,14 +231,16 @@ def create_samplesheet(lims, process_id, output_file):
process = Process(lims, id=process_id)
sequencer_conversion_settings = config.sequencer_conversion_settings[process.type.name]

    # Get output container; assume one flowcell per sequencing run
output_container = process.output_containers()[0]

    # Get samples per lane
samplesheet_samples = []
for lane in process.analytes()[0]:
sample_artifacts = get_sample_artifacts_from_pool(lims, process.analytes()[0][0])
samplesheet_samples.append(
get_samplesheet_samples(
sample_artifacts, process, sequencer_conversion_settings['index_2_conversion_orientation']
)
samplesheet_samples = {}
for lane_idx, lane_artifact in output_container.get_placements().items():
lane_idx = lane_idx.split(':')[0]
sample_artifacts = get_sample_artifacts_from_pool(lims, lane_artifact)
samplesheet_samples[lane_idx] = get_samplesheet_samples(
sample_artifacts, process, sequencer_conversion_settings['index_2_conversion_orientation']
)

# Create SampleSheet
Expand All @@ -264,6 +266,7 @@ def create_samplesheet(lims, process_id, output_file):
"FindAdaptersWithIndels,TRUE",
"BarcodeMismatchesIndex1,0",
"BarcodeMismatchesIndex2,0",
"TrimUMI,TRUE",
# BCLConvert_Data
"[BCLConvert_Data]"
]
Expand All @@ -278,17 +281,17 @@ def create_samplesheet(lims, process_id, output_file):
sample_sheet.append(bcl_convert_data_header)

# Add samples to SampleSheet
for lane, lane_samples in enumerate(samplesheet_samples):
for lane, lane_samples in sorted(samplesheet_samples.items()):
for sample in lane_samples:
bcl_convert_data_row = "{sample_name},{index_1},{index_2},{override_cycles},{project}".format(
sample_name=sample,
index_1=samplesheet_samples[lane][sample]['index_1'],
index_2=samplesheet_samples[lane][sample]['index_2'],
override_cycles=samplesheet_samples[lane][sample]['override_cycles'],
project=samplesheet_samples[lane][sample]['project']
index_1=lane_samples[sample]['index_1'],
index_2=lane_samples[sample]['index_2'],
override_cycles=lane_samples[sample]['override_cycles'],
project=lane_samples[sample]['project']
)
if multiple_lanes: # Add lane number to row if multiple lanes conversion
bcl_convert_data_row = f"{lane+1},{bcl_convert_data_row}"
bcl_convert_data_row = f"{lane},{bcl_convert_data_row}"
sample_sheet.append(bcl_convert_data_row)

# Write SampleSheet to file
Expand Down
12 changes: 11 additions & 1 deletion clarity_epp/export/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,17 @@
def create_file(lims, process_id, output_file):
"""Create mege file."""
process = Process(lims, id=process_id)
sample_artifacts = get_sample_artifacts_from_pool(lims, process.analytes()[0][0])

    # Get output container; assume one flowcell per sequencing run
output_container = process.output_containers()[0]

# Get unique sample artifacts in run
# TODO: This is a copy of the code from ped.py. It should be refactored to a common function.
sample_artifacts = []
for lane_artifact in output_container.get_placements().values():
for sample_artifact in get_sample_artifacts_from_pool(lims, lane_artifact):
if sample_artifact not in sample_artifacts:
sample_artifacts.append(sample_artifact)

output_file.write('Sample\tMerge 1 Sample\tMerge 1 Sequencing Run\tMerge 2 Sample\tMerge 2 Sequencing Run\n')

Expand Down
13 changes: 12 additions & 1 deletion clarity_epp/export/ped.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,18 @@
def create_file(lims, process_id, output_file):
"""Create ped file."""
process = Process(lims, id=process_id)
sample_artifacts = get_sample_artifacts_from_pool(lims, process.analytes()[0][0])

# Get output container assume one flowcell per sequencing run
output_container = process.output_containers()[0]

# Get unique sample artifacts in run
# TODO: This is a copy of the code from merge.py. It should be refactored to a common function.
sample_artifacts = []
for lane_artifact in output_container.get_placements().values():
for sample_artifact in get_sample_artifacts_from_pool(lims, lane_artifact):
if sample_artifact not in sample_artifacts:
sample_artifacts.append(sample_artifact)

ped_families = {}

for sample_artifact in sample_artifacts:
Expand Down
15 changes: 10 additions & 5 deletions clarity_epp/placement/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from genologics.entities import Process, Workflow

from .. import get_sequence_name
from clarity_epp.export.utils import sort_artifact_list
import config


Expand All @@ -17,19 +18,23 @@ def set_sequence_name(lims, process_id):
def set_runid_name(lims, process_id):
"""Change artifact name to run id."""
process = Process(lims, id=process_id)
analyte = process.analytes()[0][0]
input_artifact = process.all_inputs()[0]

container_name = analyte.container.name
# Fix for NovaSeqXPlus workflow configuration
# TODO: Set NovaSeqXPlus step to 'Analysis' type.
if 'NovaSeqXPlus' in input_artifact.parent_process.type.name:
input_artifact = input_artifact.parent_process.all_inputs()[0]

# Find sequencing process
# Assume one sequence process per input artifact
for sequence_process_type in config.sequence_process_types:
sequence_processes = lims.get_processes(type=sequence_process_type, inputartifactlimsid=input_artifact.id)
for sequence_process in sequence_processes:
if sequence_process.analytes()[0][0].container.name == container_name:
analyte.name = sequence_process.udf['Run ID']
analyte.put()
sequence_process_lanes = sorted(sequence_process.analytes()[0], key=sort_artifact_list)
for lane_idx, lane in enumerate(sorted(process.analytes()[0], key=sort_artifact_list)):
if sequence_process_lanes[lane_idx].container.name == lane.container.name:
lane.name = sequence_process.udf['Run ID']
lane.put()


def route_to_workflow(lims, process_id, workflow):
Expand Down
37 changes: 21 additions & 16 deletions clarity_epp/placement/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ def unpooling(lims, process_id):
pool_artifact = process.all_inputs()[0]
pool_artifact_parent_process = pool_artifact.parent_process

# Fix for NovaSeqXPlus workflow configuration
# TODO: Set NovaSeqXPlus step to 'Analysis' type.
if 'laden' not in pool_artifact_parent_process.type.name.lower():
pool_artifact_parent_process = pool_artifact_parent_process.all_inputs()[0].parent_process

run_id = pool_artifact.name # Assume run id is set as pool name using placement/artifact/set_runid_name
sample_artifacts = [] # sample artifacts before pooling
sample_projects = {}
Expand All @@ -35,22 +40,22 @@ def unpooling(lims, process_id):
sample_projects[data[sample_index]] = data[project_index]

# Parse sequencing run samples and move Dx samples to post sequencing workflow
for sample_artifact in get_sample_artifacts_from_pool(lims, pool_artifact):
sample = sample_artifact.samples[0]  # Assume all sample metadata is identical.

# Set sample sequencing run and project
sample_artifact.udf['Dx Sequencing Run ID'] = run_id
# Use sample.name for external (clarity_portal) samples
if 'Sample Type' in sample.udf and 'library' in sample.udf['Sample Type']:
sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample.name]
else: # Use sample_artifact.name for Dx samples (upload via Helix)
sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample_artifact.name]
sample_artifact.put()

# Only move DX production samples to post sequencing workflow
if sample.project and sample.project.udf['Application'] == 'DX':
sample_artifacts.append(sample_artifact)

for lane in process.all_inputs():
for sample_artifact in get_sample_artifacts_from_pool(lims, lane):
sample = sample_artifact.samples[0]  # Assume all sample metadata is identical.

# Set sample sequencing run and project
sample_artifact.udf['Dx Sequencing Run ID'] = run_id
# Use sample.name for external (clarity_portal) samples
if 'Sample Type' in sample.udf and 'library' in sample.udf['Sample Type']:
sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample.name]
else: # Use sample_artifact.name for Dx samples (upload via Helix)
sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample_artifact.name]
sample_artifact.put()

# Only move DX production samples to post sequencing workflow
if sample_artifact not in sample_artifacts and sample.project and sample.project.udf['Application'] == 'DX':
sample_artifacts.append(sample_artifact)
lims.route_artifacts(sample_artifacts, workflow_uri=Workflow(lims, id=config.post_sequencing_workflow).uri)


Expand Down
2 changes: 1 addition & 1 deletion clarity_epp/qc/qubit.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def set_qc_flag(lims, process_id, cutoff=10):
"""Set qubit qc flags based on Dx Concentratie fluorescentie (ng/ul) values."""
process = Process(lims, id=process_id)
artifacts = process.result_files()
concentration_range = map(float, re.findall('[\d\.]+', process.udf['Concentratiebereik (ng/ul)']))
concentration_range = list(map(float, re.findall('[\d\.]+', process.udf['Concentratiebereik (ng/ul)'])))
samples_measurements = {}

for artifact in artifacts:
Expand Down
4 changes: 2 additions & 2 deletions clarity_epp/upload/tecan.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
def results_qc(lims, process_id):
"""Upload tecan results to artifacts."""
process = Process(lims, id=process_id)
concentration_range = map(float, re.findall('[\d\.]+', process.udf['Concentratiebereik (ng/ul)']))
concentration_range = list(map(float, re.findall('[\d\.]+', process.udf['Concentratiebereik (ng/ul)'])))

# Parse output file
for output in process.all_outputs(unique=True):
Expand All @@ -21,7 +21,7 @@ def results_qc(lims, process_id):

measurements = {}
sample_measurements = {}
for line in lims.get_file_contents(tecan_result_file.id).data.split('\n'):
for line in lims.get_file_contents(tecan_result_file.id).data.decode('utf-8').split('\n'):
if not line.startswith('<>'):
data = line.rstrip().split('\t')
for index, value in enumerate(data[1:]):
Expand Down
1 change: 1 addition & 0 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
'Dx NextSeq Run v1.0', 'Dx NextSeq Run v1.1',
'Dx Automated NovaSeq Run (standaard) v1.0', 'Dx Automated NovaSeq Run (standaard) v1.1',
'AUTOMATED - NovaSeq Run (NovaSeq 6000 v3.1)',
'Dx NovaSeqXPlus Run v1.0'
]

# BCLConvert conversion settings
Expand Down
Loading