diff --git a/clarity_epp.py b/clarity_epp.py
index dfc4ee6..5427656 100755
--- a/clarity_epp.py
+++ b/clarity_epp.py
@@ -48,7 +48,9 @@ def export_hamilton(args):

 def export_illumina(args):
     """Export (updated) illumina samplesheet."""
-    clarity_epp.export.illumina.update_samplesheet(lims, args.process_id, args.artifact_id, args.output_file)
+    clarity_epp.export.illumina.update_samplesheet(
+        lims, args.process_id, args.artifact_id, args.output_file, args.conversion_tool
+    )


 def export_labels(args):
@@ -59,6 +61,8 @@ def export_labels(args):
         clarity_epp.export.labels.container_sample(lims, args.process_id, args.output_file, args.description)
     elif args.type == 'storage_location':
         clarity_epp.export.labels.storage_location(lims, args.process_id, args.output_file)
+    elif args.type == 'nunc_mix_sample':
+        clarity_epp.export.labels.nunc_mix_sample(lims, args.process_id, args.output_file)


 def export_magnis(args):
@@ -92,6 +96,8 @@ def export_manual_pipetting(args):
         clarity_epp.export.manual_pipetting.samplesheet_pool_samples(lims, args.process_id, args.output_file)
     elif args.type == 'pool_magnis_pools':
         clarity_epp.export.manual_pipetting.samplesheet_pool_magnis_pools(lims, args.process_id, args.output_file)
+    elif args.type == 'normalization_mix':
+        clarity_epp.export.manual_pipetting.samplesheet_normalization_mix(lims, args.process_id, args.output_file)


 def export_ped_file(args):
@@ -151,6 +157,8 @@ def upload_tecan_results(args):
         clarity_epp.upload.tecan.results_qc(lims, args.process_id)
     elif args.type == 'purify_normalise':
         clarity_epp.upload.tecan.results_purify_normalise(lims, args.process_id)
+    elif args.type == 'purify_mix':
+        clarity_epp.upload.tecan.results_purify_mix(lims, args.process_id)


 def upload_tapestation_results(args):
@@ -195,12 +203,14 @@ def placement_automatic(args):
     clarity_epp.placement.plate.copy_layout(lims, args.process_id)


-def placement_artifact_set_name(args):
-    """Change artifact name to sequence name."""
+def placement_artifact_set(args):
+    """Change artifact name or udf."""
     if args.type == 'sequence_name':
         clarity_epp.placement.artifact.set_sequence_name(lims, args.process_id)
     elif args.type == 'run_id':
         clarity_epp.placement.artifact.set_runid_name(lims, args.process_id)
+    elif args.type == 'norm_udf':
+        clarity_epp.placement.artifact.set_norm_manual_udf(lims, args.process_id)


 def placement_route_artifact(args):
@@ -219,6 +229,11 @@ def placement_unpooling(args):
     clarity_epp.placement.pool.unpooling(lims, args.process_id)


+def placement_patient_pools(args):
+    """Create patient pools for Dx samples."""
+    clarity_epp.placement.pool.create_patient_pools(lims, args.process_id)
+
+
 def placement_complete_step(args):
     """Complete protocol step (Dx Mark protocol complete)."""
     clarity_epp.placement.step.finish_protocol_complete(lims, args.process_id)
@@ -229,6 +244,11 @@ def placement_tecan(args):
     clarity_epp.placement.tecan.place_artifacts(lims, args.process_id)


+def placement_pipetting(args):
+    """Check pipetted input and output nuncs."""
+    clarity_epp.placement.pipetting.check_nunc_input_nunc_output(lims, args.process_id)
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     subparser = parser.add_subparsers()
@@ -271,10 +291,16 @@ def placement_tecan(args):
     )
     parser_export_illumina.add_argument('process_id', help='Clarity lims process id')
     parser_export_illumina.add_argument('artifact_id', help='Clarity lims samplesheet artifact id')
+    parser_export_illumina.add_argument(
+        '-c', '--conversion_tool', choices=['bcl2fastq', 'bclconvert'],
+        default='bcl2fastq', help='Illumina conversion tool'
+    )
     parser_export_illumina.set_defaults(func=export_illumina)

     parser_export_labels = subparser_export.add_parser('labels', help='Export container labels', parents=[output_parser])
-    parser_export_labels.add_argument('type', choices=['container', 'container_sample', 'storage_location'], help='Label type')
+    parser_export_labels.add_argument(
+        'type',
+        choices=['container', 'container_sample', 'storage_location', 'nunc_mix_sample'],
+        help='Label type')
     parser_export_labels.add_argument('process_id', help='Clarity lims process id')
     parser_export_labels.add_argument('-d', '--description', nargs='?', help='Container name description')
     parser_export_labels.set_defaults(func=export_labels)
@@ -293,7 +319,7 @@ def placement_tecan(args):
         choices=[
             'purify', 'dilute_library_pool', 'multiplex_library_pool', 'multiplex_sequence_pool', 'normalization',
             'capture', 'exonuclease', 'pcr_exonuclease', 'mip_multiplex_pool', 'mip_dilute_pool', 'pool_samples',
-            'pool_magnis_pools'
+            'pool_magnis_pools', 'normalization_mix'
         ],
         help='Samplesheet type'
     )
@@ -338,7 +364,9 @@ def placement_tecan(args):
     parser_export_tecan = subparser_export.add_parser('tecan', help='Create tecan samplesheets', parents=[output_parser])
     parser_export_tecan.add_argument('process_id', help='Clarity lims process id')
-    parser_export_tecan.add_argument('type', choices=['qc', 'purify_normalise'], help='Samplesheet type')
+    parser_export_tecan.add_argument(
+        'type', choices=['qc', 'purify_normalise', 'filling_out_purify', 'normalise'], help='Samplesheet type'
+    )
     parser_export_tecan.set_defaults(func=export_tecan)

     parser_export_workflow = subparser_export.add_parser(
@@ -366,7 +394,7 @@ def placement_tecan(args):
     parser_upload_tecan = subparser_upload.add_parser('tecan', help='Upload tecan results')
     parser_upload_tecan.add_argument('process_id', help='Clarity lims process id')
-    parser_upload_tecan.add_argument('type', choices=['qc', 'purify_normalise'], help='Tecan process type')
+    parser_upload_tecan.add_argument('type', choices=['qc', 'purify_normalise', 'purify_mix'], help='Tecan process type')
     parser_upload_tecan.set_defaults(func=upload_tecan_results)

     parser_upload_magnis = subparser_upload.add_parser('magnis', help='Upload magnis results')
@@ -404,9 +432,9 @@ def placement_tecan(args):
     parser_placement_automatic.set_defaults(func=placement_automatic)

     parser_placement_artifact = subparser_placement.add_parser('artifact', help='Change artifact name to sequence name')
-    parser_placement_artifact.add_argument('type', choices=['sequence_name', 'run_id'], help='Check type')
+    parser_placement_artifact.add_argument('type', choices=['sequence_name', 'run_id', 'norm_udf'], help='Check type')
     parser_placement_artifact.add_argument('process_id', help='Clarity lims process id')
-    parser_placement_artifact.set_defaults(func=placement_artifact_set_name)
+    parser_placement_artifact.set_defaults(func=placement_artifact_set)

     parser_placement_route_artifact = subparser_placement.add_parser('route_artifact', help='Route artifact to a workflow')
     parser_placement_route_artifact.add_argument('process_id', help='Clarity lims process id')
@@ -428,9 +456,17 @@ def placement_tecan(args):
     parser_placement_unpooling.add_argument('process_id', help='Clarity lims process id')
     parser_placement_unpooling.set_defaults(func=placement_unpooling)

+    parser_placement_patient_pools = subparser_placement.add_parser('patient_pools', help='Create patient pools for Dx samples')
+    parser_placement_patient_pools.add_argument('process_id', help='Clarity lims process id')
+    parser_placement_patient_pools.set_defaults(func=placement_patient_pools)
+
     parser_placement_tecan = subparser_placement.add_parser('tecan', help='Placement of samples in tecan step')
     parser_placement_tecan.add_argument('process_id', help='Clarity lims process id')
     parser_placement_tecan.set_defaults(func=placement_tecan)

+    parser_placement_pipetting = subparser_placement.add_parser('pipetting', help='Check pipetting input and output')
+    parser_placement_pipetting.add_argument('process_id', help='Clarity lims process id')
+    parser_placement_pipetting.set_defaults(func=placement_pipetting)
+
     args = parser.parse_args()
     args.func(args)
diff --git a/clarity_epp/__init__.py b/clarity_epp/__init__.py
index 2380286..9c033f3 100644
--- a/clarity_epp/__init__.py
+++ b/clarity_epp/__init__.py
@@ -4,41 +4,69 @@ from email.mime.multipart import MIMEMultipart
 from email.mime.base import MIMEBase
 from email.mime.text import MIMEText
+import re
 import smtplib
 import mimetypes

+from genologics.entities import Artifact
+
+
+def get_sequence_name(artifact):
+    """Generate sequence name, for combined or single samples."""
+    sample_numbers = []
+    for sample in artifact.samples:
+        if 'Dx Monsternummer' in sample.udf:  # Use monsternummer for Dx samples
+            sample_numbers.append(sample.udf['Dx Monsternummer'])
+
+    if sample_numbers:  # Merge monsternummer for Dx samples
+        sequence_name = '-'.join(sorted(sample_numbers))
+    else:  # Use sample name for non Dx samples
+        sequence_name = artifact.samples[0].name

-def get_sequence_name(sample):
-    """Generate sequence name."""
-    try:
-        # Set fam_status
-        if sample.udf['Dx Familie status'] == 'Kind':
-            fam_status = 'C'
-        elif sample.udf['Dx Familie status'] == 'Ouder':
-            fam_status = 'P'
-
-        # Set sex
-        if sample.udf['Dx Geslacht'] == 'Man':
-            sex = 'M'
-        elif sample.udf['Dx Geslacht'] == 'Vrouw':
-            sex = 'F'
-        elif sample.udf['Dx Geslacht'] == 'Onbekend':
-            sex = 'O'
-    except KeyError:  # None DX sample, use sample.name as sequence name.
-        sequence_name = sample.name
-    else:
-        if not sample.name.startswith(sample.udf['Dx Familienummer']):
-            sequence_name = '{familienummer}{fam_status}{sex}{monsternummer}'.format(
-                familienummer=sample.udf['Dx Familienummer'],
-                fam_status=fam_status,
-                sex=sex,
-                monsternummer=sample.udf['Dx Monsternummer']
-            )
-        else:
-            sequence_name = sample.name
     return sequence_name


+def get_sample_artifacts_from_pool(lims, pool_artifact):
+    """Get sample artifacts from (sequence) pool."""
+    sample_artifacts = []
+    pool_artifact_demux = lims.get(pool_artifact.uri + '/demux')
+    for node in pool_artifact_demux.getiterator('artifact'):
+        if node.find('samples') is not None:
+            if len(node.find('samples').findall('sample')) in [1, 2]:
+                sample_artifact = Artifact(lims, uri=node.attrib['uri'])
+
+                # Check that a sample_artifact with 2 samples holds samples from the same person
+                if len(sample_artifact.samples) == 2:
+                    if (
+                        'Dx Persoons ID' in sample_artifact.samples[0].udf and
+                        'Dx Persoons ID' in sample_artifact.samples[1].udf and
+                        sample_artifact.samples[0].udf['Dx Persoons ID'] == sample_artifact.samples[1].udf['Dx Persoons ID']
+                    ):
+                        sample_artifacts.append(sample_artifact)
+                else:
+                    sample_artifacts.append(sample_artifact)
+    return sample_artifacts
+
+
+def get_mix_sample_barcode(artifact):
+    """Generate mix sample shortened barcode name."""
+    sample_names = {}
+    for sample in artifact.samples:
+        if 'Dx Monsternummer' in sample.udf:
+            monster = sample.udf['Dx Monsternummer']
+            if re.match(r'\d{4}D\d+', monster):
+                sample_names[sample] = monster[2:4], monster[5:]
+            elif monster.startswith('D'):
+                sample_names[sample] = monster
+
+    barcode_name = ''
+    if sample_names:
+        for sample in artifact.samples:
+            barcode_name += ''.join(sample_names[sample])
+
+    return barcode_name
+
+
 def send_email(server, sender, receivers, subject, text, attachment=None):
     """Send emails."""
     mail = MIMEMultipart()
diff --git a/clarity_epp/export/illumina.py b/clarity_epp/export/illumina.py
index e3f5aa8..11aff7c 100644
--- a/clarity_epp/export/illumina.py
+++ b/clarity_epp/export/illumina.py
@@ -1,15 +1,16 @@
 """Illumina export functions."""
 import operator
 import re
+import csv

 from genologics.entities import Process, Artifact

-from .. import get_sequence_name
+from .. import get_sequence_name, get_sample_artifacts_from_pool
 import clarity_epp.export.utils
 import config


-def update_samplesheet(lims, process_id, artifact_id, output_file):
+def update_samplesheet(lims, process_id, artifact_id, output_file, conversion_tool):
     """Update illumina samplesheet."""
     process = Process(lims, id=process_id)
     trim_last_base = True  # Used to set Read1EndWithCycle
@@ -28,8 +29,10 @@ def get_project(projects, urgent=False):

     # Parse families
     families = {}
-    for artifact in process.all_inputs():
-        for sample in artifact.samples:
+    sample_artifacts = get_sample_artifacts_from_pool(lims, process.analytes()[0][0])
+
+    for sample_artifact in sample_artifacts:
+        for sample in sample_artifact.samples:
             if (
                 'Dx Familienummer' in list(sample.udf) and
                 'Dx NICU Spoed' in list(sample.udf) and
@@ -117,8 +120,9 @@ def get_project(projects, urgent=False):
                     'deviating': False
                 }

-            # Add sample to family
-            families[family]['samples'].append(sample)
+            # Add sample_artifact to family
+            if sample_artifact not in families[family]['samples']:
+                families[family]['samples'].append(sample_artifact)

     # Get all project types and count samples
     project_types = {}
@@ -148,18 +152,20 @@ def get_project(projects, urgent=False):
     # Urgent families / samples, skip deviating
     for family in [family for family in families.values() if family['urgent'] and not family['deviating']]:
         family_project = get_project(project_types[family['project_type']]['projects'], urgent=True)
-        for sample in family['samples']:
-            sample_sequence_name = get_sequence_name(sample)
-            sample_sequence_names[sample.name] = sample_sequence_name
+        for sample_artifact in family['samples']:
+            sample_sequence_name = get_sequence_name(sample_artifact)
+            for sample in sample_artifact.samples:
+                sample_sequence_names[sample.name] = sample_sequence_name
             sample_projects[sample_sequence_name] = family_project
             project_types[family['project_type']]['projects'][family_project] += 1

     # Deviating families / samples
     for family in [family for family in families.values() if family['deviating']]:
         family_project = get_project(project_types[family['project_type']]['projects'])
-        for sample in family['samples']:
-            sample_sequence_name = get_sequence_name(sample)
-            sample_sequence_names[sample.name] = sample_sequence_name
+        for sample_artifact in family['samples']:
+            sample_sequence_name = get_sequence_name(sample_artifact)
+            for sample in sample_artifact.samples:
+                sample_sequence_names[sample.name] = sample_sequence_name
             sample_projects[sample_sequence_name] = family_project
             project_types[family['project_type']]['projects'][family_project] += 1

@@ -167,9 +173,10 @@ def get_project(projects, urgent=False):
     normal_families = [family for family in families.values() if not family['urgent'] and not family['deviating']]
     for family in sorted(normal_families, key=lambda fam: (len(fam['samples'])), reverse=True):
         family_project = get_project(project_types[family['project_type']]['projects'])
-        for sample in family['samples']:
-            sample_sequence_name = get_sequence_name(sample)
-            sample_sequence_names[sample.name] = sample_sequence_name
+        for sample_artifact in family['samples']:
+            sample_sequence_name = get_sequence_name(sample_artifact)
+            for sample in sample_artifact.samples:
+                sample_sequence_names[sample.name] = sample_sequence_name
             sample_projects[sample_sequence_name] = family_project
             project_types[family['project_type']]['projects'][family_project] += 1

@@ -186,21 +193,74 @@ def get_project(projects, urgent=False):
     samplesheet_artifact = Artifact(lims, id=artifact_id)
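For reference, the renamed helper now derives the sequence name from the artifact's samples rather than a single sample. A minimal sketch (not part of the patch) with invented monsternummers; it relies only on the fact that `get_sequence_name` touches `artifact.samples[i].udf` and `.name`, so duck-typed stand-ins suffice:

```python
# Illustrative only: FakeSample/FakeArtifact are hypothetical stand-ins.
from clarity_epp import get_sequence_name

class FakeSample:
    def __init__(self, name, udf):
        self.name, self.udf = name, udf

class FakeArtifact:
    def __init__(self, samples):
        self.samples = samples

mix = FakeArtifact([
    FakeSample('A1', {'Dx Monsternummer': '2024D00002'}),
    FakeSample('A2', {'Dx Monsternummer': '2024D00001'}),
])
assert get_sequence_name(mix) == '2024D00001-2024D00002'  # sorted and '-'-joined

external = FakeArtifact([FakeSample('ExtLib1', {})])
assert get_sequence_name(external) == 'ExtLib1'  # non-Dx fallback to sample name
```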
     file_id = samplesheet_artifact.files[0].id

-    for line in lims.get_file_contents(id=file_id).rstrip().split('\n'):
-        if line.startswith('[Settings]') and trim_last_base:
-            output_file.write('{line}\n'.format(line=line))
-            output_file.write('Read1EndWithCycle,{value}\n'.format(value=process.udf['Read 1 Cycles']-1))
-            output_file.write('Read2EndWithCycle,{value}\n'.format(value=process.udf['Read 2 Cycles']-1))
+    # Setup custom settings
+    custom_settings = ''
+
+    if conversion_tool == 'bcl2fastq' and trim_last_base:
+        custom_settings = (
+            'Read1EndWithCycle,{read_1_value}\n'
+            'Read2EndWithCycle,{read_2_value}\n'
+        ).format(
+            read_1_value=process.udf['Read 1 Cycles']-1, read_2_value=process.udf['Read 2 Cycles']-1
+        )
+
+    elif conversion_tool == 'bclconvert':
+        # Setup OverrideCycles
+        if trim_last_base or process.udf['UMI - Trim']:
+            override_cycles = [
+                '',  # read 1
+                'I{0}'.format(process.udf['Index Read 1']),  # index 1
+                'I{0}'.format(process.udf['Index Read 2']),  # index 2
+                '',  # read 2
+            ]
+
+            if trim_last_base and process.udf['UMI - Trim']:
+                override_cycles[0] = 'U{umi}Y{read}N1'.format(
+                    umi=process.udf['UMI - Read 1 Length'],
+                    read=process.udf['Read 1 Cycles'] - process.udf['UMI - Read 1 Length'] - 1
+                )
+                override_cycles[3] = 'U{umi}Y{read}N1'.format(
+                    umi=process.udf['UMI - Read 2 Length'],
+                    read=process.udf['Read 2 Cycles'] - process.udf['UMI - Read 2 Length'] - 1
+                )
+                custom_settings = 'TrimUMI,1\n'
+
+            elif trim_last_base:
+                override_cycles[0] = 'Y{read}N1'.format(read=process.udf['Read 1 Cycles'] - 1)
+                override_cycles[3] = 'Y{read}N1'.format(read=process.udf['Read 2 Cycles'] - 1)
+
+            elif process.udf['UMI - Trim']:
+                override_cycles[0] = 'U{umi}Y{read}'.format(
+                    umi=process.udf['UMI - Read 1 Length'],
+                    read=process.udf['Read 1 Cycles'] - process.udf['UMI - Read 1 Length']
+                )
+                override_cycles[3] = 'U{umi}Y{read}'.format(
+                    umi=process.udf['UMI - Read 2 Length'],
+                    read=process.udf['Read 2 Cycles'] - process.udf['UMI - Read 2 Length']
+                )
+                custom_settings = 'TrimUMI,1\n'
+
+            custom_settings = '{settings}OverrideCycles,{override_cycles}\n'.format(
+                settings=custom_settings,
+                override_cycles=';'.join(override_cycles)
+            )
+
+    for data in csv.reader(
+        lims.get_file_contents(id=file_id).rstrip().split('\n'),
+        quotechar='"', delimiter=',', quoting=csv.QUOTE_ALL, skipinitialspace=True
+    ):
+        if data[0] == '[Settings]' and custom_settings:
+            output_file.write('{line}\n'.format(line=','.join(data)))
+            output_file.write(custom_settings)
             settings_section = True

-        elif line.startswith('[Data]') and trim_last_base and not settings_section:
+        elif data[0] == '[Data]' and custom_settings and not settings_section:
             output_file.write('[Settings]\n')
-            output_file.write('Read1EndWithCycle,{value}\n'.format(value=process.udf['Read 1 Cycles']-1))
-            output_file.write('Read2EndWithCycle,{value}\n'.format(value=process.udf['Read 2 Cycles']-1))
-            output_file.write('{line}\n'.format(line=line))
+            output_file.write(custom_settings)
+            output_file.write('{line}\n'.format(line=','.join(data)))

-        elif line.startswith('Sample_ID'):  # Samples header line
-            sample_header = line.rstrip().split(',')
+        elif data[0] == 'Sample_ID':  # Samples header line
+            sample_header = data
             sample_id_index = sample_header.index('Sample_ID')
             sample_name_index = sample_header.index('Sample_Name')
             sample_project_index = sample_header.index('Sample_Project')
@@ -210,14 +270,12 @@ def get_project(projects, urgent=False):
             else:
                 index_index = sample_header.index('index')

-            output_file.write('{line}\n'.format(line=line))
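As a sanity check on the OverrideCycles construction above, here is a standalone reduction of the same branch structure (not part of the patch; the cycle, index and UMI numbers are invented, real values come from the process UDFs):

```python
# Standalone reduction of the OverrideCycles logic; numbers are hypothetical.
def override_cycles(read_cycles, index_cycles, umi_len, trim_last_base, trim_umi):
    read = ['', 'I{0}'.format(index_cycles), 'I{0}'.format(index_cycles), '']
    if trim_last_base and trim_umi:
        mask = 'U{0}Y{1}N1'.format(umi_len, read_cycles - umi_len - 1)
    elif trim_last_base:
        mask = 'Y{0}N1'.format(read_cycles - 1)
    else:  # UMI trim only
        mask = 'U{0}Y{1}'.format(umi_len, read_cycles - umi_len)
    read[0] = read[3] = mask
    return ';'.join(read)

# 151 cycles, 8 bp indexes, 5 bp UMI, trim both UMI and last base:
print(override_cycles(151, 8, 5, True, True))   # U5Y145N1;I8;I8;U5Y145N1
# Trim last base only:
print(override_cycles(151, 8, 0, True, False))  # Y150N1;I8;I8;Y150N1
```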
+            output_file.write('{line}\n'.format(line=','.join(data)))

         elif sample_header:  # Samples header seen, so continue with samples.
-            data = line.rstrip().split(',')
-
-            # Fix sample name -> use sequence name
-            if data[sample_name_index] in sample_sequence_names:
-                data[sample_name_index] = sample_sequence_names[data[sample_name_index]]
+            sample_name = data[sample_name_index].split(',')[0]
+            if sample_name in sample_sequence_names:
+                data[sample_name_index] = sample_sequence_names[sample_name]

             # Set Sample_Project
             if data[sample_name_index] in sample_projects:
@@ -232,4 +290,4 @@ def get_project(projects, urgent=False):
             output_file.write('{line}\n'.format(line=','.join(data)))

         else:  # Leave other lines untouched.
-            output_file.write('{line}\n'.format(line=line))
+            output_file.write('{line}\n'.format(line=','.join(data)))
diff --git a/clarity_epp/export/labels.py b/clarity_epp/export/labels.py
index f63ae8f..28c5320 100644
--- a/clarity_epp/export/labels.py
+++ b/clarity_epp/export/labels.py
@@ -2,6 +2,8 @@

 from genologics.entities import Process

+from .. import get_mix_sample_barcode
+

 def container(lims, process_id, output_file, description=''):
     """Generate container label file."""
@@ -9,7 +11,10 @@ def container(lims, process_id, output_file, description=''):
     for index, container in enumerate(sorted(process.output_containers(), key=lambda container: container.id, reverse=True)):
         if description:
             if ',' in description:
-                output_file.write('{description}\t{container}\r\n'.format(description=description.split(',')[index], container=container.name))
+                output_file.write('{description}\t{container}\r\n'.format(
+                    description=description.split(',')[index],
+                    container=container.name
+                ))
             else:
                 output_file.write('{description}\t{container}\r\n'.format(description=description, container=container.name))
         else:
@@ -22,7 +27,11 @@ def container_sample(lims, process_id, output_file, description=''):
     for container in process.output_containers():
         for artifact in container.placements.values():
             if description:
-                output_file.write('{description}\t{sample}\t{container}\r\n'.format(description=description, container=container.name, sample=artifact.name))
+                output_file.write('{description}\t{sample}\t{container}\r\n'.format(
+                    description=description,
+                    container=container.name,
+                    sample=artifact.name
+                ))
             else:
                 output_file.write('{sample}\t{container}\r\n'.format(container=container.name, sample=artifact.name))

@@ -32,11 +41,40 @@ def storage_location(lims, process_id, output_file):
     process = Process(lims, id=process_id)

     # Write header
-    output_file.write('Bakje\tpos\n')
+    output_file.write('Bakje\tpos\r\n')

     for artifact in process.analytes()[0]:
-        storage_location = artifact.samples[0].udf['Dx Opslaglocatie'].split()
-        output_file.write('{tray}\t{pos}\n'.format(
-            tray=storage_location[0][2:6],  # Select 4 digits from: CB[1-9][1-9][1-9][1-9]KK
-            pos=storage_location[1]
-        ))
+        for sample in artifact.samples:
+            storage_location = sample.udf['Dx Opslaglocatie'].split()
+            output_file.write('{tray}\t{pos}\r\n'.format(
+                tray=storage_location[0][2:6],  # Select 4 digits from: CB[1-9][1-9][1-9][1-9]KK
+                pos=storage_location[1]
+            ))
+
+
+def nunc_mix_sample(lims, process_id, output_file):
+    """Generate (mix) sample nunc label file."""
+    process = Process(lims, id=process_id)
+
+    # Write empty header
+    output_file.write('\r\n')
+
+    for artifact in process.analytes()[0]:
+        well = ''.join(artifact.location[1].split(':'))
+        sample_mix = False
+        if len(artifact.samples) > 1:
+            sample_mix = True
+
+        if sample_mix:
+            barcode_name = get_mix_sample_barcode(artifact)
+            output_file.write('{sample};;;;;{container}:{well};;1\r\n'.format(
+                sample=barcode_name,
+                container=artifact.container.name,
+                well=well
+            ))
+        else:
+            output_file.write('{sample};;;;;{container}:{well};;1\r\n'.format(
+                sample=artifact.samples[0].udf['Dx Fractienummer'],
+                container=artifact.container.name,
+                well=well
+            ))
\ No newline at end of file
diff --git a/clarity_epp/export/manual_pipetting.py b/clarity_epp/export/manual_pipetting.py
index a8e249d..75572b6 100755
--- a/clarity_epp/export/manual_pipetting.py
+++ b/clarity_epp/export/manual_pipetting.py
@@ -3,6 +3,7 @@

 from genologics.entities import Process

+from .. import get_mix_sample_barcode
 import clarity_epp.export.utils

@@ -328,6 +329,7 @@ def samplesheet_multiplex_sequence_pool(lims, process_id, output_file):
     final_volume = float(process.udf['Final volume'].split()[0])

     for input_pool in process.all_inputs():
+        input_pool_sample_ids = []
         input_pool_conc = float(input_pool.udf['Dx Concentratie fluorescentie (ng/ul)'])
         input_pool_size = float(input_pool.udf['Dx Fragmentlengte (bp)'])
         input_pool_nM = (input_pool_conc * 1000 * (1.0/660.0) * (1/input_pool_size)) * 1000
@@ -336,10 +338,18 @@ def samplesheet_multiplex_sequence_pool(lims, process_id, output_file):
         input_pool_sample_count = 0

         for sample in input_pool.samples:
+            # Check persoons ID to skip duplicate samples
+            if 'Dx Persoons ID' in sample.udf:
+                if sample.udf['Dx Persoons ID'] in input_pool_sample_ids:
+                    continue  # skip to next sample
+                else:
+                    input_pool_sample_ids.append(sample.udf['Dx Persoons ID'])
+
             if 'Dx Exoomequivalent' in sample.udf:
                 input_pool_sample_count += sample.udf['Dx Exoomequivalent']
             else:
                 input_pool_sample_count += 1
+
         total_sample_count += input_pool_sample_count

         input_pools.append({
             'name': input_pool.name,
@@ -689,23 +699,153 @@ def samplesheet_pool_magnis_pools(lims, process_id, output_file):
     """Create manual pipetting samplesheet for pooling magnis pools.
     Correct for pools with < 8 samples"""
     process = Process(lims, id=process_id)

+    # Set up multiplier
+    multiplier = 1
+    if 'Run type' in process.udf:
+        run_type = re.search(r'\(\*.+\)', process.udf['Run type'])
+        if run_type:
+            multiplier = float(run_type.string[run_type.start()+2:run_type.end()-1])
+
     # print header
     output_file.write('Pool\tContainer\tSample count\tVolume (ul)\n')

     # Get input pools, sort by name and print volume
     for input_artifact in sorted(process.all_inputs(resolve=True), key=lambda artifact: artifact.id):
         sample_count = 0
+        input_artifact_sample_ids = []

         for sample in input_artifact.samples:
+            # Check persoons ID to skip duplicate samples
+            if 'Dx Persoons ID' in sample.udf:
+                if sample.udf['Dx Persoons ID'] in input_artifact_sample_ids:
+                    continue  # skip to next sample
+                else:
+                    input_artifact_sample_ids.append(sample.udf['Dx Persoons ID'])
+
             if 'Dx Exoomequivalent' in sample.udf:
                 sample_count += sample.udf['Dx Exoomequivalent']
             else:
                 sample_count += 1

         output_file.write(
-            '{pool}\t{container}\t{sample_count}\t{volume}\n'.format(
+            '{pool}\t{container}\t{sample_count}\t{volume:.2f}\n'.format(
                 pool=input_artifact.name,
                 container=input_artifact.container.name,
                 sample_count=sample_count,
-                volume=sample_count * 1.25
+                volume=sample_count * 1.25 * multiplier
             )
         )
+
+
+def samplesheet_normalization_mix(lims, process_id, output_file):
+    """Create manual pipetting samplesheet for normalizing mix fraction samples."""
+    process = Process(lims, id=process_id)
+
+    output_file.write(
+        'Fractienummer\tConcentratie (ng/ul)\tVolume sample (ul)\tVolume low TE (ul)\tContainer_tube\n'
+    )
+
+    samples = {}
+
+    # Find all QC process types
+    qc_process_types = clarity_epp.export.utils.get_process_types(lims, ['Dx Qubit QC', 'Dx Tecan Spark 10M QC'])
+
+    # Find concentration in last QC process
+    for input_artifact in process.all_inputs():
+        for input_sample in input_artifact.samples:
+            qc_processes = lims.get_processes(type=qc_process_types, inputartifactlimsid=input_artifact.id)
+            if qc_processes:
+                qc_process = sorted(qc_processes, key=lambda process: int(process.id.split('-')[-1]))[-1]
+                for qc_artifact in qc_process.outputs_per_input(input_artifact.id):
+                    if input_sample.name in qc_artifact.name:
+                        for qc_sample in qc_artifact.samples:
+                            if qc_sample.name == input_sample.name:
+                                concentration = float(qc_artifact.udf['Dx Concentratie fluorescentie (ng/ul)'])
+
+            else:
+                parent_process = input_artifact.parent_process
+                for parent_artifact in parent_process.all_inputs():
+                    if parent_artifact.name == input_sample.name:
+                        qc_processes = lims.get_processes(type=qc_process_types, inputartifactlimsid=parent_artifact.id)
+                        if qc_processes:
+                            qc_process = sorted(qc_processes, key=lambda process: int(process.id.split('-')[-1]))[-1]
+                            for qc_artifact in qc_process.outputs_per_input(parent_artifact.id):
+                                if input_sample.name in qc_artifact.name:
+                                    for qc_sample in qc_artifact.samples:
+                                        if qc_sample.name == input_sample.name:
+                                            concentration = float(qc_artifact.udf['Dx Concentratie fluorescentie (ng/ul)'])
+                        else:
+                            # No QC process found, use Helix concentration
+                            concentration = input_sample.udf['Dx Concentratie (ng/ul)']
+
+            samples[input_sample.udf['Dx Monsternummer']] = {'conc': concentration}
+
+    # Calculation of pipetting volumes
+    for input_artifact in process.all_inputs():
+        output_artifact = process.outputs_per_input(input_artifact.id, Analyte=True)[0]  # assume one artifact per input
+        dividend = float(output_artifact.udf['Dx Input (ng)']) / len(input_artifact.samples)
+        minuend = float(output_artifact.udf['Dx Eindvolume (ul)']) / len(input_artifact.samples)
+        sample_mix = False
+
+        if len(input_artifact.samples) > 1:
+            sample_mix = True
+
+        input_sample_1 = input_artifact.samples[0]
+        if 'Dx sample vol. #1' in output_artifact.udf:
+            dividend_sample_1 = (
+                samples[input_sample_1.udf['Dx Monsternummer']]['conc'] * float(output_artifact.udf['Dx sample vol. #1'])
+            )
+        else:
+            dividend_sample_1 = dividend
+        sample_volume = dividend_sample_1 / samples[input_sample_1.udf['Dx Monsternummer']]['conc']
+        if sample_volume > minuend:
+            low_te_volume = 0
+        else:
+            low_te_volume = minuend - sample_volume
+        samples[input_sample_1.udf['Dx Monsternummer']]['sample_volume'] = sample_volume
+        samples[input_sample_1.udf['Dx Monsternummer']]['low_te_volume'] = low_te_volume
+
+        if sample_mix:
+            input_sample_2 = input_artifact.samples[1]
+            if 'Dx sample vol. #2' in output_artifact.udf:
+                dividend_sample_2 = (
+                    samples[input_sample_2.udf['Dx Monsternummer']]['conc'] * float(output_artifact.udf['Dx sample vol. #2'])
+                )
+            else:
+                dividend_sample_2 = dividend
+            sample_volume = dividend_sample_2 / samples[input_sample_2.udf['Dx Monsternummer']]['conc']
+            if sample_volume > minuend:
+                low_te_volume = 0
+            else:
+                low_te_volume = minuend - sample_volume
+            samples[input_sample_2.udf['Dx Monsternummer']]['sample_volume'] = sample_volume
+            samples[input_sample_2.udf['Dx Monsternummer']]['low_te_volume'] = low_te_volume
+
+    # Compose output per sample in well
+    output = {}
+    for output_artifact in process.all_outputs():
+        if output_artifact.type == 'Analyte':
+            for output_sample in output_artifact.samples:
+                monster = output_sample.udf['Dx Monsternummer']
+                if len(output_artifact.samples) > 1:
+                    container = get_mix_sample_barcode(output_artifact)
+                else:
+                    container = output_sample.udf['Dx Fractienummer']
+                well = ''.join(output_artifact.location[1].split(':'))
+                output_data = (
+                    '{sample}\t{concentration:.2f}\t{sample_volume:.2f}\t{low_te_volume:.2f}\t{container}\n'.format(
+                        sample=output_sample.udf['Dx Fractienummer'],
+                        concentration=samples[monster]['conc'],
+                        sample_volume=samples[monster]['sample_volume'],
+                        low_te_volume=samples[monster]['low_te_volume'],
+                        container=container
+                    )
+                )
+                if well in output:
+                    output[well][monster] = output_data
+                else:
+                    output[well] = {monster: output_data}
+
+    # Write output file per sample sorted for well
+    for well in clarity_epp.export.utils.sort_96_well_plate(output.keys()):
+        for sample in output[well]:
+            output_file.write(output[well][sample])
\ No newline at end of file
diff --git a/clarity_epp/export/merge.py b/clarity_epp/export/merge.py
index 8f71555..bab9afd 100644
--- a/clarity_epp/export/merge.py
+++ b/clarity_epp/export/merge.py
@@ -1,26 +1,27 @@
 """Export merge file functions."""
 from genologics.entities import Process

-from .. import get_sequence_name
+from .. import get_sequence_name, get_sample_artifacts_from_pool


 def create_file(lims, process_id, output_file):
-    """Create mege file."""
+    """Create merge file."""
     process = Process(lims, id=process_id)
-    samples = process.analytes()[0][0].samples
+    sample_artifacts = get_sample_artifacts_from_pool(lims, process.analytes()[0][0])

     output_file.write('Sample\tMerge 1 Sample\tMerge 1 Sequencing Run\tMerge 2 Sample\tMerge 2 Sequencing Run\n')

-    for sample in samples:
-        sample_merge = []
-        if 'Dx Mergen' in sample.udf and sample.udf['Dx Mergen']:
-            for udf in ['Dx Merge 1 Samplenaam', 'Dx Merge 1 Runnaam', 'Dx Merge 2 Samplenaam', 'Dx Merge 2 Runnaam']:
-                if udf in sample.udf:
-                    sample_merge.append(sample.udf[udf])
-                else:
-                    sample_merge.append('')
+    for sample_artifact in sample_artifacts:
+        for sample in sample_artifact.samples:  # Assume one sample per sample_artifact contains merge information
+            sample_merge = []
+            if 'Dx Mergen' in sample.udf and sample.udf['Dx Mergen']:
+                for udf in ['Dx Merge 1 Samplenaam', 'Dx Merge 1 Runnaam', 'Dx Merge 2 Samplenaam', 'Dx Merge 2 Runnaam']:
+                    if udf in sample.udf:
+                        sample_merge.append(sample.udf[udf])
+                    else:
+                        sample_merge.append('')

-        output_file.write('{sample}\t{merge}\n'.format(
-            sample=get_sequence_name(sample),
-            merge='\t'.join(sample_merge)
-        ))
+                output_file.write('{sample}\t{merge}\n'.format(
+                    sample=get_sequence_name(sample_artifact),
+                    merge='\t'.join(sample_merge)
+                ))
diff --git a/clarity_epp/export/ped.py b/clarity_epp/export/ped.py
index 7dd42dd..836d7af 100644
--- a/clarity_epp/export/ped.py
+++ b/clarity_epp/export/ped.py
@@ -1,20 +1,21 @@
 """Export ped functions."""
 from genologics.entities import Process

-from .. import get_sequence_name
+from .. import get_sequence_name, get_sample_artifacts_from_pool


 def create_file(lims, process_id, output_file):
     """Create ped file."""
     process = Process(lims, id=process_id)
-    samples = process.analytes()[0][0].samples
-
+    sample_artifacts = get_sample_artifacts_from_pool(lims, process.analytes()[0][0])
     ped_families = {}

-    for sample in samples:
+    for sample_artifact in sample_artifacts:
+        sample = sample_artifact.samples[0]  # Assume all sample metadata is identical.
+
         if 'Dx Familienummer' in sample.udf and sample.udf['Dx Onderzoeksreden'] != 'Research':
             family = sample.udf['Dx Familienummer']
-            sample_name = get_sequence_name(sample)
+            sample_name = get_sequence_name(sample_artifact)

             ped_sample = {'name': sample_name}

             if family not in ped_families:
diff --git a/clarity_epp/export/sample.py b/clarity_epp/export/sample.py
index 84d6844..74bee68 100644
--- a/clarity_epp/export/sample.py
+++ b/clarity_epp/export/sample.py
@@ -4,7 +4,6 @@
 from genologics.entities import Process

 import clarity_epp.export.utils
-from .. import get_sequence_name


 def removed_samples(lims, output_file):
diff --git a/clarity_epp/export/tecan.py b/clarity_epp/export/tecan.py
index 2708dc7..814fda4 100755
--- a/clarity_epp/export/tecan.py
+++ b/clarity_epp/export/tecan.py
@@ -2,6 +2,7 @@

 from genologics.entities import Process

+from .. import get_mix_sample_barcode
 import clarity_epp.export.utils

@@ -39,3 +40,111 @@ def samplesheet(lims, process_id, type, output_file):
                 well=well,
                 index=clarity_epp.export.utils.get_well_index(well, one_based=True)
             ))
+
+    elif type == 'filling_out_purify':
+        # Samplesheet Tecan Fluent 480 'Dx Uitvullen en zuiveren' (mix) samples
+        output_file.write(
+            'SourceTubeID;VolSample;VolWater;PositionIndex;MengID\n'
+        )
+
+        # Find all QC process types
+        qc_process_types = clarity_epp.export.utils.get_process_types(lims, ['Dx Qubit QC', 'Dx Tecan Spark 10M QC'])
+
+        samples = {}
+        # Find concentration in last QC process
+        for input_artifact in process.all_inputs():
+            for input_sample in input_artifact.samples:
+                qc_processes = lims.get_processes(type=qc_process_types, inputartifactlimsid=input_artifact.id)
+                if qc_processes:
+                    qc_process = sorted(qc_processes, key=lambda process: int(process.id.split('-')[-1]))[-1]
+                    for qc_artifact in qc_process.outputs_per_input(input_artifact.id):
+                        if input_sample.name in qc_artifact.name:
+                            for qc_sample in qc_artifact.samples:
+                                if qc_sample.name == input_sample.name:
+                                    concentration = float(qc_artifact.udf['Dx Concentratie fluorescentie (ng/ul)'])
+
+                else:
+                    parent_process = input_artifact.parent_process
+                    for parent_artifact in parent_process.all_inputs():
+                        if parent_artifact.name == input_sample.name:
+                            qc_processes = lims.get_processes(type=qc_process_types, inputartifactlimsid=parent_artifact.id)
+                            if qc_processes:
+                                qc_process = sorted(qc_processes, key=lambda process: int(process.id.split('-')[-1]))[-1]
+                                for qc_artifact in qc_process.outputs_per_input(parent_artifact.id):
+                                    if input_sample.name in qc_artifact.name:
+                                        for qc_sample in qc_artifact.samples:
+                                            if qc_sample.name == input_sample.name:
+                                                concentration = float(qc_artifact.udf['Dx Concentratie fluorescentie (ng/ul)'])
+                            else:
+                                # No QC process found, use Helix concentration
+                                concentration = input_sample.udf['Dx Concentratie (ng/ul)']
+
+                samples[input_sample.udf['Dx Monsternummer']] = {'conc': concentration}
+
+        for well in clarity_epp.export.utils.sort_96_well_plate(well_plate.keys()):
+            artifact = well_plate[well]
+            sample_mix = False
+            if len(artifact.samples) > 1:
+                sample_mix = True
+
+            if sample_mix:
+                dividend = 880
+                max_volume = 30
+            else:
+                dividend = 1760
+                max_volume = 60
+
+            for sample in artifact.samples:
+                monster = sample.udf['Dx Monsternummer']
+                samples[monster]['message'] = ''
+                if sample_mix:
+                    samples[monster]['mix_names'] = artifact.name
+                else:
+                    samples[monster]['mix_names'] = monster
+
+                # Calculation of pipetting volumes
+                calc_sample = dividend / samples[monster]['conc']
+                if calc_sample < 4:
+                    volume_sample = 4
+                elif calc_sample > max_volume:
+                    volume_sample = max_volume
+                    samples[monster]['message'] = (
+                        'Conc. too low - volume = {calc_sample:.2f} ul'.format(calc_sample=calc_sample)
+                    )
+                else:
+                    volume_sample = calc_sample
+                samples[monster]['sample_volume'] = volume_sample
+                volume_water = max_volume - volume_sample
+                samples[monster]['water_volume'] = volume_water
+
+            for sample in artifact.samples:
+                monster = sample.udf['Dx Monsternummer']
+                output_file.write('{sample};{volume_sample:.2f};{volume_water:.2f};{index};{name};{empty};{message}\n'.format(
+                    sample=sample.udf['Dx Fractienummer'],
+                    volume_sample=samples[monster]['sample_volume'],
+                    volume_water=samples[monster]['water_volume'],
+                    index=clarity_epp.export.utils.get_well_index(well, one_based=True),
+                    name=samples[monster]['mix_names'],
+                    empty='',
+                    message=samples[monster]['message']
+                ))
+
+    elif type == 'normalise':
+        output_file.write('SourceTubeID;PositionID;PositionIndex\n')
+        outputs = []
+        for output in process.all_outputs():
+            if output.name not in [
+                'Dx labels nunc', 'Dx pipetteerschema manueel normaliseren',
+                'Dx Fluent480 samplesheet manueel normaliseren'
+            ]:
+                outputs.append(output.name)
+        for well in clarity_epp.export.utils.sort_96_well_plate(well_plate.keys()):
+            artifact = well_plate[well]
+            if artifact.name in outputs:
+                if len(artifact.samples) > 1:
+                    source_tube = get_mix_sample_barcode(artifact)
+                else:
+                    sample = artifact.samples[0]
+                    source_tube = sample.udf['Dx Fractienummer']
+                output_file.write('{sample};{well};{index}\n'.format(
+                    sample=source_tube,
+                    well=well,
+                    index=clarity_epp.export.utils.get_well_index(well, one_based=True)
+                ))
\ No newline at end of file
diff --git a/clarity_epp/export/workflow.py b/clarity_epp/export/workflow.py
index a399c4d..b7c62f7 100644
--- a/clarity_epp/export/workflow.py
+++ b/clarity_epp/export/workflow.py
@@ -31,61 +31,71 @@ def helix_magnis(lims, process_id, output_file):

     for artifact in process.all_inputs():
         for sample in artifact.samples:
-            if 'Dx Werklijstnummer' in sample.udf:  # Only check samples with a 'Werklijstnummer'
-                sample_artifacts = lims.get_artifacts(samplelimsid=sample.id, type='Analyte')
-                # Filter artifacts without parent_process
-                sample_artifacts = [sample_artifact for sample_artifact in sample_artifacts if sample_artifact.parent_process]
-                # Sort artifact by parent process id
-                sample_artifacts = sorted(
-                    sample_artifacts,
-                    key=lambda artifact: int(artifact.parent_process.id.split('-')[-1])
-                )
-
-                sample_all_processes = {}
-                # reset after 'Dx Sample registratie zuivering' process
-                # this is a new import from helix, should not be counted as a repeat
-                sample_filter_processes = {}
-
-                for sample_artifact in sample_artifacts:
-                    if 'Dx Sample registratie zuivering' in sample_artifact.parent_process.type.name:
-                        sample_filter_processes = {}  # reset after new helix import
-                    process_id = sample_artifact.parent_process.id
-                    process_name = sample_artifact.parent_process.type.name
-
-                    if process_name in sample_all_processes:
-                        sample_all_processes[process_name].add(process_id)
-                    else:
-                        sample_all_processes[process_name] = set([process_id])
-
-                    if process_name in sample_filter_processes:
-                        sample_filter_processes[process_name].add(process_id)
-                    else:
-                        sample_filter_processes[process_name] = set([process_id])
+            # Only output samples with a 'Werklijstnummer'
+            if 'Dx Werklijstnummer' in sample.udf:
+                # Setup empty vars
+                meetw_zui, meetw_zui_herh, meetw_sampleprep, meetw_sampleprep_herh, meetw_seq, meetw_seq_herh = [''] * 6
+                sequence_name, gatk_vcf, exomedepth_vcf = [''] * 3
+
+                # Only lookup meetw and vcf files for WES samples
+                if sample.udf['Dx Stoftest code'] == config.stoftestcode_wes:
+                    sample_artifacts = lims.get_artifacts(samplelimsid=sample.id, type='Analyte')
+                    # Filter artifacts without parent_process
+                    sample_artifacts = [
+                        sample_artifact for sample_artifact in sample_artifacts if sample_artifact.parent_process
+                    ]
+                    # Sort artifact by parent process id
+                    sample_artifacts = sorted(
+                        sample_artifacts,
+                        key=lambda artifact: int(artifact.parent_process.id.split('-')[-1])
+                    )

-                # Determine meetw
-                repeat_cutoff = len(sample.udf['Dx Werklijstnummer'].split(';')) * 2
-                meetw_zui, meetw_zui_herh = determin_meetw(
-                    config.meetw_zui_wes_processes, sample_all_processes, repeat_cutoff
-                )
-                meetw_sampleprep, meetw_sampleprep_herh = determin_meetw(
-                    config.meetw_sampleprep_wes_processes, sample_filter_processes, 2
-                )
-                meetw_seq, meetw_seq_herh = determin_meetw(
-                    config.meetw_seq_wes_processes, sample_filter_processes, 2
-                )
+                    sample_all_processes = {}
+                    # reset after 'Dx Sample registratie zuivering' process
+                    # this is a new import from helix, should not be counted as a repeat
+                    sample_filter_processes = {}
+
+                    for sample_artifact in sample_artifacts:
+                        if 'Dx Sample registratie zuivering' in sample_artifact.parent_process.type.name:
+                            sample_filter_processes = {}  # reset after new helix import
+                        process_id = sample_artifact.parent_process.id
+                        process_name = sample_artifact.parent_process.type.name
+
+                        if process_name in sample_all_processes:
+                            sample_all_processes[process_name].add(process_id)
+                        else:
+                            sample_all_processes[process_name] = set([process_id])
+
+                        if process_name in sample_filter_processes:
+                            sample_filter_processes[process_name].add(process_id)
+                        else:
+                            sample_filter_processes[process_name] = set([process_id])
+
+                    # Determine meetw
+                    repeat_cutoff = len(sample.udf['Dx Werklijstnummer'].split(';')) * 2
+                    meetw_zui, meetw_zui_herh = determin_meetw(
+                        config.meetw_zui_wes_processes, sample_all_processes, repeat_cutoff
+                    )
+                    meetw_sampleprep, meetw_sampleprep_herh = determin_meetw(
+                        config.meetw_sampleprep_wes_processes, sample_filter_processes, 2
+                    )
+                    meetw_seq, meetw_seq_herh = determin_meetw(
+                        config.meetw_seq_wes_processes, sample_filter_processes, 2
+                    )

-                # Determine vcf files
-                gatk_vcf = ''
-                exomedepth_vcf = ''
-                if 'Dx GATK vcf' in artifact.udf:
-                    gatk_vcf = artifact.udf['Dx GATK vcf']
-                elif 'Dx GATK vcf' in artifact.input_artifact_list()[0].udf:  # Look one more step back.
-                    gatk_vcf = artifact.input_artifact_list()[0].udf['Dx GATK vcf']
+                    # Determine vcf files
+                    sequence_name = get_sequence_name(artifact)
+                    gatk_vcf = ''
+                    exomedepth_vcf = ''
+                    if 'Dx GATK vcf' in artifact.udf:
+                        gatk_vcf = artifact.udf['Dx GATK vcf']
+                    elif 'Dx GATK vcf' in artifact.input_artifact_list()[0].udf:  # Look one more step back.
+                        gatk_vcf = artifact.input_artifact_list()[0].udf['Dx GATK vcf']

-                if 'Dx ExomeDepth vcf' in artifact.udf:
-                    exomedepth_vcf = artifact.udf['Dx ExomeDepth vcf']
-                elif 'Dx ExomeDepth vcf' in artifact.input_artifact_list()[0].udf:  # Look one more step back.
-                    exomedepth_vcf = artifact.input_artifact_list()[0].udf['Dx ExomeDepth vcf']
+                    if 'Dx ExomeDepth vcf' in artifact.udf:
+                        exomedepth_vcf = artifact.udf['Dx ExomeDepth vcf']
+                    elif 'Dx ExomeDepth vcf' in artifact.input_artifact_list()[0].udf:  # Look one more step back.
+                        exomedepth_vcf = artifact.input_artifact_list()[0].udf['Dx ExomeDepth vcf']

                 output_file.write((
                     "{meet_id}\t{werklijst}\t{onderzoeksnummer}\t{monsternummer}\t{meetw_zui}\t{meetw_zui_herh}\t"
@@ -100,7 +110,7 @@ def helix_magnis(lims, process_id, output_file):
                     meetw_sampleprep=meetw_sampleprep, meetw_sampleprep_herh=meetw_sampleprep_herh,
                     meetw_seq=meetw_seq, meetw_seq_herh=meetw_seq_herh, meetw_bfx='J',
-                    sample_name=get_sequence_name(sample),
+                    sample_name=sequence_name,
                     vcf_file=gatk_vcf,
                     cnv_vcf_file=exomedepth_vcf,
                 )
diff --git a/clarity_epp/placement/__init__.py b/clarity_epp/placement/__init__.py
index c9d5394..2baf0f9 100644
--- a/clarity_epp/placement/__init__.py
+++ b/clarity_epp/placement/__init__.py
@@ -2,6 +2,7 @@

 import clarity_epp.placement.artifact
 import clarity_epp.placement.barcode
+import clarity_epp.placement.pipetting
 import clarity_epp.placement.plate
 import clarity_epp.placement.pool
 import clarity_epp.placement.step
diff --git a/clarity_epp/placement/artifact.py b/clarity_epp/placement/artifact.py
index 8963ea7..aeead75 100644
--- a/clarity_epp/placement/artifact.py
+++ b/clarity_epp/placement/artifact.py
@@ -10,8 +10,7 @@ def set_sequence_name(lims, process_id):
-    """Change artifact name to sequnece name."""
+    """Change artifact name to sequence name."""
     process = Process(lims, id=process_id)
     for artifact in process.analytes()[0]:
-        sample = artifact.samples[0]
-        artifact.name = get_sequence_name(sample)
+        artifact.name = get_sequence_name(artifact)
         artifact.put()

@@ -44,48 +43,24 @@ def route_to_workflow(lims, process_id, workflow):
     ]

     if workflow == 'post_bioinf':
-        stoftest_artifacts = {}
-        for artifact in artifacts_completed:
-            sample = artifact.samples[0]  # Asume 1 sample per artifact
-
-            # Add stoftest to dict
-            if sample.udf['Dx Stoftest code'] not in stoftest_artifacts:
-                stoftest_artifacts[sample.udf['Dx Stoftest code']] = {'single': [], 'trio': []}
-
-            # Remove research artifacts
-            if sample.udf['Dx Stoftest code'] != config.stoftestcode_research:
-                # Lookup trio samples
-                # if parent check family members if parent belongs to trio
-                if sample.udf['Dx Familie status'] == 'Ouder':
-                    parent_status = 'single'
-                    for family_sample in lims.get_samples(udf={'Dx Familienummer': sample.udf['Dx Familienummer']}):
-                        if(
-                            'Dx Gerelateerde onderzoeken' in family_sample.udf
-                            and sample.udf['Dx Onderzoeknummer'] in family_sample.udf['Dx Gerelateerde onderzoeken']
-                            and len(family_sample.udf['Dx Gerelateerde onderzoeken'].split(';')) >= 2
-                        ):
-                            parent_status = 'trio'
-                    stoftest_artifacts[sample.udf['Dx Stoftest code']][parent_status].append(artifact)
-                # If child check if part of trio
-                elif(
-                    'Dx Gerelateerde onderzoeken' in sample.udf
-                    and len(sample.udf['Dx Gerelateerde onderzoeken'].split(';')) >= 2
-                ):
-                    stoftest_artifacts[sample.udf['Dx Stoftest code']]['trio'].append(artifact)
-                # Else not trio
-                else:
-                    stoftest_artifacts[sample.udf['Dx Stoftest code']]['single'].append(artifact)
-
-        for stoftest, route_artifacts in stoftest_artifacts.items():
-            if route_artifacts['single']:
-                workflow = Workflow(lims, id=config.post_bioinf_workflow[stoftest]['single']['workflow'])
-                stage = workflow.stages[config.post_bioinf_workflow[stoftest]['single']['stage']]
-                lims.route_artifacts(route_artifacts['single'], workflow_uri=workflow.uri, stage_uri=stage.uri)
-
-            if route_artifacts['trio']:
-                workflow = Workflow(lims, id=config.post_bioinf_workflow[stoftest]['trio']['workflow'])
-                stage = workflow.stages[config.post_bioinf_workflow[stoftest]['trio']['stage']]
-                lims.route_artifacts(route_artifacts['trio'], workflow_uri=workflow.uri, stage_uri=stage.uri)
+        # Remove research artifacts
+        route_artifacts = [
+            artifact for artifact in artifacts_completed
+            if artifact.samples[0].udf['Dx Stoftest code'] != config.stoftestcode_research  # Assume all sample metadata is identical.
+        ]
+        lims.route_artifacts(route_artifacts, workflow_uri=Workflow(lims, id=config.post_bioinf_workflow).uri)

     elif workflow == 'sequencing':
         lims.route_artifacts(artifacts_completed, workflow_uri=Workflow(lims, id=config.sequencing_workflow).uri)
+
+
+def set_norm_manual_udf(lims, process_id):
+    """Combine mix sample udfs 'Dx norm. manueel'."""
+    process = Process(lims, id=process_id)
+
+    for artifact in process.all_outputs():
+        artifact.udf['Dx norm. manueel'] = False
+        for sample in artifact.samples:
+            if sample.udf['Dx norm. manueel']:
+                artifact.udf['Dx norm. manueel'] = True
+        artifact.put()
\ No newline at end of file
diff --git a/clarity_epp/placement/barcode.py b/clarity_epp/placement/barcode.py
index b36eb98..453c191 100644
--- a/clarity_epp/placement/barcode.py
+++ b/clarity_epp/placement/barcode.py
@@ -9,7 +9,7 @@ def check_family(lims, process_id):
     for artifact in process.analytes()[0]:
         sample = artifact.samples[0]
         barcode = artifact.reagent_labels[0]
-
+
         try:
             query_udf = {'Dx Familienummer': sample.udf['Dx Familienummer']}
         except KeyError:
@@ -18,7 +18,7 @@ def check_family(lims, process_id):
         else:
             family_samples = lims.get_samples(udf=query_udf)
             for family_sample in family_samples:
-                if family_sample.id != sample.id:
+                if family_sample.id != sample.id and family_sample.udf['Dx Persoons ID'] != sample.udf['Dx Persoons ID']:
                     family_sample_artifacts = lims.get_artifacts(samplelimsid=family_sample.id, reagent_label=barcode, process_type=process.type.name)
                     if family_sample_artifacts:
                         artifact.udf['Dx monster met BC duplicaat'] = "{sample}".format(sample=family_sample.name)
diff --git a/clarity_epp/placement/pipetting.py b/clarity_epp/placement/pipetting.py
new file mode 100644
index 0000000..d4ef888
--- /dev/null
+++ b/clarity_epp/placement/pipetting.py
@@ -0,0 +1,42 @@
+"""Pipetting placement functions."""
+
+from genologics.entities import Process
+
+from .. import get_mix_sample_barcode
+
+
+def check_nunc_input_nunc_output(lims, process_id):
+    """Check that pipetted input and output nuncs match the expected fraction numbers."""
+    process = Process(lims, id=process_id)
+    for output_artifact in process.all_outputs():
+        if output_artifact.type == 'Analyte':
+            input_nunc_1 = ''
+            input_nunc_2 = ''
+            output_nunc = ''
+            sample_mix = False
+            if len(output_artifact.samples) > 1:
+                sample_mix = True
+                mix_name = get_mix_sample_barcode(output_artifact)
+                fraction1 = output_artifact.samples[0].udf['Dx Fractienummer']
+                fraction2 = output_artifact.samples[1].udf['Dx Fractienummer']
+            else:
+                fraction = output_artifact.samples[0].udf['Dx Fractienummer']
+            if 'Dx Sample 1 norm' in output_artifact.udf:
+                input_nunc_1 = output_artifact.udf['Dx Sample 1 norm']
+            if 'Dx Sample 2 norm' in output_artifact.udf:
+                input_nunc_2 = output_artifact.udf['Dx Sample 2 norm']
+            if 'Dx Sample (output)' in output_artifact.udf:
+                output_nunc = output_artifact.udf['Dx Sample (output)']
+            if sample_mix:
+                if input_nunc_1 == fraction1 and input_nunc_2 == fraction2 and output_nunc == mix_name:
+                    output_artifact.udf['Dx pipetteer check'] = True
+                elif input_nunc_1 == fraction2 and input_nunc_2 == fraction1 and output_nunc == mix_name:
+                    output_artifact.udf['Dx pipetteer check'] = True
+                else:
+                    output_artifact.udf['Dx pipetteer check'] = False
+            else:
+                if input_nunc_1 == fraction and input_nunc_2 == '' and output_nunc == fraction:
+                    output_artifact.udf['Dx pipetteer check'] = True
+                else:
+                    output_artifact.udf['Dx pipetteer check'] = False
+            output_artifact.put()
diff --git a/clarity_epp/placement/pool.py b/clarity_epp/placement/pool.py
index 5f8b86a..0ef3170 100644
--- a/clarity_epp/placement/pool.py
+++ b/clarity_epp/placement/pool.py
@@ -1,7 +1,8 @@
 """Pool placement functions."""
-from genologics.entities import Artifact, Process, Workflow
+from genologics.entities import Process, Workflow, Step

+from .. import get_sequence_name, get_sample_artifacts_from_pool
 import config

@@ -12,12 +13,12 @@ def unpooling(lims, process_id):
     if process.step.actions.next_actions[0]['action'] == 'complete':  # Only unpool complete sequencing runs.
         pool_artifact = process.all_inputs()[0]
         pool_artifact_parent_process = pool_artifact.parent_process
-        pool_artifact_demux = lims.get(pool_artifact.uri + '/demux')
         run_id = pool_artifact.name  # Assume run id is set as pool name using placement/artifact/set_runid_name
         sample_artifacts = []  # sample artifacts before pooling
         sample_projects = {}

+        # Get sample projects from samplesheet
         for artifact in pool_artifact_parent_process.result_files():
             if (artifact.name == 'SampleSheet csv' or artifact.name == 'Sample Sheet') and artifact.files:
                 file_id = artifact.files[0].id
@@ -33,21 +34,48 @@ def unpooling(lims, process_id):
             elif project_index and len(data) >= project_index:
                 sample_projects[data[sample_index]] = data[project_index]

-        for node in pool_artifact_demux.getiterator('artifact'):
-            if node.find('samples'):
-                if len(node.find('samples').findall('sample')) == 1:
-                    sample_artifact = Artifact(lims, uri=node.attrib['uri'])
-                    sample = sample_artifact.samples[0]  # 1 sample per artifact.
+        # Parse sequencing run samples and move Dx samples to post sequencing workflow
+        for sample_artifact in get_sample_artifacts_from_pool(lims, pool_artifact):
+            sample = sample_artifact.samples[0]  # Assume all sample metadata is identical.
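The nunc check above compares scanned values against `get_mix_sample_barcode`, which shortens each 'Dx Monsternummer'. A worked sketch of that shortening rule (the monsternummers below are invented for illustration, not taken from this patch):

```python
# Illustrative reduction of the barcode shortening in get_mix_sample_barcode.
import re

def shorten(monster):
    if re.match(r'\d{4}D\d+', monster):
        return monster[2:4] + monster[5:]  # '2024D12345' -> '24' + '12345'
    elif monster.startswith('D'):
        return monster                     # legacy 'D...' numbers pass through
    return ''

# A mix of two samples yields the concatenated short forms:
print(shorten('2024D12345') + shorten('2024D00007'))  # '24123452400007'
```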
- # Get sample sequencing run and project from samplesheet - sample_artifact.udf['Dx Sequencing Run ID'] = run_id - if 'Sample Type' in sample.udf and 'library' in sample.udf['Sample Type']: # Use sample.name for external (clarity_portal) samples - sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample.name] - else: # Use sample_artifact.name for Dx samples (upload via Helix) - sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample_artifact.name] - sample_artifact.put() + # Set sample sequencing run and project + sample_artifact.udf['Dx Sequencing Run ID'] = run_id + # Use sample.name for external (clarity_portal) samples + if 'Sample Type' in sample.udf and 'library' in sample.udf['Sample Type']: + sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample.name] + else: # Use sample_artifact.name for Dx samples (upload via Helix) + sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample_artifact.name] + sample_artifact.put() - if sample_artifact.samples[0].project and sample_artifact.samples[0].project.udf['Application'] == 'DX': # Only move DX production samples to post sequencing workflow - sample_artifacts.append(sample_artifact) + # Only move DX production samples to post sequencing workflow + if sample.project and sample.project.udf['Application'] == 'DX': + sample_artifacts.append(sample_artifact) lims.route_artifacts(sample_artifacts, workflow_uri=Workflow(lims, id=config.post_sequencing_workflow).uri) + + +def create_patient_pools(lims, process_id): + """Create patient pools for Dx samples based on UDF 'Dx Persoons ID'.""" + step = Step(lims, id=process_id) + step_pools = step.step_pools + patient_pools = {} + + # Create patient pools + for artifact in step_pools.available_inputs: + sample = artifact.samples[0] # Assume one sample per artifact + if sample.udf['Dx Persoons ID'] not in patient_pools: + patient_pools[sample.udf['Dx Persoons ID']] = { + 'name': str(sample.udf['Dx Persoons ID']), + 'inputs': [] + } + patient_pools[sample.udf['Dx Persoons ID']]['inputs'].append(artifact) + + # Transform patient pools to list and put to clarity + step_pools.set_pools(list(patient_pools.values())) + step_pools.put() + + # Rename pools to sequence name + process = Process(lims, id=process_id) + for artifact in process.all_outputs(): + artifact.name = get_sequence_name(artifact) + artifact.put() diff --git a/clarity_epp/qc/qubit.py b/clarity_epp/qc/qubit.py index 4c5fb2e..fcc07b6 100644 --- a/clarity_epp/qc/qubit.py +++ b/clarity_epp/qc/qubit.py @@ -27,6 +27,16 @@ def set_qc_flag(lims, process_id, cutoff=10): sample_measurements_average = sum(sample_measurements) / float(len(sample_measurements)) artifact.udf['Dx Concentratie fluorescentie (ng/ul)'] = sample_measurements_average + # Reset 'Dx norm. manueel' udf + for analyte in process.analytes()[0]: + if analyte.name == sample: + if 'Dx Sample registratie zuivering' in analyte.parent_process.type.name: + if sample_measurements_average <= 29.3: + artifact.samples[0].udf['Dx norm. manueel'] = True + else: + artifact.samples[0].udf['Dx norm. manueel'] = False + artifact.samples[0].put() + if concentration_range[0] <= sample_measurements_average <= concentration_range[1]: if len(sample_measurements) == 1: artifact.qc_flag = 'PASSED' diff --git a/clarity_epp/upload/samples.py b/clarity_epp/upload/samples.py index 93efb5d..b95c4eb 100644 --- a/clarity_epp/upload/samples.py +++ b/clarity_epp/upload/samples.py @@ -8,6 +8,7 @@ from .. 
import send_email import clarity_epp.upload.utils +import config def from_helix(lims, email_settings, input_file): @@ -75,6 +76,8 @@ def from_helix(lims, email_settings, input_file): 'Dx Protocolomschrijving': {'column': 'Protocolomschrijving'}, 'Dx Einddatum': {'column': 'Einddatum'}, 'Dx Gerelateerde onderzoeken': {'column': 'Gerelateerde onderzoeken'}, + 'Dx gerelateerd aan oz': {'column': 'Gerelateerd aan'}, + 'Dx gerelateerde oz #': {'column': 'Aantal gerelateerde onderzoeken.'}, } header = input_file.readline().rstrip().split(',') # expect header on first line for udf in udf_column: @@ -83,6 +86,7 @@ def from_helix(lims, email_settings, input_file): # Setup email subject = "Lims Helix Upload: {0}".format(project_name) message = "Project: {0}\n\nSamples:\n".format(project_name) + sample_messages = {} # Parse samples for line in input_file: @@ -99,6 +103,8 @@ def from_helix(lims, email_settings, input_file): udf_data[udf] = bool(data[udf_column[udf]['index']].strip()) elif udf == 'Dx Concentratie (ng/ul)': udf_data[udf] = data[udf_column[udf]['index']].replace(',', '.') + if udf_data[udf]: + udf_data[udf] = float(udf_data[udf]) elif udf in ['Dx Monsternummer', 'Dx Fractienummer']: udf_data[udf] = clarity_epp.upload.utils.transform_sample_name(data[udf_column[udf]['index']]) elif udf == 'Dx Gerelateerde onderzoeken': @@ -123,6 +129,13 @@ def from_helix(lims, email_settings, input_file): else: udf_data['Dx Handmatig'] = False + # Set 'Dx norm. manueel' udf for samples with Dx Concentratie (ng/ul) + if udf_data['Dx Concentratie (ng/ul)']: + if udf_data['Dx Concentratie (ng/ul)'] <= 29.3: + udf_data['Dx norm. manueel'] = True + else: + udf_data['Dx norm. manueel'] = False + # Set 'Dx Familie status' udf if udf_data['Dx Onderzoeksreden'] == 'Bevestiging diagnose': udf_data['Dx Familie status'] = 'Kind' @@ -162,7 +175,7 @@ def from_helix(lims, email_settings, input_file): udf_data['Dx Familienummer'] = udf_data['Dx Familienummer'].split('/')[-1].strip(' ') # Set NICU status for related samples using Dx Gerelateerde onderzoeken - if udf_data['Dx NICU Spoed']: + if udf_data['Dx NICU Spoed'] and not udf_data['Dx Onderzoeksreden'] == 'Informativiteitstest': for related_research in udf_data['Dx Gerelateerde onderzoeken'].split(';'): for related_sample in lims.get_samples(udf={'Dx Onderzoeknummer': related_research}): related_sample.udf['Dx NICU Spoed'] = udf_data['Dx NICU Spoed'] @@ -170,35 +183,86 @@ def from_helix(lims, email_settings, input_file): # Set NICU status for sample if related sample is NICU else: for related_sample in lims.get_samples(udf={'Dx Familienummer': udf_data['Dx Familienummer']}): - if( + if ( 'Dx Gerelateerde onderzoeken' in related_sample.udf and udf_data['Dx Onderzoeknummer'] in related_sample.udf['Dx Gerelateerde onderzoeken'] ): udf_data['Dx NICU Spoed'] = related_sample.udf['Dx NICU Spoed'] + # Set 'Dx Mengfractie' + if udf_data['Dx Stoftest code'] == config.stoftestcode_wes_duplo: + udf_data['Dx Mengfractie'] = True + + # Find WES sample(s) + duplo_samples = lims.get_samples(udf={ + 'Dx Persoons ID': udf_data['Dx Persoons ID'], + 'Dx Onderzoeknummer': udf_data['Dx Onderzoeknummer'], + 'Dx Stoftest code': config.stoftestcode_wes, + }) + if duplo_samples: # Set duplo status for WES samples + for duplo_sample in duplo_samples: + duplo_sample.udf['Dx Mengfractie'] = True + duplo_sample.put() + # Check Dx Monsternummer + if duplo_sample.udf['Dx Monsternummer'] == udf_data['Dx Monsternummer']: + udf_data['Dx Import warning'] = '; '.join([ + 'WES en WES_duplo zelfde 
monster ({sample}).'.format(sample=duplo_sample.name),
+                            udf_data['Dx Import warning']
+                        ])
+            else:  # Set import warning if no WES samples found
+                udf_data['Dx Import warning'] = '; '.join(['Alleen WES_duplo aangemeld.', udf_data['Dx Import warning']])
+
+        elif udf_data['Dx Stoftest code'] == config.stoftestcode_wes:
+            # Find WES_duplo sample(s)
+            duplo_samples = lims.get_samples(udf={
+                'Dx Persoons ID': udf_data['Dx Persoons ID'],
+                'Dx Onderzoeknummer': udf_data['Dx Onderzoeknummer'],
+                'Dx Stoftest code': config.stoftestcode_wes_duplo,
+            })
+            if duplo_samples:  # Set duplo status for WES sample
+                udf_data['Dx Mengfractie'] = True
+                for duplo_sample in duplo_samples:
+                    # Remove import warning from WES_duplo samples (strip items so remove() also matches a non-leading warning)
+                    if 'Dx Import warning' in duplo_sample.udf and 'Alleen WES_duplo aangemeld.' in duplo_sample.udf['Dx Import warning']:
+                        import_warning = [warning.strip() for warning in duplo_sample.udf['Dx Import warning'].split(';')]
+                        import_warning.remove('Alleen WES_duplo aangemeld.')
+                        duplo_sample.udf['Dx Import warning'] = '; '.join(import_warning)
+                        duplo_sample.put()
+
+                    # Check Dx Monsternummer
+                    if duplo_sample.udf['Dx Monsternummer'] == udf_data['Dx Monsternummer']:
+                        udf_data['Dx Import warning'] = '; '.join([
+                            'WES en WES_duplo zelfde monster ({sample}).'.format(sample=duplo_sample.name),
+                            udf_data['Dx Import warning']
+                        ])
+        else:
+            udf_data['Dx Mengfractie'] = False
+
         # Check other samples from patient
         sample_list = lims.get_samples(udf={'Dx Persoons ID': udf_data['Dx Persoons ID']})
         for sample in sample_list:
             if sample.udf['Dx Monsternummer'] == udf_data['Dx Monsternummer']:
-                if(
+                if (
                     sample.udf['Dx Protocolomschrijving'] in udf_data['Dx Protocolomschrijving'] and
                     sample.udf['Dx Foetus'] == udf_data['Dx Foetus']
                 ):
                     udf_data['Dx Import warning'] = '; '.join([
-                        '{sample}: Monsternummer hetzelfde, Protocolomschrijving hetzelfde.'.format(sample=sample.name),
+                        'Herhaling of dubbele indicatie, beide monsters ingeladen ({sample}).'.format(sample=sample.name),
                         udf_data['Dx Import warning']
                     ])
-                else:
+                elif 'Dx Mengfractie' not in sample.udf or not sample.udf['Dx Mengfractie']:
                     udf_data['Dx Import warning'] = '; '.join([
-                        '{sample}: Monsternummer hetzelfde, Protocolomschrijving uniek.'.format(sample=sample.name),
+                        'Eerder onderzoek met protocolomschrijving {protocol} ({sample}).'.format(
+                            protocol=sample.udf['Dx Protocolomschrijving'], sample=sample.name
+                        ),
                         udf_data['Dx Import warning']
                     ])
-            elif(
+            elif (
                 sample.udf['Dx Protocolomschrijving'] in udf_data['Dx Protocolomschrijving'] and
                 sample.udf['Dx Foetus'] == udf_data['Dx Foetus']
             ):
                 udf_data['Dx Import warning'] = '; '.join([
-                    '{sample}: Monsternummer uniek, Protocolomschrijving hetzelfde.'.format(sample=sample.name),
+                    'Herhaling of dubbele indicatie, beide monsters ingeladen ({sample}).'.format(sample=sample.name),
                     udf_data['Dx Import warning']
                 ])
 
@@ -209,18 +273,19 @@
             sample = Sample.create(lims, container=container, position='1:1', project=project, name=sample_name, udf=udf_data)
             lims.route_artifacts([sample.artifact], workflow_uri=workflow.uri)
             if udf_data['Dx Import warning']:
-                message += "{0}\tCreated and added to workflow: {1}.\tImport warning: {2}\n".format(
+                sample_messages[sample.name] = "{0}\tCreated and added to workflow: {1}.\tImport warning: {2}".format(
                     sample.name, workflow.name, udf_data['Dx Import warning']
                 )
             else:
-                message += "{0}\tCreated and added to workflow: {1}.\n".format(sample.name, workflow.name)
+                sample_messages[sample.name] = "{0}\tCreated and added to workflow: 
{1}.".format(sample.name, workflow.name) else: - message += "{0}\tERROR: Stoftest code {1} is not linked to a workflow.\n".format( + sample_messages[sample_name] += "{0}\tERROR: Stoftest code {1} is not linked to a workflow.".format( sample_name, udf_data['Dx Stoftest code'] ) # Send final email + message += '\n'.join(sample_messages.values()) send_email(email_settings['server'], email_settings['from'], email_settings['to_import_helix'], subject, message) diff --git a/clarity_epp/upload/tecan.py b/clarity_epp/upload/tecan.py index 20a4598..d0e33c0 100644 --- a/clarity_epp/upload/tecan.py +++ b/clarity_epp/upload/tecan.py @@ -77,6 +77,17 @@ def results_qc(lims, process_id): sample_concentration = ((sample_fluorescence - baseline_fluorescence) * regression_slope) / 2.0 artifact.udf['Dx Concentratie fluorescentie (ng/ul)'] = sample_concentration + # Reset 'Dx norm. manueel' udf + if 'Dx Tecan std' not in artifact.name: + for analyte in process.analytes()[0]: + if analyte.name == artifact.name: + if 'Dx Sample registratie zuivering' in analyte.parent_process.type.name: + if sample_concentration <= 29.3: + artifact.samples[0].udf['Dx norm. manueel'] = True + else: + artifact.samples[0].udf['Dx norm. manueel'] = False + artifact.samples[0].put() + # Set artifact Concentratie fluorescentie # Get artifact index == count if artifact_name not in artifact_count: @@ -137,3 +148,34 @@ def results_purify_normalise(lims, process_id): artifact.udf['Dx Concentratie fluorescentie (ng/ul)'] = tecan_result[sample.udf['Dx Fractienummer']]['conc'] artifact.udf['Dx QC status'] = tecan_result[sample.udf['Dx Fractienummer']]['norm'] artifact.put() + + +def results_purify_mix(lims, process_id): + """Upload tecan results to artifacts (mix samples).""" + process = Process(lims, id=process_id) + + # Find and parse Tecan Fluent 480 Output + tecan_result = {} + for result_file in process.result_files(): + if result_file.name == 'Tecan Fluent 480 Output': + file_data = lims.get_file_contents(result_file.files[0].id).split('\n') + header = file_data[0].rstrip().split(';') + for line in file_data[1:]: + if line.rstrip(): + data = line.rstrip().split(';') + tecan_result[data[header.index('SampleID')]] = { + 'conc': float(data[header.index('Concentratie(ng/ul)')]), + 'norm': txt_to_bool(data[header.index('Normalisatie')]) + } + break # File found exit loop + + # Set concentration values on artifacts + for artifact in process.analytes()[0]: + if len(artifact.samples) > 1: + artifact.udf['Dx Concentratie fluorescentie (ng/ul)'] = tecan_result[artifact.name]['conc'] + artifact.udf['Dx QC status'] = tecan_result[artifact.name]['norm'] + else: + sample = artifact.samples[0] + artifact.udf['Dx Concentratie fluorescentie (ng/ul)'] = tecan_result[sample.udf['Dx Monsternummer']]['conc'] + artifact.udf['Dx QC status'] = tecan_result[sample.udf['Dx Monsternummer']]['norm'] + artifact.put() diff --git a/config.py b/config.py index 6e5a22a..55d4581 100755 --- a/config.py +++ b/config.py @@ -19,31 +19,37 @@ # Import samples: stoftestcode to workflow stoftestcode_wes = 'NGS_025' +stoftestcode_wes_duplo = 'NGS_028' stoftestcode_mip = 'NGS_027' stoftestcode_research = 'NGS_023' stoftestcode_workflow = { - stoftestcode_wes: '1654', # DEV Dx Exoom Magnis v1.2 + stoftestcode_wes: '1852', # DEV Dx Exoom Magnis v2.1 + stoftestcode_wes_duplo: '1852', # DEV Dx Exoom Magnis v2.1 stoftestcode_mip: '1651', # DEV Dx smMIP v1.2 } # Export meetw protocol steps WES meetw_zui_wes_processes = [ 'Dx Sample registratie zuivering v1.1', + 'Dx Sample 
registratie zuivering v1.2', 'Dx Hamilton uitvullen v1.1', 'Dx Hamilton zuiveren v1.1', 'Dx Zuiveren gDNA manueel v1.1', 'Dx manueel gezuiverd placement v1.2', 'Dx gDNA Normalisatie Caliper v1.1', 'Dx Uitvullen en zuiveren (Fluent 480) v1.0', + 'Dx Uitvullen en zuiveren (Fluent 480) v1.1', 'Dx Normaliseren (Fluent 480) v1.0', - 'Dx gDNA handmatige normalisatie WES v1.0' + 'Dx gDNA handmatige normalisatie WES v1.0', + 'Dx gDNA handmatige normalisatie WES v1.1', ] meetw_sampleprep_wes_processes = [ 'Dx Fragmenteren v1.0', - 'Dx Library Prep & Target Enrichment Magnis v1.0' - ] + 'Dx Library Prep & Target Enrichment Magnis v1.0', + 'Dx Library Prep & Target Enrichment Magnis v1.1', +] meetw_seq_wes_processes = [ 'Dx Multiplexen Enrichment pools Magnis v1.0', @@ -51,7 +57,7 @@ 'Dx Library pool denatureren en laden (NovaSeq) v1.3', 'AUTOMATED - NovaSeq Run (NovaSeq 6000 v3.1)', 'Dx QC controle Lab sequencen v1.1', - 'Dx NovaSeq QC controle Lab sequencen v1.3' + 'Dx NovaSeq QC controle Lab sequencen v1.3', ] # Export meetw protocol steps MIP @@ -63,7 +69,7 @@ 'Dx Capture v1.0', 'Dx Exonuclease behandeling v1.0', 'Dx PCR na exonuclease behandeling v1.0', - 'Dx smMIP multiplexen & BBSS sequence pool v1.0' + 'Dx smMIP multiplexen & BBSS sequence pool v1.0', ] meetw_seq_mip_processes = [ @@ -82,13 +88,8 @@ # Post sequencing workflow sequencing_workflow = '1701' # DEV Dx Illumina Sequencing v1.2 -post_sequencing_workflow = '902' # DEV Dx Bioinformatica analyses v1.0 -post_bioinf_workflow = { # Contains workflow and workflow stage (number) for single or trio samples - # WES : DEV Dx NGS WES onderzoeken afronden v1.1 - stoftestcode_wes: {'single': {'workflow': '1401', 'stage': 0}, 'trio': {'workflow': '1401', 'stage': 1}}, - # MIP : DEV Dx NGS smMIP onderzoeken afronden v1.0 - stoftestcode_mip: {'single': {'workflow': '1202', 'stage': 0}, 'trio': {'workflow': '1202', 'stage': 0}} -} +post_sequencing_workflow = '1204' # DEV Dx Bioinformatica analyses v1.1 +post_bioinf_workflow = '1803' # DEV Dx NGS WES onderzoeken afronden v2.0 # Research Onderzoeksindicatie research_onderzoeksindicatie_project = { diff --git a/scripts/management_review.py b/scripts/management_review.py index 477e4fc..372178b 100644 --- a/scripts/management_review.py +++ b/scripts/management_review.py @@ -10,8 +10,17 @@ password = 'lims_user_password' lims = Lims(baseuri, username, password) +# month quarter +quarters = ['Q1', 'Q2', 'Q3', 'Q4'] +month_quarter = { + '01': 'Q1', '02': 'Q1', '03': 'Q1', + '04': 'Q2', '05': 'Q2', '06': 'Q2', + '07': 'Q3', '08': 'Q3', '09': 'Q3', + '10': 'Q4', '11': 'Q4', '12': 'Q4' +} + # Get DX projects and filter on year based on project name -dx_projects = [project for project in lims.get_projects(udf={'Application': 'DX'}) if project.name.startswith('Dx WL21')] +dx_projects = [project for project in lims.get_projects(udf={'Application': 'DX'}) if project.name.startswith('Dx WL23')] sample_count = 0 # Expected actions @@ -25,7 +34,14 @@ 'Dx 3nM verdunning Magnis', 'Dx Multiplexen Enrichment samples (3nM) Magnis', 'Dx Multiplexen Enrichment pools Magnis', 'Dx Multiplexen sequence pool', 'Dx Library pool denatureren en laden (NovaSeq)', 'AUTOMATED - NovaSeq Run (NovaSeq 6000)', 'Dx Library pool denatureren en laden (NovaSeq) Dx QC controle Lab sequencen', - 'Dx NGS labwerk afronden', 'Dx Bioinformatica analyses', 'Dx NGS onderzoeken afronden', + 'Dx Library pool denatureren en laden (NovaSeq) Dx NovaSeq QC controle Lab sequencen', + 'Dx NGS labwerk afronden', 'Dx Bioinformatica analyses', 'Dx Fingerprint 
match maken', 'Dx NGS onderzoeken afronden',
+    'Dx sample registratie pool', 'Dx Capture', 'Dx Exonuclease behandeling', 'Dx PCR na exonuclease behandeling',
+    'Dx smMIP multiplexen & BBSS sequence pool', 'Dx NGS smMIP onderzoeken afronden', 'Dx smMIP sequence pool verdunning',
+    'Dx Library pool denatureren en laden (Nextseq)', 'Dx NextSeq Run',
+    'Dx Library pool denatureren en laden (Nextseq) Dx QC controle Lab sequencen',
+    'Dx Sample registratie', 'Dx gDNA handmatige normalisatie', 'Dx Uitvullen en zuiveren (Fluent 480)',
+    'Dx Normaliseren (Fluent 480)', 'Dx gDNA handmatige normalisatie WES',
 
     # CREv2
     'Dx Fragmenteren & BBSS', 'Dx LibraryPrep Caliper KAPA', 'Dx Library Prep amplificatie & clean up KAPA',
@@ -40,8 +56,11 @@
     'Dx Tapestation 2200 QC', 'Dx Tapestation 4200 QC', 'Dx Aggregate QC'
 ]
 processes_before_qc = [
-    'Dx Hamilton zuiveren', 'Dx Zuiveren gDNA manueel', 'Dx Placement Enrichment Magnis',
-    'Dx Multiplexen Enrichment pools Magnis',
+    'Dx Hamilton uitvullen', 'Dx Hamilton zuiveren', 'Dx Zuiveren gDNA manueel', 'Dx Placement Enrichment Magnis',
+    'Dx Multiplexen Enrichment pools Magnis', 'Dx sample registratie pool', 'Dx smMIP multiplexen & BBSS sequence pool',
+    'Dx PCR na exonuclease behandeling', 'Dx Sample registratie', 'Dx Exonuclease behandeling',
+    'Dx Sample registratie zuivering',
+
     # CREv2
     'Dx Fragmenteren & BBSS', 'Dx LibraryPrep Caliper KAPA', 'Dx Library Prep amplificatie & clean up KAPA',
     'Dx Post Enrichment PCR & clean up', 'Dx Multiplexen library pool'
@@ -53,16 +72,19 @@
 
 # Setup count dictionary
 process_action_counts = OrderedDict()
-for process in processes:
-    process_action_counts[process] = {}
-    for action in action_list:
-        process_action_counts[process][action] = 0
+for quarter in quarters:
+    process_action_counts[quarter] = OrderedDict()
+    for process in processes:
+        process_action_counts[quarter][process] = {}
+        for action in action_list:
+            process_action_counts[quarter][process][action] = 0
 
 for project in dx_projects:
     for sample in lims.get_samples(projectlimsid=project.id):
-        if sample.udf['Dx Onderzoeksreden'] == 'Research':  # skip research
+        if 'Dx Onderzoeksreden' not in sample.udf or sample.udf['Dx Onderzoeksreden'] == 'Research':  # skip research
            continue
         sample_count += 1
+        sample_quarter = month_quarter[sample.date_received.split('-')[1]]
 
         for artifact in lims.get_artifacts(samplelimsid=sample.id, resolve=True, type='Analyte'):
             for process in lims.get_processes(inputartifactlimsid=artifact.id):
@@ -93,14 +115,17 @@
                 # Get action for artifact
                 for action in process.step.actions.get_next_actions():
                     if action['artifact'].id in output_artifacts:
-                        process_action_counts[process_name]['total'] += 1
-                        process_action_counts[process_name][str(action['action'])] += 1
+                        process_action_counts[sample_quarter][process_name]['total'] += 1
+                        process_action_counts[sample_quarter][process_name][str(action['action'])] += 1
 
 print('Total Sample count: {0}'.format(str(sample_count)))
-print('Process\t{action_list}'.format(action_list='\t'.join(action_list)))
-for process in process_action_counts:
-    if process_action_counts[process]['total']:
-        print('{process}\t{action_list}'.format(
-            process=process,
-            action_list='\t'.join([str(process_action_counts[process][action]) if action in process_action_counts[process] else '0' for action in action_list])
-        ))
+
+for quarter in quarters:
+    print(quarter)
+    print('Process\t{action_list}'.format(action_list='\t'.join(action_list)))
+    for process in process_action_counts[quarter]:
+        if 
process_action_counts[quarter][process]['total']: + print('{process}\t{action_list}'.format( + process=process, + action_list='\t'.join([str(process_action_counts[quarter][process][action]) if action in process_action_counts[quarter][process] else '0' for action in action_list]) + )) diff --git a/templates/NovaSeq_BCLCONVERT_Reverse_Complement_Samplesheet.csv b/templates/NovaSeq_BCLCONVERT_Reverse_Complement_Samplesheet.csv new file mode 100644 index 0000000..73d2674 --- /dev/null +++ b/templates/NovaSeq_BCLCONVERT_Reverse_Complement_Samplesheet.csv @@ -0,0 +1,56 @@ +SORT.BY.${INPUT.CONTAINER.ROW}${INPUT.CONTAINER.COLUMN} +OUTPUT.FILE.NAME,${OUTPUT.CONTAINER.NAME}.csv +PROCESS.POOLED.ARTIFACTS +CONTROL.SAMPLE.DEFAULT.PROJECT.NAME,Controls + +[Header] +Investigator Name,${PROCESS.TECHNICIAN} +Experiment Name,${PROCESS.UDF.Experiment Name} +Date,${DATE} +[Reads] +${PROCESS.UDF.Read 1 Cycles} +${PROCESS.UDF.Read 2 Cycles} +[Settings] +AdapterRead1,${PROCESS.UDF.Adapter} +AdapterRead2,${PROCESS.UDF.Adapter Read 2} +FindAdaptersWithIndels,true +[Data] + +
+Sample_ID,Sample_Name,Sample_Plate,Sample_Well,index,index2,Sample_Project,Description +
+
+${INPUT.LIMSID},${SAMPLE.NAME##NoSpecialCharacters},${INPUT.CONTAINER.NAME},${INPUT.CONTAINER.PLACEMENT},${INPUT.REAGENT.SEQUENCE##Single},${INPUT.REAGENT.SEQUENCE##Index2RC},${SAMPLE.PROJECT.NAME##NoSpecialCharacters},
+
+
+${INPUT.REAGENT.SEQUENCE##Single}
+def index = token.indexOf("-")
+if (index > 0) {
+    return token.substring(0,index)
+} else {
+    return token
+}
+
+${INPUT.REAGENT.SEQUENCE##Index2RC}
+def index = token.indexOf("-")
+if (index > 0) {
+    return token.split('-')[1].toUpperCase().reverse().collect { base ->
+        switch (base) {
+            case 'A': return 'T'
+            case 'C': return 'G'
+            case 'G': return 'C'
+            case 'T': return 'A'
+            default : throw new Exception("Input sequence '${token}' contains an invalid base '${base}'.")
+        }
+    }.join('')
+} else {
+    return ''
+}
+
+${SAMPLE.NAME##NoSpecialCharacters}
+return token.replaceAll(\"[^a-zA-Z0-9-]\", \"-\")
+
+${SAMPLE.PROJECT.NAME##NoSpecialCharacters}
+return token.replaceAll(\"[^a-zA-Z0-9_]\", \"_\")
+
+ 
\ No newline at end of file
diff --git a/templates/NovaSeq_BCLCONVERT_Samplesheet.csv b/templates/NovaSeq_BCLCONVERT_Samplesheet.csv
new file mode 100644
index 0000000..96c0804
--- /dev/null
+++ b/templates/NovaSeq_BCLCONVERT_Samplesheet.csv
@@ -0,0 +1,49 @@
+SORT.BY.${INPUT.CONTAINER.ROW}${INPUT.CONTAINER.COLUMN}
+OUTPUT.FILE.NAME,${OUTPUT.CONTAINER.NAME}.csv
+PROCESS.POOLED.ARTIFACTS
+CONTROL.SAMPLE.DEFAULT.PROJECT.NAME,Controls
+HIDE, ${INPUT.REAGENT.SEQUENCE##Dual}, IF, NODATA
+
+[Header]
+Investigator Name,${PROCESS.TECHNICIAN}
+Experiment Name,${PROCESS.UDF.Experiment Name}
+Date,${DATE}
+[Reads]
+${PROCESS.UDF.Read 1 Cycles}
+${PROCESS.UDF.Read 2 Cycles}
+[Settings]
+AdapterRead1,${PROCESS.UDF.Adapter}
+AdapterRead2,${PROCESS.UDF.Adapter Read 2}
+FindAdaptersWithIndels,true
+[Data]
+
+Sample_ID,Sample_Name,Sample_Plate,Sample_Well,index,index2,Sample_Project,Description +
+ +${INPUT.LIMSID},${SAMPLE.NAME##NoSpecialCharacters},${INPUT.CONTAINER.NAME},${INPUT.CONTAINER.PLACEMENT},${INPUT.REAGENT.SEQUENCE##Single},${INPUT.REAGENT.SEQUENCE##Dual},${SAMPLE.PROJECT.NAME##NoSpecialCharacters}, + + +${INPUT.REAGENT.SEQUENCE##Single} +def index = token.indexOf("-") +if (index > 0) { + return token.substring(0,index) +} else { + return token +} + +${INPUT.REAGENT.SEQUENCE##Dual} +def index = token.indexOf("-") +if (index >= 0) { + return token.substring(index + 1) +} else { + return null +} + +${SAMPLE.NAME##NoSpecialCharacters} +return token.replaceAll(\"[^a-zA-Z0-9-]\", \"-\") + +${SAMPLE.PROJECT.NAME##NoSpecialCharacters} +return token.replaceAll(\"[^a-zA-Z0-9_]\", \"_\") + + \ No newline at end of file
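
For reference, the reagent-sequence transforms used by the two new BCLCONVERT templates split a dual index token of the form 'i7-i5' and, in the reverse-complement template, reverse-complement the i5 half. Below is a minimal Python sketch of the same logic, useful for sanity-checking generated samplesheets; it is illustrative only and not part of this patch, and the function names are invented here.

# Python mirror of the ##Single and ##Index2RC template transforms (illustrative only).
COMPLEMENT = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}


def index_single(token):
    """Part before the first '-' (i7), or the whole token for single-index samples."""
    index = token.find('-')
    return token[:index] if index > 0 else token


def index2_rc(token):
    """Reverse complement of the part after '-' (i5), or '' for single-index samples."""
    index = token.find('-')
    if index <= 0:
        return ''
    sequence = token.split('-')[1].upper()
    try:
        return ''.join(COMPLEMENT[base] for base in reversed(sequence))
    except KeyError as error:
        raise ValueError("Input sequence '{0}' contains an invalid base {1}.".format(token, error))


assert index_single('ATTACTCG-TATAGCCT') == 'ATTACTCG'
assert index2_rc('ATTACTCG-TATAGCCT') == 'AGGCTATA'  # i5 reverse complement
assert index2_rc('ATTACTCG') == ''  # single index: empty index2

The non-reverse-complement template instead emits the i5 half unchanged via the ##Dual transform and, judging by the HIDE ... IF NODATA directive, appears to drop the index2 column entirely when no dual index is present.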