Skip to content

Commit

Permalink
Merge pull request #27 from UMCUGenetics/develop
Browse files Browse the repository at this point in the history
v1.3.0
  • Loading branch information
rernst authored Dec 9, 2020
2 parents ab9c512 + 5fce429 commit 9d6f9a0
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 35 deletions.
6 changes: 4 additions & 2 deletions clarity_epp.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,9 @@ def export_tecan(args):

def export_workflow(args):
"""Export workflow overview files."""
if args.type == 'lab':
if args.type == 'all':
clarity_epp.export.workflow.helix_all(lims, args.process_id, args.output_file)
elif args.type == 'lab':
clarity_epp.export.workflow.helix_lab(lims, args.process_id, args.output_file)
elif args.type == 'data_analysis':
clarity_epp.export.workflow.helix_data_analysis(lims, args.process_id, args.output_file)
Expand Down Expand Up @@ -242,7 +244,7 @@ def placement_complete_step(args):
parser_export_tecan.set_defaults(func=export_tecan)

parser_export_workflow = subparser_export.add_parser('workflow', help='Export workflow result file.', parents=[output_parser])
parser_export_workflow.add_argument('type', choices=['lab', 'data_analysis'], help='Workflow type')
parser_export_workflow.add_argument('type', choices=['all', 'lab', 'data_analysis'], help='Workflow type')
parser_export_workflow.add_argument('process_id', help='Clarity lims process id')
parser_export_workflow.set_defaults(func=export_workflow)

Expand Down
20 changes: 8 additions & 12 deletions clarity_epp/export/caliper.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,10 @@ def samplesheet_normalise(lims, process_id, output_file):
machine = 'Qubit'
sample = a.samples[0].name
measurement = a.udf['Dx Conc. goedgekeurde meting (ng/ul)']
qcflag = a.qc_flag
if qcflag == 'PASSED':
if sample in samples_measurements_qubit:
samples_measurements_qubit[sample].append(measurement)
else:
samples_measurements_qubit[sample] = [measurement]
if sample in samples_measurements_qubit:
samples_measurements_qubit[sample].append(measurement)
else:
samples_measurements_qubit[sample] = [measurement]
else:
sample = a.samples[0].name
if sample not in sample_concentration:
Expand All @@ -92,12 +90,10 @@ def samplesheet_normalise(lims, process_id, output_file):
machine = 'Tecan'
sample = a.samples[0].name
measurement = a.udf['Dx Conc. goedgekeurde meting (ng/ul)']
qcflag = a.qc_flag
if qcflag == 'UNKNOWN' or 'PASSED':
if sample in samples_measurements_tecan:
samples_measurements_tecan[sample].append(measurement)
else:
samples_measurements_tecan[sample] = [measurement]
if sample in samples_measurements_tecan:
samples_measurements_tecan[sample].append(measurement)
else:
samples_measurements_tecan[sample] = [measurement]
else:
sample = a.samples[0].name
if sample not in sample_concentration:
Expand Down
3 changes: 1 addition & 2 deletions clarity_epp/export/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ def sequencing_run(lims, email_settings, process_id):
subject = "LIMS QC Controle - {0}".format(artifact.name)

message = "Sequencing Run: {0}\n".format(artifact.name)
message += "Date: {0}\n".format(process.date_run)
message += "Technician: {0}\n".format(process.technician.name)
message += "LIMS Next Action: {0}\n\n".format(process.step.actions.next_actions[0]['action'])

Expand All @@ -25,5 +24,5 @@ def sequencing_run(lims, email_settings, process_id):
message += "\nManager Review LIMS:\n"
message += "{0}: {1}\n".format(process.step.actions.escalation['author'].name, process.step.actions.escalation['request'])
message += "{0}: {1}\n".format(process.step.actions.escalation['reviewer'].name, process.step.actions.escalation['answer'])

send_email(email_settings['from'], email_settings['to_sequencing_run_complete'], subject, message)
2 changes: 2 additions & 0 deletions clarity_epp/export/illumina.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Illumina export functions."""
import operator
import re

from genologics.entities import Process, Artifact

Expand Down Expand Up @@ -66,6 +67,7 @@ def get_project(projects, urgent=False):
family = 'GIAB'
else:
family = sample.project.name
family = re.sub('^dx[ _]*', '', family, flags=re.IGNORECASE) # Remove 'dx' (ignore case) and strip leading space or _
if family not in families:
families[family] = {'samples': [], 'NICU': False, 'project_type': family, 'split_project_type': False, 'urgent': False, 'merge': False}

Expand Down
89 changes: 74 additions & 15 deletions clarity_epp/export/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,25 +29,27 @@ def helix_lab(lims, process_id, output_file):
for sample in artifact.samples:
if 'Dx Werklijstnummer' in sample.udf: # Only check samples with a 'Werklijstnummer'
sample_artifacts = lims.get_artifacts(samplelimsid=sample.id, type='Analyte')
sample_artifacts = [sample_artifact for sample_artifact in sample_artifacts if sample_artifact.parent_process] # Filter artifacts without parent_process
sample_artifacts = sorted(sample_artifacts, key=lambda artifact: int(artifact.parent_process.id.split('-')[-1])) # Sort artifact by parent process id

sample_all_processes = {}
sample_filter_processes = {} # reset after Dx Sample registratie zuivering

for artifact in sample_artifacts:
if artifact.parent_process:
if 'Dx Sample registratie zuivering' in artifact.parent_process.type.name:
sample_filter_processes = {} # reset after new import
process_id = artifact.parent_process.id
process_name = artifact.parent_process.type.name

if process_name in sample_all_processes:
sample_all_processes[process_name].add(process_id)
else:
sample_all_processes[process_name] = set([process_id])

if process_name in sample_filter_processes:
sample_filter_processes[process_name].add(process_id)
else:
sample_filter_processes[process_name] = set([process_id])
if 'Dx Sample registratie zuivering' in artifact.parent_process.type.name:
sample_filter_processes = {} # reset after new helix import
process_id = artifact.parent_process.id
process_name = artifact.parent_process.type.name

if process_name in sample_all_processes:
sample_all_processes[process_name].add(process_id)
else:
sample_all_processes[process_name] = set([process_id])

if process_name in sample_filter_processes:
sample_filter_processes[process_name].add(process_id)
else:
sample_filter_processes[process_name] = set([process_id])

# Determine meetw
repeat_cutoff = len(sample.udf['Dx Werklijstnummer'].split(';')) * 2
Expand Down Expand Up @@ -94,3 +96,60 @@ def helix_data_analysis(lims, process_id, output_file):

)
)

def helix_all(lims, process_id, output_file):
    """Export workflow information in helix table format.

    Writes one tab-separated row per sample that carries a 'Dx Werklijstnummer'
    UDF, combining the zuivering (lab), libprep, enrichment, sequencing and bfx
    status columns of the separate lab/data_analysis exports.

    Args:
        lims: genologics Lims connection.
        process_id: Clarity LIMS process id whose analytes are exported.
        output_file: Open writable (text) file handle for the table output.
    """
    output_file.write("meet_id\twerklijst_nummer\tonderzoeknr\tmonsternummer\tZuivering OK?\tZuivering herh?\tLibprep OK?\tLibprep herh?\tEnrichment OK?\tEnrichment herh?\tSequencen OK?\tSequencen herh?\tBfx analyse OK?\tSNP match OK?\n")
    process = Process(lims, id=process_id)

    for artifact in process.analytes()[0]:
        for sample in artifact.samples:
            if 'Dx Werklijstnummer' in sample.udf:  # Only check samples with a 'Werklijstnummer'
                sample_artifacts = lims.get_artifacts(samplelimsid=sample.id, type='Analyte')
                sample_artifacts = [sample_artifact for sample_artifact in sample_artifacts if sample_artifact.parent_process]  # Filter artifacts without parent_process
                sample_artifacts = sorted(sample_artifacts, key=lambda sample_artifact: int(sample_artifact.parent_process.id.split('-')[-1]))  # Sort artifacts by parent process id

                sample_all_processes = {}
                sample_filter_processes = {}  # reset after 'Dx Sample registratie zuivering' process -> this is a new import from helix, should not be counted as a repeat

                for sample_artifact in sample_artifacts:
                    if 'Dx Sample registratie zuivering' in sample_artifact.parent_process.type.name:
                        sample_filter_processes = {}  # reset after new helix import
                    # Dedicated names here; the original reassigned the process_id
                    # parameter inside the loop, shadowing the function argument.
                    parent_process_id = sample_artifact.parent_process.id
                    parent_process_name = sample_artifact.parent_process.type.name

                    sample_all_processes.setdefault(parent_process_name, set()).add(parent_process_id)
                    sample_filter_processes.setdefault(parent_process_name, set()).add(parent_process_id)

                # Determine meetw
                # Repeat cutoff scales with the number of worklists: 2 runs per worklist are expected.
                repeat_cutoff = len(sample.udf['Dx Werklijstnummer'].split(';')) * 2
                meetw_zui, meetw_zui_herh = determin_meetw(config.meetw_zui_processes, sample_all_processes, repeat_cutoff)
                meetw_libprep, meetw_libprep_herh = determin_meetw(config.meetw_libprep_processes, sample_filter_processes, 2)
                meetw_enrich, meetw_enrich_herh = determin_meetw(config.meetw_enrich_processes, sample_filter_processes, 2)
                meetw_seq, meetw_seq_herh = determin_meetw(config.meetw_seq_processes, sample_filter_processes, 2)

                # SNP match flag: 'J' only when the UDF is present and truthy.
                # Direct membership test; no need to materialize the UDF keys in a list.
                meetw_snp_match = 'N'
                if 'Dx SNPmatch' in artifact.udf and artifact.udf['Dx SNPmatch']:
                    meetw_snp_match = 'J'

                output_file.write(
                    "{meet_id}\t{werklijst}\t{onderzoeksnummer}\t{monsternummer}\t{meetw_zui}\t{meetw_zui_herh}\t{meetw_libprep}\t{meetw_libprep_herh}\t{meetw_enrich}\t{meetw_enrich_herh}\t{meetw_seq}\t{meetw_seq_herh}\t{meetw_bfx}\t{meetw_snp_match}\n".format(
                        meet_id=sample.udf['Dx Meet ID'].split(';')[0],
                        werklijst=sample.udf['Dx Werklijstnummer'].split(';')[0],
                        onderzoeksnummer=sample.udf['Dx Onderzoeknummer'].split(';')[0],
                        monsternummer=sample.udf['Dx Monsternummer'],
                        meetw_zui=meetw_zui, meetw_zui_herh=meetw_zui_herh,
                        meetw_libprep=meetw_libprep, meetw_libprep_herh=meetw_libprep_herh,
                        meetw_enrich=meetw_enrich, meetw_enrich_herh=meetw_enrich_herh,
                        meetw_seq=meetw_seq, meetw_seq_herh=meetw_seq_herh,
                        meetw_bfx='J',  # reaching this export implies bfx analysis completed
                        meetw_snp_match=meetw_snp_match,
                    )
                )
1 change: 0 additions & 1 deletion clarity_epp/qc/fragment_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ def set_qc_flag(lims, process_id):
max_size = process.udf['Maximale fragmentlengte (bp)']

for artifact in process.all_outputs():
print artifact, artifact.name, artifact.files
try:
size = artifact.udf['Dx Fragmentlengte (bp)']
if size >= min_size and size <= max_size:
Expand Down
5 changes: 4 additions & 1 deletion clarity_epp/upload/tapestation.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ def results(lims, process_id):
for line in lims.get_file_contents(tapestation_result_file.id).split('\n'):
if line.startswith('FileName'):
header = line.split(',')
size_index = header.index('Size [bp]')
if 'Size [bp]' in header:
size_index = header.index('Size [bp]') # Tapestation compact peak table
else:
size_index = header.index('Average Size [bp]') # Tapestation compact region table
sample_index = header.index('Sample Description')

elif line:
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
genologics
argparse
genologics==0.3.20
argparse==1.4.0

0 comments on commit 9d6f9a0

Please sign in to comment.