From c217533991f1ef0d95f6c21eb0f0daaaedd682f1 Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Thu, 18 Jan 2024 17:37:34 +0100
Subject: [PATCH 01/30] WIP, new samplesheet writer.

---
 clarity_epp.py                 |  10 +-
 clarity_epp/export/illumina.py | 256 +++++++++++++++++++++++++++++++++
 config.py                      |  19 +++
 3 files changed, 278 insertions(+), 7 deletions(-)
diff --git a/clarity_epp.py b/clarity_epp.py
index e0ffd81..1c89751 100755
--- a/clarity_epp.py
+++ b/clarity_epp.py
@@ -48,8 +48,8 @@ def export_hamilton(args):
 
 def export_illumina(args):
     """Export (updated) illumina samplesheet."""
-    clarity_epp.export.illumina.update_samplesheet(
-        lims, args.process_id, args.artifact_id, args.output_file, args.conversion_tool
+    clarity_epp.export.illumina.create_samplesheet(
+        lims, args.process_id, args.output_file
     )
 
 
@@ -287,13 +287,9 @@ def placement_pipetting(args):
     parser_export_hamilton.set_defaults(func=export_hamilton)
 
     parser_export_illumina = subparser_export.add_parser(
-        'illumina', help='Export updated illumina samplesheet', parents=[output_parser]
+        'illumina', help='Export illumina samplesheet', parents=[output_parser]
     )
     parser_export_illumina.add_argument('process_id', help='Clarity lims process id')
-    parser_export_illumina.add_argument('artifact_id', help='Clarity lims samplesheet artifact id')
-    parser_export_illumina.add_argument(
-        '-c', '--conversion_tool', choices=['bcl2fastq', 'bclconvert'], default='bcl2fastq', help='Illumina conversion tool'
-    )
     parser_export_illumina.set_defaults(func=export_illumina)
 
     parser_export_labels = subparser_export.add_parser('labels', help='Export container labels', parents=[output_parser])
diff --git a/clarity_epp/export/illumina.py b/clarity_epp/export/illumina.py
index 11aff7c..a0d48c1 100644
--- a/clarity_epp/export/illumina.py
+++ b/clarity_epp/export/illumina.py
@@ -10,6 +10,262 @@
 import config
 
 
+def get_project(projects, urgent=False):
+    """Get a project name for sample."""
+    if urgent:  # Sort projects for urgent samples on name
+        projects_sorted = sorted(projects.items(), key=operator.itemgetter(0))
+        for project in projects_sorted:
+            if project[1] < 9:
+                return project[0]  # return first project with < 9 samples
+
+    # Sort projects on number of samples, if not urgent or no projects left with <9 samples
+    projects_sorted = sorted(projects.items(), key=operator.itemgetter(1))
+    return projects_sorted[0][0]  # return project with least amount of samples.
+
+
+def get_override_cycles(read_len, umi_len, index_len, max_index_len):
+    """Get override cycles per sample."""
+    # TODO: Adjust for ortientation on index 2
+
+    # Read cycles, Trim last base from read cycles
+    read_1_cycle = f'Y{read_len[0]-1}N1'
+    read_2_cycle = f'Y{read_len[1]-1}N1'
+
+    # Adjust read cycles if umi present
+    if umi_len[0]:
+        read_1_cycle = f'U{umi_len[0]}Y{read_len[0]-1-umi_len[0]}N1'
+    if umi_len[1]:
+        read_2_cycle = f'U{umi_len[1]}Y{read_len[1]-1-umi_len[1]}N1'
+
+    # Index cycles
+    index_1_cycle = f'I{index_len[0]}'
+    index_2_cycle = f'I{index_len[1]}'
+
+    # Adjust if index length is shorter than max index length
+    if index_len[0] < max_index_len[0]:
+        n_bases = max_index_len[0] - index_len[0]
+        index_1_cycle = f'I{index_len[0]}N{n_bases}'
+
+    if index_len[1] < max_index_len[1]:
+        n_bases = max_index_len[1] - index_len[1]
+        index_2_cycle = f'I{index_len[1]}N{n_bases}'
+
+    override_cycles = ';'.join([
+        read_1_cycle,  # read 1
+        index_1_cycle,  # index 1
+        index_2_cycle,  # index 2
+        read_2_cycle,  # read 2
+    ])
+
+    return override_cycles
+
+
+def parse_sample_artifacts(sample_artifacts, process):
+    families = {}
+    samplesheet_samples = {}
+
+    for sample_artifact in sample_artifacts:
+        # Find sample artifact index, expected pattern = "<index name> (index1-index2)"
+        sample_index = re.search(r".*\(([ACTGN]+)-([ACTGN]+)\)$", sample_artifact.reagent_labels[0])
+        sample_sequence_name = get_sequence_name(sample_artifact)
+
+        for sample in sample_artifact.samples:
+            # Dx production sample
+            if (
+                'Dx Familienummer' in list(sample.udf) and
+                'Dx NICU Spoed' in list(sample.udf) and
+                'Dx Protocolomschrijving' in list(sample.udf) and
+                'Dx Stoftest code' in list(sample.udf)
+            ):
+                # Skip Mengfractie samples
+                if sample.udf['Dx Stoftest code'] == config.stoftestcode_wes_duplo:
+                    continue
+
+                # Get sample conversion_settings
+                sample_conversion_setting = config.conversion_settings['default']
+                newest_protocol = sample.udf['Dx Protocolomschrijving'].split(';')[0]
+                for protocol_code in config.conversion_settings:
+                    if protocol_code in newest_protocol:
+                        sample_conversion_setting = config.conversion_settings[protocol_code]
+                        break
+
+                # Get sample override cycles
+                sample_override_cycles = get_override_cycles(
+                    read_len=[process.udf['Read 1 Cycles'], process.udf['Read 2 Cycles']],
+                    umi_len=sample_conversion_setting['umi_len'],
+                    trim_last_base=True,
+                    index_len=[len(sample_index.group(1)), len(sample_index.group(2))],
+                    max_index_len=[process.udf['Index Read 1'], process.udf['Index Read 2']]
+                )
+
+                # Set family and create if not exist
+                family = sample.udf['Dx Familienummer']
+                if family not in families:
+                    families[family] = {
+                        'samples': [],
+                        'NICU': False,
+                        'project_type': sample_conversion_setting['project'],
+                        'split_project_type': sample_conversion_setting['split_project'],
+                        'urgent': False,
+                        'deviating': False  # merge, deep sequencing (5x), etc samples
+                    }
+
+                # Update family information
+                if sample.udf['Dx Onderzoeksreden'] == 'Research':  # Dx research sample
+                    for onderzoeksindicatie in config.research_onderzoeksindicatie_project:
+                        if sample.udf['Dx Onderzoeksindicatie'] == onderzoeksindicatie:
+                            project_type = config.research_onderzoeksindicatie_project[onderzoeksindicatie]
+                            families[family]['project_type'] = project_type
+                            families[family]['split_project_type'] = False
+                            break
+
+                else:  # Dx clinic sample
+                    if sample.udf['Dx NICU Spoed']:
+                        families[family]['NICU'] = True
+                        families[family]['project_type'] = 'NICU_{0}'.format(sample.udf['Dx Familienummer'])
+                        families[family]['split_project_type'] = False
+
+                    # Set urgent status
+                    if 'Dx Spoed' in list(sample.udf) and sample.udf['Dx Spoed']:
+                        families[family]['urgent'] = True
+
+                    # Set deviating status, remove urgent status if deviating
+                    if (
+                        ('Dx Mergen' in list(sample.udf) and sample.udf['Dx Mergen']) or
+                        ('Dx Exoomequivalent' in list(sample.udf) and sample.udf['Dx Exoomequivalent'] > 1)
+                    ):
+                        families[family]['deviating'] = True
+                        families[family]['urgent'] = False
+
+            else:  # Other samples
+                # Use project name as family name and Remove 'dx' (ignore case) and strip leading space or _
+                family = re.sub('^dx[ _]*', '', sample.project.name, flags=re.IGNORECASE)
+                if family not in families:
+                    families[family] = {
+                        'samples': [],
+                        'NICU': False,
+                        'project_type': family,
+                        'split_project_type': False,
+                        'urgent': False,
+                        'deviating': False
+                    }
+
+                # Setup override cycles
+                if 'Dx Override Cycles' in list(sample.udf) and sample.udf['Dx Override Cycles']:
+                    sample_override_cycles = sample.udf['Dx Override Cycles']
+                else:
+                    sample_override_cycles = get_override_cycles(
+                        read_len=[process.udf['Read 1 Cycles'], process.udf['Read 2 Cycles']],
+                        umi_len=config.conversion_settings['default']['umi_len'],
+                        trim_last_base=config.conversion_settings['default']['trim_last_base'],
+                        index_len=[len(sample_index.group(1)), len(sample_index.group(2))],
+                        max_index_len=[process.udf['Index Read 1'], process.udf['Index Read 2']]
+                    )
+
+            # Add sample to samplesheet_samples
+            samplesheet_samples[sample_sequence_name] = {
+                'index_1': sample_index.group(1),
+                'index_2': sample_index.group(2),
+                'override_cycles': sample_override_cycles,
+            }
+
+            # Add sample to family
+            if sample_sequence_name not in families[family]['samples']:
+                families[family]['samples'].append(sample_sequence_name)
+
+    # Get all project types and count samples
+    project_types = {}
+    for family in families.values():
+        if family['project_type'] in project_types:
+            project_types[family['project_type']]['sample_count'] += len(family['samples'])
+        else:
+            project_types[family['project_type']] = {
+                'sample_count': len(family['samples']),
+                'projects': {},
+                'split_project_type': family['split_project_type']
+            }
+
+    # Define projects per project_type
+    for project_type in project_types:
+        project_types[project_type]['index'] = 0
+        if project_types[project_type]['split_project_type']:
+            for i in range(0, int(project_types[project_type]['sample_count']/9+1)):
+                project_types[project_type]['projects']['{0}_{1}'.format(project_type, i+1)] = 0
+        else:
+            project_types[project_type]['projects'][project_type] = 0
+
+    # Set sample projects
+    # Urgent families / samples, skip deviating
+    for family in [family for family in families.values() if family['urgent'] and not family['deviating']]:
+        family_project = get_project(project_types[family['project_type']]['projects'], urgent=True)
+        for sample_sequence_name in family['samples']:
+            samplesheet_samples[sample_sequence_name]['project'] = family_project
+            project_types[family['project_type']]['projects'][family_project] += 1
+
+    # Deviating families / samples
+    for family in [family for family in families.values() if family['deviating']]:
+        family_project = get_project(project_types[family['project_type']]['projects'])
+        for sample_sequence_name in family['samples']:
+            samplesheet_samples[sample_sequence_name]['project'] = family_project
+            project_types[family['project_type']]['projects'][family_project] += 1
+
+    # Non urgent and non deviating families / samples
+    normal_families = [family for family in families.values() if not family['urgent'] and not family['deviating']]
+    for family in sorted(normal_families, key=lambda fam: (len(fam['samples'])), reverse=True):
+        family_project = get_project(project_types[family['project_type']]['projects'])
+        for sample_sequence_name in family['samples']:
+            samplesheet_samples[sample_sequence_name]['project'] = family_project
+            project_types[family['project_type']]['projects'][family_project] += 1
+
+    return samplesheet_samples
+
+
+def create_samplesheet(lims, process_id, output_file):
+    """Create illumina samplesheet v2."""
+    # Default trim last base
+
+    process = Process(lims, id=process_id)
+    sample_artifacts = get_sample_artifacts_from_pool(lims, process.analytes()[0][0])
+    samplesheet_samples = parse_sample_artifacts(sample_artifacts, process)
+
+    # Create SampleSheet
+    # TODO: Add orientation support for index 2
+    # TODO: Compare with novaseq 6000 samplesheets
+    sample_sheet = []
+
+    # Header
+    sample_sheet.append('[Header]')
+    sample_sheet.append('FileFormatVersion,2')
+
+    # Reads
+    sample_sheet.append('[Reads]')
+    sample_sheet.append('Read1Cycles,{0}'.format(process.udf['Read 1 Cycles']))
+    sample_sheet.append('Read2Cycles,{0}'.format(process.udf['Read 2 Cycles']))
+
+    # BCLConvert_Settings
+    sample_sheet.append('[BCLConvert_Settings]')
+    sample_sheet.append('AdapterRead1,{0}'.format(process.udf['Adapter']))
+    sample_sheet.append('AdapterRead2,{0}'.format(process.udf['Adapter Read 2']))
+    sample_sheet.append('FindAdaptersWithIndels,true')
+
+    # BCLConvert_Data
+    sample_sheet.append('[BCLConvert_Data]')
+    sample_sheet.append('Sample_ID,index,index2,OverrideCycles,Sample_Project')
+
+    for sample in samplesheet_samples:
+        sample_sheet.append(
+            '{sample_name},{index_1},{index_2},{override_cycles},{project}'.format(
+                sample_name=sample,
+                index_1=samplesheet_samples[sample]['index_1'],
+                index_2=samplesheet_samples[sample]['index_2'],
+                override_cycles=samplesheet_samples[sample]['override_cycles'],
+                project=samplesheet_samples[sample]['project']
+            )
+        )
+
+    output_file.write('\n'.join(sample_sheet))
+
+
 def update_samplesheet(lims, process_id, artifact_id, output_file, conversion_tool):
     """Update illumina samplesheet."""
     process = Process(lims, id=process_id)
diff --git a/config.py b/config.py
index c002932..97b13ed 100755
--- a/config.py
+++ b/config.py
@@ -87,6 +87,25 @@
     'AUTOMATED - NovaSeq Run (NovaSeq 6000 v3.1)',
 ]
 
+# BCLConvert conversion settings
+conversion_settings = {
+    'default': {
+        'project': 'unknown',
+        'split_project': False,
+        'umi_len': [0, 0],
+    },
+    'elidS34226467': {
+        'project': 'CREv4',
+        'split_project': True,
+        'umi_len': [5, 5],
+    },
+    'elidS31285117': {
+        'project': 'SSv7',
+        'split_project': True,
+        'umi_len': [5, 5],
+    },
+}
+
 # Post sequencing workflow
 sequencing_workflow = '1701'  # DEV Dx Illumina Sequencing v1.2
 post_sequencing_workflow = '1204'  # DEV Dx Bioinformatica analyses v1.1

From 824434fa94147fb09479c489402c3654ba016a6f Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Fri, 19 Jan 2024 15:16:11 +0100
Subject: [PATCH 02/30] Add orientation fixes

---
 clarity_epp/export/illumina.py | 72 ++++++++++++++++++++++------------
 1 file changed, 47 insertions(+), 25 deletions(-)

diff --git a/clarity_epp/export/illumina.py b/clarity_epp/export/illumina.py
index a0d48c1..ee82063 100644
--- a/clarity_epp/export/illumina.py
+++ b/clarity_epp/export/illumina.py
@@ -23,9 +23,8 @@ def get_project(projects, urgent=False):
     return projects_sorted[0][0]  # return project with least amount of samples.
 
 
-def get_override_cycles(read_len, umi_len, index_len, max_index_len):
+def get_override_cycles(read_len, umi_len, index_len, max_index_len, index_2_orientation):
     """Get override cycles per sample."""
-    # TODO: Adjust for ortientation on index 2
 
     # Read cycles, Trim last base from read cycles
     read_1_cycle = f'Y{read_len[0]-1}N1'
@@ -48,7 +47,10 @@ def get_override_cycles(read_len, umi_len, index_len, max_index_len):
 
     if index_len[1] < max_index_len[1]:
         n_bases = max_index_len[1] - index_len[1]
-        index_2_cycle = f'I{index_len[1]}N{n_bases}'
+        if index_2_orientation == 'RC':
+            index_2_cycle = f'I{index_len[1]}N{n_bases}'
+        else:  # index_2_orientation == 'F'
+            index_2_cycle = f'N{n_bases}I{index_len[1]}'
 
     override_cycles = ';'.join([
         read_1_cycle,  # read 1
@@ -60,7 +62,7 @@ def get_override_cycles(read_len, umi_len, index_len, max_index_len):
     return override_cycles
 
 
-def parse_sample_artifacts(sample_artifacts, process):
+def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
     families = {}
     samplesheet_samples = {}
 
@@ -93,9 +95,9 @@ def parse_sample_artifacts(sample_artifacts, process):
                 sample_override_cycles = get_override_cycles(
                     read_len=[process.udf['Read 1 Cycles'], process.udf['Read 2 Cycles']],
                     umi_len=sample_conversion_setting['umi_len'],
-                    trim_last_base=True,
                     index_len=[len(sample_index.group(1)), len(sample_index.group(2))],
-                    max_index_len=[process.udf['Index Read 1'], process.udf['Index Read 2']]
+                    max_index_len=[process.udf['Index Read 1'], process.udf['Index Read 2']],
+                    index_2_orientation=index_2_orientation
                 )
 
                 # Set family and create if not exist
@@ -157,9 +159,9 @@ def parse_sample_artifacts(sample_artifacts, process):
                     sample_override_cycles = get_override_cycles(
                         read_len=[process.udf['Read 1 Cycles'], process.udf['Read 2 Cycles']],
                         umi_len=config.conversion_settings['default']['umi_len'],
-                        trim_last_base=config.conversion_settings['default']['trim_last_base'],
                         index_len=[len(sample_index.group(1)), len(sample_index.group(2))],
-                        max_index_len=[process.udf['Index Read 1'], process.udf['Index Read 2']]
+                        max_index_len=[process.udf['Index Read 1'], process.udf['Index Read 2']],
+                        index_2_orientation=index_2_orientation
                     )
 
             # Add sample to samplesheet_samples
@@ -168,6 +170,10 @@ def parse_sample_artifacts(sample_artifacts, process):
                 'index_2': sample_index.group(2),
                 'override_cycles': sample_override_cycles,
             }
+            if index_2_orientation == 'RC':  # Reverse complement index 2
+                samplesheet_samples[sample_sequence_name]['index_2'] = clarity_epp.export.utils.reverse_complement(
+                    samplesheet_samples[sample_sequence_name]['index_2']
+                )
 
             # Add sample to family
             if sample_sequence_name not in families[family]['samples']:
@@ -222,15 +228,16 @@ def parse_sample_artifacts(sample_artifacts, process):
 
 def create_samplesheet(lims, process_id, output_file):
     """Create illumina samplesheet v2."""
-    # Default trim last base
-
     process = Process(lims, id=process_id)
-    sample_artifacts = get_sample_artifacts_from_pool(lims, process.analytes()[0][0])
-    samplesheet_samples = parse_sample_artifacts(sample_artifacts, process)
+    index_2_orientation = config.index_2_orientation[process.type.name]
+
+    # Get samplesheet samples per lane, each analyte of the process is handled as one lane
+    samplesheet_samples = []
+    for lane in process.analytes()[0]:
+        sample_artifacts = get_sample_artifacts_from_pool(lims, lane)
+        samplesheet_samples.append(get_samplesheet_samples(sample_artifacts, process, index_2_orientation))
 
     # Create SampleSheet
-    # TODO: Add orientation support for index 2
-    # TODO: Compare with novaseq 6000 samplesheets
     sample_sheet = []
 
     # Header
     # Header
@@ -250,18 +257,33 @@ def create_samplesheet(lims, process_id, output_file):
 
     # BCLConvert_Data
     sample_sheet.append('[BCLConvert_Data]')
-    sample_sheet.append('Sample_ID,index,index2,OverrideCycles,Sample_Project')
-
-    for sample in samplesheet_samples:
-        sample_sheet.append(
-            '{sample_name},{index_1},{index_2},{override_cycles},{project}'.format(
-                sample_name=sample,
-                index_1=samplesheet_samples[sample]['index_1'],
-                index_2=samplesheet_samples[sample]['index_2'],
-                override_cycles=samplesheet_samples[sample]['override_cycles'],
-                project=samplesheet_samples[sample]['project']
+    if len(samplesheet_samples) == 1:  # All samples on all lanes
+        lane = 0
+        sample_sheet.append('Sample_ID,index,index2,OverrideCycles,Sample_Project')
+        for sample in samplesheet_samples[lane]:
+            sample_sheet.append(
+                '{sample_name},{index_1},{index_2},{override_cycles},{project}'.format(
+                    sample_name=sample,
+                    index_1=samplesheet_samples[lane][sample]['index_1'],
+                    index_2=samplesheet_samples[lane][sample]['index_2'],
+                    override_cycles=samplesheet_samples[lane][sample]['override_cycles'],
+                    project=samplesheet_samples[lane][sample]['project']
+                )
             )
-        )
+    else:  # Samples divided over lanes
+        sample_sheet.append('Lane,Sample_ID,index,index2,OverrideCycles,Sample_Project')
+        for lane, lane_samples in enumerate(samplesheet_samples):
+            for sample in lane_samples:
+                sample_sheet.append(
+                    '{lane},{sample_name},{index_1},{index_2},{override_cycles},{project}'.format(
+                        lane=lane+1,
+                        sample_name=sample,
+                        index_1=samplesheet_samples[lane][sample]['index_1'],
+                        index_2=samplesheet_samples[lane][sample]['index_2'],
+                        override_cycles=samplesheet_samples[lane][sample]['override_cycles'],
+                        project=samplesheet_samples[lane][sample]['project']
+                    )
+                )
 
     output_file.write('\n'.join(sample_sheet))
 

From 18d322c3873c2ea72aca790a5596ffe53e47b8aa Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Fri, 19 Jan 2024 15:17:27 +0100
Subject: [PATCH 03/30] Add orientation fixes

---
 config.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/config.py b/config.py
index 97b13ed..7553e7b 100755
--- a/config.py
+++ b/config.py
@@ -88,6 +88,12 @@
 ]
 
 # BCLConvert conversion settings
+index_2_orientation = {
+    # Orientation options: F=forward or RC=reverse complement
+    # https://knowledge.illumina.com/software/general/software-general-reference_material-list/000001800
+    'Dx Library pool denatureren en laden (NovaSeq) v1.3': 'RC',
+    'Dx Library pool denatureren en laden (NovaSeqXPlus) v1.0': 'F',
+}
 conversion_settings = {
     'default': {
         'project': 'unknown',

From e816fc23c3ce17e04867531b484031302b1cafdd Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Fri, 19 Jan 2024 15:18:16 +0100
Subject: [PATCH 04/30] Remove old update samplesheet code

---
 clarity_epp/export/illumina.py | 286 +--------------------------------
 1 file changed, 1 insertion(+), 285 deletions(-)

diff --git a/clarity_epp/export/illumina.py b/clarity_epp/export/illumina.py
index ee82063..549a8e3 100644
--- a/clarity_epp/export/illumina.py
+++ b/clarity_epp/export/illumina.py
@@ -1,9 +1,8 @@
 """Illumina export functions."""
 import operator
 import re
-import csv
 
-from genologics.entities import Process, Artifact
+from genologics.entities import Process
 
 from .. import get_sequence_name, get_sample_artifacts_from_pool
 import clarity_epp.export.utils
@@ -286,286 +285,3 @@ def create_samplesheet(lims, process_id, output_file):
                 )
 
     output_file.write('\n'.join(sample_sheet))
-
-
-def update_samplesheet(lims, process_id, artifact_id, output_file, conversion_tool):
-    """Update illumina samplesheet."""
-    process = Process(lims, id=process_id)
-    trim_last_base = True  # Used to set Read1EndWithCycle
-
-    def get_project(projects, urgent=False):
-        """Inner function to get a project name for samples."""
-        if urgent:  # Sort projects for urgent samples on name
-            projects_sorted = sorted(projects.items(), key=operator.itemgetter(0))
-            for project in projects_sorted:
-                if project[1] < 9:
-                    return project[0]  # return first project with < 9 samples
-
-        # Sort projects on number of samples, if not urgent or no projects left with <9 samples
-        projects_sorted = sorted(projects.items(), key=operator.itemgetter(1))
-        return projects_sorted[0][0]  # return project with least amount of samples.
-
-    # Parse families
-    families = {}
-    sample_artifacts = get_sample_artifacts_from_pool(lims, process.analytes()[0][0])
-
-    for sample_artifact in sample_artifacts:
-        for sample in sample_artifact.samples:
-            if (
-                'Dx Familienummer' in list(sample.udf) and
-                'Dx NICU Spoed' in list(sample.udf) and
-                'Dx Protocolomschrijving' in list(sample.udf)
-            ):
-                # Dx production sample
-                family = sample.udf['Dx Familienummer']
-
-                # Create family if not exist
-                if family not in families:
-                    families[family] = {
-                        'samples': [],
-                        'NICU': False,
-                        'project_type': 'unknown_project',
-                        'split_project_type': False,
-                        'urgent': False,
-                        'deviating': False  # merge, deep sequencing (5x), etc samples
-                    }
-
-                # Update family information
-                if sample.udf['Dx Onderzoeksreden'] == 'Research':  # Dx research sample
-                    for onderzoeksindicatie in config.research_onderzoeksindicatie_project:
-                        if sample.udf['Dx Onderzoeksindicatie'] == onderzoeksindicatie:
-                            project_type = config.research_onderzoeksindicatie_project[onderzoeksindicatie]
-                            families[family]['project_type'] = project_type
-                            families[family]['split_project_type'] = False
-                            break
-
-                else:  # Dx clinic sample
-                    newest_protocol = sample.udf['Dx Protocolomschrijving'].split(';')[0]
-                    if 'SNP fingerprint MIP' in newest_protocol and not families[family]['NICU']:
-                        project_type = 'Fingerprint'
-                        families[family]['project_type'] = project_type
-                        families[family]['split_project_type'] = False
-                        trim_last_base = False
-                    elif 'PID09.V7_smMIP' in newest_protocol and not families[family]['NICU']:
-                        project_type = 'ERARE'
-                        families[family]['project_type'] = project_type
-                        families[family]['split_project_type'] = False
-                        trim_last_base = False
-                    elif sample.udf['Dx NICU Spoed']:
-                        families[family]['NICU'] = True
-                        project_type = 'NICU_{0}'.format(sample.udf['Dx Familienummer'])
-                        families[family]['project_type'] = project_type
-                        families[family]['split_project_type'] = False
-                    elif 'elidS30409818' in newest_protocol and not families[family]['NICU']:
-                        project_type = 'CREv2'
-                        families[family]['project_type'] = project_type
-                        families[family]['split_project_type'] = True
-                    elif 'elidS31285117' in newest_protocol and not families[family]['NICU']:
-                        project_type = 'SSv7'
-                        families[family]['project_type'] = project_type
-                        families[family]['split_project_type'] = True
-                    elif 'elidS34226467' in newest_protocol and not families[family]['NICU']:
-                        project_type = 'CREv4'
-                        families[family]['project_type'] = project_type
-                        families[family]['split_project_type'] = True
-
-                    # Set urgent status
-                    if 'Dx Spoed' in list(sample.udf) and sample.udf['Dx Spoed']:
-                        families[family]['urgent'] = True
-
-                    # Set deviating status, remove urgent status if deviating
-                    if (
-                        ('Dx Mergen' in list(sample.udf) and sample.udf['Dx Mergen']) or
-                        ('Dx Exoomequivalent' in list(sample.udf) and sample.udf['Dx Exoomequivalent'] > 1)
-                    ):
-                        families[family]['deviating'] = True
-                        families[family]['urgent'] = False
-
-            else:  # Other samples
-                if 'GIAB' in sample.name.upper() and not sample.project:  # GIAB control samples
-                    family = 'GIAB'
-                else:
-                    family = sample.project.name
-                    # Remove 'dx' (ignore case) and strip leading space or _
-                    family = re.sub('^dx[ _]*', '', family, flags=re.IGNORECASE)
-                if family not in families:
-                    families[family] = {
-                        'samples': [],
-                        'NICU': False,
-                        'project_type': family,
-                        'split_project_type': False,
-                        'urgent': False,
-                        'deviating': False
-                    }
-
-            # Add sample_artifact to family
-            if sample_artifact not in families[family]['samples']:
-                families[family]['samples'].append(sample_artifact)
-
-    # Get all project types and count samples
-    project_types = {}
-    for family in families.values():
-        if family['project_type'] in project_types:
-            project_types[family['project_type']]['sample_count'] += len(family['samples'])
-        else:
-            project_types[family['project_type']] = {
-                'sample_count': len(family['samples']),
-                'projects': {},
-                'split_project_type': family['split_project_type']
-            }
-
-    # Define projects per project_type
-    for project_type in project_types:
-        project_types[project_type]['index'] = 0
-        if project_types[project_type]['split_project_type']:
-            for i in range(0, int(project_types[project_type]['sample_count']/9+1)):
-                project_types[project_type]['projects']['{0}_{1}'.format(project_type, i+1)] = 0
-        else:
-            project_types[project_type]['projects'][project_type] = 0
-
-    # Set sample projects
-    sample_projects = {}
-    sample_sequence_names = {}
-
-    # Urgent families / samples, skip deviating
-    for family in [family for family in families.values() if family['urgent'] and not family['deviating']]:
-        family_project = get_project(project_types[family['project_type']]['projects'], urgent=True)
-        for sample_artifact in family['samples']:
-            sample_sequence_name = get_sequence_name(sample_artifact)
-            for sample in sample_artifact.samples:
-                sample_sequence_names[sample.name] = sample_sequence_name
-            sample_projects[sample_sequence_name] = family_project
-            project_types[family['project_type']]['projects'][family_project] += 1
-
-    # Deviating families / samples
-    for family in [family for family in families.values() if family['deviating']]:
-        family_project = get_project(project_types[family['project_type']]['projects'])
-        for sample_artifact in family['samples']:
-            sample_sequence_name = get_sequence_name(sample_artifact)
-            for sample in sample_artifact.samples:
-                sample_sequence_names[sample.name] = sample_sequence_name
-            sample_projects[sample_sequence_name] = family_project
-            project_types[family['project_type']]['projects'][family_project] += 1
-
-    # Non urgent and non deviating families / samples
-    normal_families = [family for family in families.values() if not family['urgent'] and not family['deviating']]
-    for family in sorted(normal_families, key=lambda fam: (len(fam['samples'])), reverse=True):
-        family_project = get_project(project_types[family['project_type']]['projects'])
-        for sample_artifact in family['samples']:
-            sample_sequence_name = get_sequence_name(sample_artifact)
-            for sample in sample_artifact.samples:
-                sample_sequence_names[sample.name] = sample_sequence_name
-            sample_projects[sample_sequence_name] = family_project
-            project_types[family['project_type']]['projects'][family_project] += 1
-
-    # Check sequencer type
-    # NextSeq runs need to reverse complement 'index2' for dual barcodes and 'index' for single barcodes.
-    if 'nextseq' in process.type.name.lower():
-        nextseq_run = True
-    else:
-        nextseq_run = False
-
-    # Edit clarity samplesheet
-    sample_header = ''  # empty until [data] section
-    settings_section = False
-    samplesheet_artifact = Artifact(lims, id=artifact_id)
-    file_id = samplesheet_artifact.files[0].id
-
-    # Setup custom settings
-    custom_settings = ''
-
-    if conversion_tool == 'bcl2fastq' and trim_last_base:
-        custom_settings = (
-            'Read1EndWithCycle,{read_1_value}\n'
-            'Read2EndWithCycle,{read_2_value}\n'
-        ).format(
-            read_1_value=process.udf['Read 1 Cycles']-1, read_2_value=process.udf['Read 2 Cycles']-1
-        )
-
-    elif conversion_tool == 'bclconvert':
-        # Setup OverrideCycles
-        if trim_last_base or process.udf['UMI - Trim']:
-            override_cycles = [
-                '',  # read 1
-                'I{0}'.format(process.udf['Index Read 1']),  # index 1
-                'I{0}'.format(process.udf['Index Read 2']),  # index 2
-                '',  # read 2
-            ]
-
-            if trim_last_base and process.udf['UMI - Trim']:
-                override_cycles[0] = 'U{umi}Y{read}N1'.format(
-                    umi=process.udf['UMI - Read 1 Length'],
-                    read=process.udf['Read 1 Cycles'] - process.udf['UMI - Read 1 Length'] - 1
-                )
-                override_cycles[3] = 'U{umi}Y{read}N1'.format(
-                    umi=process.udf['UMI - Read 2 Length'],
-                    read=process.udf['Read 2 Cycles'] - process.udf['UMI - Read 2 Length'] - 1
-                )
-                custom_settings = 'TrimUMI,1\n'
-
-            elif trim_last_base:
-                override_cycles[0] = 'Y{read}N1'.format(read=process.udf['Read 1 Cycles'] - 1)
-                override_cycles[3] = 'Y{read}N1'.format(read=process.udf['Read 2 Cycles'] - 1)
-
-            elif process.udf['UMI - Trim']:
-                override_cycles[0] = 'U{umi}Y{read}'.format(
-                    umi=process.udf['UMI - Read 1 Length'],
-                    read=process.udf['Read 1 Cycles'] - process.udf['UMI - Read 1 Length']
-                )
-                override_cycles[3] = 'U{umi}Y{read}'.format(
-                    umi=process.udf['UMI - Read 2 Length'],
-                    read=process.udf['Read 2 Cycles'] - process.udf['UMI - Read 2 Length']
-                )
-                custom_settings = 'TrimUMI,1\n'
-
-            custom_settings = '{settings}OverrideCycles,{override_cycles}\n'.format(
-                settings=custom_settings,
-                override_cycles=';'.join(override_cycles)
-            )
-
-    for data in csv.reader(
-        lims.get_file_contents(id=file_id).rstrip().split('\n'),
-        quotechar='"', delimiter=',', quoting=csv.QUOTE_ALL, skipinitialspace=True
-    ):
-        if data[0] == '[Settings]' and custom_settings:
-            output_file.write('{line}\n'.format(line=','.join(data)))
-            output_file.write(custom_settings)
-            settings_section = True
-
-        elif data[0] == '[Data]' and custom_settings and not settings_section:
-            output_file.write('[Settings]\n')
-            output_file.write(custom_settings)
-            output_file.write('{line}\n'.format(line=','.join(data)))
-
-        elif data[0] == 'Sample_ID':  # Samples header line
-            sample_header = data
-            sample_id_index = sample_header.index('Sample_ID')
-            sample_name_index = sample_header.index('Sample_Name')
-            sample_project_index = sample_header.index('Sample_Project')
-
-            if 'index2' in sample_header:
-                index_index = sample_header.index('index2')
-            else:
-                index_index = sample_header.index('index')
-
-            output_file.write('{line}\n'.format(line=','.join(data)))
-
-        elif sample_header:  # Samples header seen, so continue with samples.
-            sample_name = data[sample_name_index].split(',')[0]
-            if sample_name in sample_sequence_names:
-                data[sample_name_index] = sample_sequence_names[sample_name]
-
-            # Set Sample_Project
-            if data[sample_name_index] in sample_projects:
-                data[sample_project_index] = sample_projects[data[sample_name_index]]
-
-            # Overwrite Sample_ID with Sample_name to get correct conversion output folder structure
-            data[sample_id_index] = data[sample_name_index]
-
-            # Reverse complement index for NextSeq runs
-            if nextseq_run:
-                data[index_index] = clarity_epp.export.utils.reverse_complement(data[index_index])
-
-            output_file.write('{line}\n'.format(line=','.join(data)))
-        else:  # Leave other lines untouched.
-            output_file.write('{line}\n'.format(line=','.join(data)))

From eeb11c48a7e928bb178f5ee84838053d935b6cec Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Fri, 19 Jan 2024 16:06:33 +0100
Subject: [PATCH 05/30] Update sequencing workflow

---
 config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config.py b/config.py
index 7553e7b..076eb83 100755
--- a/config.py
+++ b/config.py
@@ -113,7 +113,7 @@
 }
 
 # Post sequencing workflow
-sequencing_workflow = '1701'  # DEV Dx Illumina Sequencing v1.2
+sequencing_workflow = '2052'  # DEV Dx Illumina Sequencing v1.3
 post_sequencing_workflow = '1204'  # DEV Dx Bioinformatica analyses v1.1
 post_bioinf_workflow = '1803'  # DEV Dx NGS WES onderzoeken afronden v2.0
 

From 890feda60ec49ce95f67330a56ece30f278c25f4 Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Mon, 22 Jan 2024 14:28:06 +0100
Subject: [PATCH 06/30] Add illumina tests

---
 tests/test_export_illumina.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 tests/test_export_illumina.py

diff --git a/tests/test_export_illumina.py b/tests/test_export_illumina.py
new file mode 100644
index 0000000..fd5eb9e
--- /dev/null
+++ b/tests/test_export_illumina.py
@@ -0,0 +1,21 @@
+from clarity_epp.export import illumina
+
+
+def test_get_override_cycles():
+    # Magnis prep with legacy index settings (8, 8)
+    assert illumina.get_override_cycles([151, 151], [5, 5], [8, 8], [8, 8], 'RC') == 'U5Y145N1;I8;I8;U5Y145N1'
+    assert illumina.get_override_cycles([151, 151], [5, 5], [8, 8], [8, 8], 'F') == 'U5Y145N1;I8;I8;U5Y145N1'
+
+    # Magnis prep with new default index settings (19, 10)
+    assert illumina.get_override_cycles([151, 151], [5, 5], [8, 8], [19, 10], 'RC') == 'U5Y145N1;I8N11;I8N2;U5Y145N1'
+    assert illumina.get_override_cycles([151, 151], [5, 5], [8, 8], [19, 10], 'F') == 'U5Y145N1;I8N11;N2I8;U5Y145N1'
+
+
+def test_get_project():
+    assert illumina.get_project({'SSv7_1': 1, 'SSv7_2': 0, 'SSv7_3': 0}) == 'SSv7_2'
+    assert illumina.get_project({'SSv7_1': 1, 'SSv7_2': 0, 'SSv7_3': 0}, urgent=True) == 'SSv7_1'
+    assert illumina.get_project({'SSv7_1': 3, 'SSv7_2': 1, 'SSv7_3': 0}) == 'SSv7_3'
+    assert illumina.get_project({'SSv7_1': 3, 'SSv7_2': 1, 'SSv7_3': 0}, urgent=True) == 'SSv7_1'
+    assert illumina.get_project({'SSv7_1': 3, 'SSv7_2': 1, 'SSv7_3': 1}) == 'SSv7_2'
+    assert illumina.get_project({'SSv7_1': 3, 'SSv7_2': 1, 'SSv7_3': 0}, urgent=True) == 'SSv7_1'
+    assert illumina.get_project({'SSv7_1': 9, 'SSv7_2': 5, 'SSv7_3': 5}, urgent=True) == 'SSv7_2'

From 2b45a43cb89652ce7e1483d60af755992b22d0a2 Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Mon, 22 Jan 2024 17:18:02 +0100
Subject: [PATCH 07/30] Move get_sample_sequence_index to utils and add test

---
 clarity_epp/export/illumina.py | 14 +++++++-------
 clarity_epp/export/utils.py    | 11 +++++++++++
 tests/test_export_utils.py     |  7 +++++++
 3 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/clarity_epp/export/illumina.py b/clarity_epp/export/illumina.py
index 549a8e3..28fa276 100644
--- a/clarity_epp/export/illumina.py
+++ b/clarity_epp/export/illumina.py
@@ -5,7 +5,7 @@
 from genologics.entities import Process
 
 from .. import get_sequence_name, get_sample_artifacts_from_pool
-import clarity_epp.export.utils
+from clarity_epp.export.utils import get_sample_sequence_index, reverse_complement
 import config
 
 
@@ -67,7 +67,7 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
 
     for sample_artifact in sample_artifacts:
         # Find sample artifact index, expected pattern = "<index name> (index1-index2)"
-        sample_index = re.search(r".*\(([ACTGN]+)-([ACTGN]+)\)$", sample_artifact.reagent_labels[0])
+        sample_index = get_sample_sequence_index(sample_artifact.reagent_labels[0])
         sample_sequence_name = get_sequence_name(sample_artifact)
 
         for sample in sample_artifact.samples:
@@ -94,7 +94,7 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
                 sample_override_cycles = get_override_cycles(
                     read_len=[process.udf['Read 1 Cycles'], process.udf['Read 2 Cycles']],
                     umi_len=sample_conversion_setting['umi_len'],
-                    index_len=[len(sample_index.group(1)), len(sample_index.group(2))],
+                    index_len=[len(sample_index[0]), len(sample_index[1])],
                     max_index_len=[process.udf['Index Read 1'], process.udf['Index Read 2']],
                     index_2_orientation=index_2_orientation
                 )
@@ -158,19 +158,19 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
                     sample_override_cycles = get_override_cycles(
                         read_len=[process.udf['Read 1 Cycles'], process.udf['Read 2 Cycles']],
                         umi_len=config.conversion_settings['default']['umi_len'],
-                        index_len=[len(sample_index.group(1)), len(sample_index.group(2))],
+                        index_len=[len(sample_index[0]), len(sample_index[1])],
                         max_index_len=[process.udf['Index Read 1'], process.udf['Index Read 2']],
                         index_2_orientation=index_2_orientation
                     )
 
             # Add sample to samplesheet_samples
             samplesheet_samples[sample_sequence_name] = {
-                'index_1': sample_index.group(1),
-                'index_2': sample_index.group(2),
+                'index_1': sample_index[0],
+                'index_2': sample_index[1],
                 'override_cycles': sample_override_cycles,
             }
             if index_2_orientation == 'RC':  # Reverse complement index 2
-                samplesheet_samples[sample_sequence_name]['index_2'] = clarity_epp.export.utils.reverse_complement(
+                samplesheet_samples[sample_sequence_name]['index_2'] = reverse_complement(
                     samplesheet_samples[sample_sequence_name]['index_2']
                 )
 
diff --git a/clarity_epp/export/utils.py b/clarity_epp/export/utils.py
index 1f9d20c..52b1987 100755
--- a/clarity_epp/export/utils.py
+++ b/clarity_epp/export/utils.py
@@ -1,4 +1,5 @@
 """Utility functions used for creating samplesheets."""
+import re
 
 
 def sort_96_well_plate(wells):
@@ -84,3 +85,13 @@ def get_well_index(well, one_based=False):
         return wells.index(well) + 1
     else:
         return wells.index(well)
+
+
+def get_sample_sequence_index(reagent_label):
+    """Return sample sequence indices [index1, index2] from reagent label.
+    expected reagent label pattern = "<index name> (index1-index2)
+    """
+    sample_index_search = re.search(r".*\(([ACTGN]+)-([ACTGN]+)\)$", reagent_label)
+    sample_index = [sample_index_search.group(1), sample_index_search.group(2)]
+
+    return sample_index
\ No newline at end of file
diff --git a/tests/test_export_utils.py b/tests/test_export_utils.py
index 70bfea8..0918057 100644
--- a/tests/test_export_utils.py
+++ b/tests/test_export_utils.py
@@ -21,3 +21,10 @@ def test_sort_artifact_list():
 def test_get_well_index():
     assert utils.get_well_index('A1') == 0
     assert utils.get_well_index('A1', one_based=True) == 1
+
+
+def test_get_sample_sequence_index():
+    assert utils.get_sample_sequence_index('Dx 12D NEXTflex UDI 48 (TTAGAGTC-TGTGACGA)') == ['TTAGAGTC', 'TGTGACGA']
+    assert utils.get_sample_sequence_index('Dx 10G NEXTflex custom UDI 79 (TGAGGCGC-GGAGACCA)') == ['TGAGGCGC', 'GGAGACCA']
+    assert utils.get_sample_sequence_index('Dx 01G Agilent SureSelect XT HS2 UDI_v2 007 (GCAGGTTC-AGAAGCAA)') == ['GCAGGTTC', 'AGAAGCAA']
+    assert utils.get_sample_sequence_index('Dx 02B Agilent SureSelect XT HS2 UDI_v1 010 (TAGAGCTC-CTACCGAA)') == ['TAGAGCTC', 'CTACCGAA']

From 0d33a9ab908d1bf4d6c74d32b0395b2b87e324f8 Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Mon, 22 Jan 2024 17:19:51 +0100
Subject: [PATCH 08/30] Enable unit test @ master

---
 .github/workflows/python.yml | 51 ++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 6f567d9..5f7fcff 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -5,13 +5,12 @@ name: Python (flake8, pytest)
 
 on:
   push:
-    branches: [main, develop]
+    branches: [master, develop]
   pull_request:
-    branches: [main, develop]
+    branches: [master, develop]
 
 jobs:
   build:
-
     runs-on: ubuntu-20.04
     strategy:
       fail-fast: false
@@ -19,26 +18,26 @@ jobs:
         python-version: [3.6]
 
     steps:
-    - uses: actions/checkout@v3
-      with:
-        fetch-depth: 0
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: "Install Apache package"
-      run: sudo apt install -y apache2-dev
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        python -m pip install flake8 pytest
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-    - name: Lint with flake8
-      run: |
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: Test with pytest
-      run: |
-        pytest
\ No newline at end of file
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: "Install Apache package"
+        run: sudo apt install -y apache2-dev
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install flake8 pytest
+          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+      - name: Lint with flake8
+        run: |
+          # stop the build if there are Python syntax errors or undefined names
+          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+      - name: Test with pytest
+        run: |
+          pytest

From 20e8018ccc85b1be5ac139ce8d3aec1f6dc50bdc Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Thu, 25 Jan 2024 16:50:53 +0100
Subject: [PATCH 09/30] Fix for single index samples

---
 clarity_epp/export/illumina.py | 63 +++++++++++++++++-----------------
 clarity_epp/export/utils.py    |  8 ++---
 tests/test_export_illumina.py  |  6 ++--
 tests/test_export_utils.py     |  7 ++++
 4 files changed, 46 insertions(+), 38 deletions(-)

diff --git a/clarity_epp/export/illumina.py b/clarity_epp/export/illumina.py
index 28fa276..6994f24 100644
--- a/clarity_epp/export/illumina.py
+++ b/clarity_epp/export/illumina.py
@@ -24,38 +24,35 @@ def get_project(projects, urgent=False):
 
 def get_override_cycles(read_len, umi_len, index_len, max_index_len, index_2_orientation):
     """Get override cycles per sample."""
-
-    # Read cycles, Trim last base from read cycles
-    read_1_cycle = f'Y{read_len[0]-1}N1'
-    read_2_cycle = f'Y{read_len[1]-1}N1'
-
-    # Adjust read cycles if umi present
-    if umi_len[0]:
-        read_1_cycle = f'U{umi_len[0]}Y{read_len[0]-1-umi_len[0]}N1'
-    if umi_len[1]:
-        read_2_cycle = f'U{umi_len[1]}Y{read_len[1]-1-umi_len[1]}N1'
-
-    # Index cycles
-    index_1_cycle = f'I{index_len[0]}'
-    index_2_cycle = f'I{index_len[1]}'
-
-    # Adjust if index length is shorter than max index length
-    if index_len[0] < max_index_len[0]:
-        n_bases = max_index_len[0] - index_len[0]
-        index_1_cycle = f'I{index_len[0]}N{n_bases}'
-
-    if index_len[1] < max_index_len[1]:
-        n_bases = max_index_len[1] - index_len[1]
-        if index_2_orientation == 'RC':
-            index_2_cycle = f'I{index_len[1]}N{n_bases}'
-        else:  # index_2_orientation == 'F
-            index_2_cycle = f'N{n_bases}I{index_len[1]}'
+    read_cycles = ['', '']  # [read 1, read 2]
+    index_cycles = ['', '']  # [index 1, index 2]
+
+    for idx in range(len(read_cycles)):
+        if umi_len[idx]:  # UMI first, then read bases, last base of the read trimmed (N1)
+            read_cycle = f'U{umi_len[idx]}Y{read_len[idx]-1-umi_len[idx]}N1'
+        else:  # No UMI: read bases only, last base of the read trimmed (N1)
+            read_cycle = f'Y{read_len[idx]-1}N1'
+        read_cycles[idx] = read_cycle
+
+    for idx in range(len(index_cycles)):
+        if index_len[idx]:
+            if index_len[idx] < max_index_len[idx]:  # Index shorter than index read: mask surplus cycles
+                n_bases = max_index_len[idx] - index_len[idx]
+                if idx == 1 and index_2_orientation == 'F':  # Index 2 in forward orientation (NovaSeq X Plus)
+                    index_cycle = f'N{n_bases}I{index_len[idx]}'
+                else:
+                    index_cycle = f'I{index_len[idx]}N{n_bases}'
+            else:  # Index uses all cycles of the index read
+                index_cycle = f'I{index_len[idx]}'
+        else:  # Empty index (single index library): mask every cycle of this index read
+            index_cycle = f'N{max_index_len[idx]}'
+        index_cycles[idx] = index_cycle
 
     override_cycles = ';'.join([
-        read_1_cycle,  # read 1
-        index_1_cycle,  # index 1
-        index_2_cycle,  # index 2
-        read_2_cycle,  # read 2
+        read_cycles[0],  # read 1
+        index_cycles[0],  # index 1
+        index_cycles[1],  # index 2
+        read_cycles[1],  # read 2
     ])
 
     return override_cycles
@@ -66,9 +63,11 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
     samplesheet_samples = {}
 
     for sample_artifact in sample_artifacts:
-        # Find sample artifact index, expected pattern = "<index name> (index1-index2)"
-        sample_index = get_sample_sequence_index(sample_artifact.reagent_labels[0])
         sample_sequence_name = get_sequence_name(sample_artifact)
+        sample_index = get_sample_sequence_index(sample_artifact.reagent_labels[0])
+        # Adjust empty second index for single index samples
+        if len(sample_index) == 1:
+            sample_index.append('')
 
         for sample in sample_artifact.samples:
             # Dx production sample
diff --git a/clarity_epp/export/utils.py b/clarity_epp/export/utils.py
index 52b1987..549e9fa 100755
--- a/clarity_epp/export/utils.py
+++ b/clarity_epp/export/utils.py
@@ -89,9 +89,9 @@ def get_well_index(well, one_based=False):
 
 def get_sample_sequence_index(reagent_label):
     """Return sample sequence indices [index1, index2] from reagent label.
-    expected reagent label pattern = "<index name> (index1-index2)
+    expected reagent label pattern = "<index name> (index1-index2)" or "<index name> (index1)"
     """
-    sample_index_search = re.search(r".*\(([ACTGN]+)-([ACTGN]+)\)$", reagent_label)
-    sample_index = [sample_index_search.group(1), sample_index_search.group(2)]
+    sample_index_search = re.search(r"\(([ACTGN-]+)\)$", reagent_label)
+    sample_index = sample_index_search.group(1).split('-')
 
-    return sample_index
\ No newline at end of file
+    return sample_index
diff --git a/tests/test_export_illumina.py b/tests/test_export_illumina.py
index fd5eb9e..da43b9e 100644
--- a/tests/test_export_illumina.py
+++ b/tests/test_export_illumina.py
@@ -2,12 +2,14 @@
 
 
 def test_get_override_cycles():
-    # Magnis prep with legacy index settings (8, 8)
+    # Magnis prep with legacy index settings (8, 8) - NovaSeq 6000
     assert illumina.get_override_cycles([151, 151], [5, 5], [8, 8], [8, 8], 'RC') == 'U5Y145N1;I8;I8;U5Y145N1'
+    # Magnis prep with legacy index settings (8, 8) - NovaSeq X Plus
     assert illumina.get_override_cycles([151, 151], [5, 5], [8, 8], [8, 8], 'F') == 'U5Y145N1;I8;I8;U5Y145N1'
 
-    # Magnis prep with new default index settings (19, 10)
+    # Magnis prep with new default index settings (19, 10) - NovaSeq 6000
     assert illumina.get_override_cycles([151, 151], [5, 5], [8, 8], [19, 10], 'RC') == 'U5Y145N1;I8N11;I8N2;U5Y145N1'
+    # Magnis prep with new default index settings (19, 10) - NovaSeq X Plus
     assert illumina.get_override_cycles([151, 151], [5, 5], [8, 8], [19, 10], 'F') == 'U5Y145N1;I8N11;N2I8;U5Y145N1'
 
 
diff --git a/tests/test_export_utils.py b/tests/test_export_utils.py
index 0918057..f5c16c3 100644
--- a/tests/test_export_utils.py
+++ b/tests/test_export_utils.py
@@ -24,7 +24,14 @@ def test_get_well_index():
 
 
 def test_get_sample_sequence_index():
+    # Dual Index
     assert utils.get_sample_sequence_index('Dx 12D NEXTflex UDI 48 (TTAGAGTC-TGTGACGA)') == ['TTAGAGTC', 'TGTGACGA']
     assert utils.get_sample_sequence_index('Dx 10G NEXTflex custom UDI 79 (TGAGGCGC-GGAGACCA)') == ['TGAGGCGC', 'GGAGACCA']
     assert utils.get_sample_sequence_index('Dx 01G Agilent SureSelect XT HS2 UDI_v2 007 (GCAGGTTC-AGAAGCAA)') == ['GCAGGTTC', 'AGAAGCAA']
     assert utils.get_sample_sequence_index('Dx 02B Agilent SureSelect XT HS2 UDI_v1 010 (TAGAGCTC-CTACCGAA)') == ['TAGAGCTC', 'CTACCGAA']
+
+    # Single Index
+    assert utils.get_sample_sequence_index('Dx 12D NEXTflex UDI 48 (TTAGAGTC)') == ['TTAGAGTC']
+    assert utils.get_sample_sequence_index('Dx 10G NEXTflex custom UDI 79 (TGAGGCGC)') == ['TGAGGCGC']
+    assert utils.get_sample_sequence_index('Dx 01G Agilent SureSelect XT HS2 UDI_v2 007 (GCAGGTTC)') == ['GCAGGTTC']
+    assert utils.get_sample_sequence_index('Dx 02B Agilent SureSelect XT HS2 UDI_v1 010 (TAGAGCTC)') == ['TAGAGCTC']

From 1dc667d65401f3492033746ee2024c6cf02776bc Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Fri, 2 Feb 2024 16:20:55 +0100
Subject: [PATCH 10/30] Add some extra fields

---
 clarity_epp/export/illumina.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clarity_epp/export/illumina.py b/clarity_epp/export/illumina.py
index 6994f24..53266e0 100644
--- a/clarity_epp/export/illumina.py
+++ b/clarity_epp/export/illumina.py
@@ -241,11 +241,14 @@ def create_samplesheet(lims, process_id, output_file):
     # Header
     sample_sheet.append('[Header]')
     sample_sheet.append('FileFormatVersion,2')
+    sample_sheet.append('RunName,{0}'.format(process.udf['Experiment Name']))
 
     # Reads
     sample_sheet.append('[Reads]')
     sample_sheet.append('Read1Cycles,{0}'.format(process.udf['Read 1 Cycles']))
     sample_sheet.append('Read2Cycles,{0}'.format(process.udf['Read 2 Cycles']))
+    sample_sheet.append('Index1Cycles,{0}'.format(process.udf['Index Read 1']))
+    sample_sheet.append('Index2Cycles,{0}'.format(process.udf['Index Read 2']))
 
     # BCLConvert_Settings
     sample_sheet.append('[BCLConvert_Settings]')

From 163d21e7e2df3eafb0f432c8c12ee60fe75775c0 Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Fri, 2 Feb 2024 17:01:04 +0100
Subject: [PATCH 11/30] Add 0 Mismatches setting for barcodes

---
 clarity_epp/export/illumina.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clarity_epp/export/illumina.py b/clarity_epp/export/illumina.py
index 53266e0..14b8e92 100644
--- a/clarity_epp/export/illumina.py
+++ b/clarity_epp/export/illumina.py
@@ -255,6 +255,8 @@ def create_samplesheet(lims, process_id, output_file):
     sample_sheet.append('AdapterRead1,{0}'.format(process.udf['Adapter']))
     sample_sheet.append('AdapterRead2,{0}'.format(process.udf['Adapter Read 2']))
     sample_sheet.append('FindAdaptersWithIndels,true')
+    sample_sheet.append('BarcodeMismatchesIndex1,0')
+    sample_sheet.append('BarcodeMismatchesIndex2,0')
 
     # BCLConvert_Data
     sample_sheet.append('[BCLConvert_Data]')

From 67f35c6c357de25306a5527ae47f9fa56d4acc8f Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Mon, 12 Feb 2024 17:32:52 +0100
Subject: [PATCH 12/30] Code review changes

---
 clarity_epp/export/illumina.py | 92 ++++++++++++++++------------------
 1 file changed, 44 insertions(+), 48 deletions(-)

diff --git a/clarity_epp/export/illumina.py b/clarity_epp/export/illumina.py
index 14b8e92..b75994b 100644
--- a/clarity_epp/export/illumina.py
+++ b/clarity_epp/export/illumina.py
@@ -10,7 +10,9 @@
 
 
 def get_project(projects, urgent=False):
-    """Get a project name for sample."""
+    """Get a project name from projects dict ({'project_name': sample_count, ...})
+    If urgent is True, return the first project with < 9 samples, else return the project with the least amount of samples.
+    """
     if urgent:  # Sort projects for urgent samples on name
         projects_sorted = sorted(projects.items(), key=operator.itemgetter(0))
         for project in projects_sorted:
@@ -72,10 +74,10 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
         for sample in sample_artifact.samples:
             # Dx production sample
             if (
-                'Dx Familienummer' in list(sample.udf) and
-                'Dx NICU Spoed' in list(sample.udf) and
-                'Dx Protocolomschrijving' in list(sample.udf) and
-                'Dx Stoftest code' in list(sample.udf)
+                'Dx Familienummer' in sample.udf and
+                'Dx NICU Spoed' in sample.udf and
+                'Dx Protocolomschrijving' in sample.udf and
+                'Dx Stoftest code' in sample.udf
             ):
                 # Skip Mengfractie samples
                 if sample.udf['Dx Stoftest code'] == config.stoftestcode_wes_duplo:
@@ -85,7 +87,7 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
                 sample_conversion_setting = config.conversion_settings['default']
                 newest_protocol = sample.udf['Dx Protocolomschrijving'].split(';')[0]
                 for protocol_code in config.conversion_settings:
-                    if protocol_code in newest_protocol:
+                    if protocol_code in newest_protocol:  # Look for protocol code (elid number) in newest protocol
                         sample_conversion_setting = config.conversion_settings[protocol_code]
                         break
 
@@ -236,56 +238,50 @@ def create_samplesheet(lims, process_id, output_file):
         samplesheet_samples.append(get_samplesheet_samples(sample_artifacts, process, index_2_orientation))
 
     # Create SampleSheet
-    sample_sheet = []
-
-    # Header
-    sample_sheet.append('[Header]')
-    sample_sheet.append('FileFormatVersion,2')
-    sample_sheet.append('RunName,{0}'.format(process.udf['Experiment Name']))
-
-    # Reads
-    sample_sheet.append('[Reads]')
-    sample_sheet.append('Read1Cycles,{0}'.format(process.udf['Read 1 Cycles']))
-    sample_sheet.append('Read2Cycles,{0}'.format(process.udf['Read 2 Cycles']))
-    sample_sheet.append('Index1Cycles,{0}'.format(process.udf['Index Read 1']))
-    sample_sheet.append('Index2Cycles,{0}'.format(process.udf['Index Read 2']))
-
-    # BCLConvert_Settings
-    sample_sheet.append('[BCLConvert_Settings]')
-    sample_sheet.append('AdapterRead1,{0}'.format(process.udf['Adapter']))
-    sample_sheet.append('AdapterRead2,{0}'.format(process.udf['Adapter Read 2']))
-    sample_sheet.append('FindAdaptersWithIndels,true')
-    sample_sheet.append('BarcodeMismatchesIndex1,0')
-    sample_sheet.append('BarcodeMismatchesIndex2,0')
+    sample_sheet = [
+        # Header
+        "[Header]",
+        "FileFormatVersion,2",
+        f"RunName,{process.udf['Experiment Name']}",
+        # Reads
+        "[Reads]",
+        f"Read1Cycles,{process.udf['Read 1 Cycles']}",
+        f"Read2Cycles,{process.udf['Read 2 Cycles']}",
+        f"Index1Cycles,{process.udf['Index Read 1']}",
+        f"Index2Cycles,{process.udf['Index Read 2']}",
+        # BCLConvert_Settings
+        "[BCLConvert_Settings]",
+        f"AdapterRead1,{process.udf['Adapter']}",
+        f"AdapterRead2,{process.udf['Adapter Read 2']}",
+        "FindAdaptersWithIndels,true",
+        "BarcodeMismatchesIndex1,0",
+        "BarcodeMismatchesIndex2,0",
+        "[BCLConvert_Data]"
+    ]
 
     # BCLConvert_Data
-    sample_sheet.append('[BCLConvert_Data]')
+    # Set header for single or multiple lanes conversion
+    bcl_convert_data_header = "Sample_ID,index,index2,OverrideCycles,Sample_Project"
     if len(samplesheet_samples) == 1:  # All samples on all lanes
-        lane = 0
-        sample_sheet.append('Sample_ID,index,index2,OverrideCycles,Sample_Project')
-        for sample in samplesheet_samples[lane]:
-            sample_sheet.append(
-                '{sample_name},{index_1},{index_2},{override_cycles},{project}'.format(
+        multiple_lanes = False
+    else:
+        multiple_lanes = True
+        bcl_convert_data_header = f"Lane,{bcl_convert_data_header}"  # Add lane column to header if multiple lanes conversion
+    sample_sheet.append(bcl_convert_data_header)
+
+    # Add samples to SampleSheet
+    for lane, lane_samples in enumerate(samplesheet_samples):
+        for sample in lane_samples:
+            bcl_convert_data_row = "{sample_name},{index_1},{index_2},{override_cycles},{project}".format(
                     sample_name=sample,
                     index_1=samplesheet_samples[lane][sample]['index_1'],
                     index_2=samplesheet_samples[lane][sample]['index_2'],
                     override_cycles=samplesheet_samples[lane][sample]['override_cycles'],
                     project=samplesheet_samples[lane][sample]['project']
                 )
-            )
-    else:  # Samples divided over lanes
-        sample_sheet.append('Lane,Sample_ID,index,index2,OverrideCycles,Sample_Project')
-        for lane, lane_samples in enumerate(samplesheet_samples):
-            for sample in lane_samples:
-                sample_sheet.append(
-                    '{lane},{sample_name},{index_1},{index_2},{override_cycles},{project}'.format(
-                        lane=lane+1,
-                        sample_name=sample,
-                        index_1=samplesheet_samples[lane][sample]['index_1'],
-                        index_2=samplesheet_samples[lane][sample]['index_2'],
-                        override_cycles=samplesheet_samples[lane][sample]['override_cycles'],
-                        project=samplesheet_samples[lane][sample]['project']
-                    )
-                )
+            if multiple_lanes:  # Add lane number to row if multiple lanes conversion
+                bcl_convert_data_row = f"{lane+1},{bcl_convert_data_row}"
+            sample_sheet.append(bcl_convert_data_row)
 
+    # Write SampleSheet to file
     output_file.write('\n'.join(sample_sheet))

From 02fe411ef0b9d4793eb10c981454b369032d4f8a Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Mon, 12 Feb 2024 17:35:54 +0100
Subject: [PATCH 13/30] Layout

---
 clarity_epp/export/illumina.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clarity_epp/export/illumina.py b/clarity_epp/export/illumina.py
index b75994b..f35b8de 100644
--- a/clarity_epp/export/illumina.py
+++ b/clarity_epp/export/illumina.py
@@ -256,10 +256,10 @@ def create_samplesheet(lims, process_id, output_file):
         "FindAdaptersWithIndels,true",
         "BarcodeMismatchesIndex1,0",
         "BarcodeMismatchesIndex2,0",
+        # BCLConvert_Data
         "[BCLConvert_Data]"
     ]
 
-    # BCLConvert_Data
     # Set header for single or multiple lanes conversion
     bcl_convert_data_header = "Sample_ID,index,index2,OverrideCycles,Sample_Project"
     if len(samplesheet_samples) == 1:  # All samples on all lanes

From 0c5ee77a6bfa0b07969a3658333eedcf8f917b7c Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Tue, 13 Feb 2024 11:59:01 +0100
Subject: [PATCH 14/30] Add indications_exome_equivalent

---
 clarity_epp/upload/samples.py | 4 ++++
 config.py                     | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/clarity_epp/upload/samples.py b/clarity_epp/upload/samples.py
index b858e0c..cba9527 100644
--- a/clarity_epp/upload/samples.py
+++ b/clarity_epp/upload/samples.py
@@ -166,6 +166,10 @@ def from_helix(lims, email_settings, input_file):
         if udf_data['Dx Onderzoeksindicatie'] == 'DSD00' and udf_data['Dx Familie status'] == 'Kind':
             udf_data['Dx Geslacht'] = 'Onbekend'
 
+        # Set 'Dx Exoomequivalent' for specific indications
+        if udf_data['Dx Onderzoeksindicatie'] in config.indications_exome_equivalent:
+            udf_data['Dx Exoomequivalent'] = config.indications_exome_equivalent[udf_data['Dx Onderzoeksindicatie']]
+
         # Check 'Dx Familienummer' and correct
         if '/' in udf_data['Dx Familienummer']:
             udf_data['Dx Import warning'] = ';'.join([
diff --git a/config.py b/config.py
index c002932..4b45714 100755
--- a/config.py
+++ b/config.py
@@ -30,6 +30,9 @@
     stoftestcode_mip: '1651',  # DEV Dx smMIP v1.2
 }
 
+# Update exome equivalent for certain indications
+indications_exome_equivalent = {'UBA1': 5, 'PID09': 5}
+
 # Export meetw protocol steps WES
 meetw_zui_wes_processes = [
     'Dx Sample registratie zuivering v1.1',

From 94848dba6fce1b124818c97dd20be1f176d20190 Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Thu, 15 Feb 2024 21:47:14 +0100
Subject: [PATCH 15/30] Change volume

---
 clarity_epp/export/manual_pipetting.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clarity_epp/export/manual_pipetting.py b/clarity_epp/export/manual_pipetting.py
index 81cea0c..09a007e 100755
--- a/clarity_epp/export/manual_pipetting.py
+++ b/clarity_epp/export/manual_pipetting.py
@@ -681,9 +681,9 @@ def samplesheet_pool_samples(lims, process_id, output_file):
             input_sample = input_artifact.samples[0]  # Asume one sample
 
             if 'Dx Exoomequivalent' in input_sample.udf:
-                volume = 5 * input_sample.udf['Dx Exoomequivalent']
+                volume = 4 * input_sample.udf['Dx Exoomequivalent']
             else:
-                volume = 5
+                volume = 4
 
             output_file.write(
                 '{sample}\t{container}\t{well}\t{pool}\t{volume}\n'.format(

From 6ccd9cdd2ed98d39ab3d2ceba603f0e9132a225a Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Thu, 15 Feb 2024 22:53:24 +0100
Subject: [PATCH 16/30] Add extra required conversion settings

---
 clarity_epp/export/illumina.py | 34 +++++++++++++++++++++-------------
 config.py                      | 20 ++++++++++++++++----
 2 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/clarity_epp/export/illumina.py b/clarity_epp/export/illumina.py
index f35b8de..f536868 100644
--- a/clarity_epp/export/illumina.py
+++ b/clarity_epp/export/illumina.py
@@ -24,7 +24,7 @@ def get_project(projects, urgent=False):
     return projects_sorted[0][0]  # return project with least amount of samples.
 
 
-def get_override_cycles(read_len, umi_len, index_len, max_index_len, index_2_orientation):
+def get_override_cycles(read_len, umi_len, index_len, max_index_len, index_2_conversion_orientation):
     """Get override cycles per sample."""
     read_cycles = ['', '']
     index_cycles = ['', '']
@@ -40,7 +40,7 @@ def get_override_cycles(read_len, umi_len, index_len, max_index_len, index_2_ori
         if index_len[idx]:
             if index_len[idx] < max_index_len[idx]:
                 n_bases = max_index_len[idx] - index_len[idx]
-                if idx == 1 and index_2_orientation == 'F':  # Index 2 in forward orientation (NovaSeq X Plus)
+                if idx == 1 and index_2_conversion_orientation == 'F':  # Index 2 in forward orientation (NovaSeq X Plus)
                     index_cycle = f'N{n_bases}I{index_len[idx]}'
                 else:
                     index_cycle = f'I{index_len[idx]}N{n_bases}'
@@ -60,7 +60,7 @@ def get_override_cycles(read_len, umi_len, index_len, max_index_len, index_2_ori
     return override_cycles
 
 
-def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
+def get_samplesheet_samples(sample_artifacts, process, index_2_conversion_orientation):
     families = {}
     samplesheet_samples = {}
 
@@ -83,12 +83,12 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
                 if sample.udf['Dx Stoftest code'] == config.stoftestcode_wes_duplo:
                     continue
 
-                # Get sample conversion_settings
-                sample_conversion_setting = config.conversion_settings['default']
+                # Get sample conversion settings
+                sample_conversion_setting = config.sample_conversion_settings['default']
                 newest_protocol = sample.udf['Dx Protocolomschrijving'].split(';')[0]
-                for protocol_code in config.conversion_settings:
+                for protocol_code in config.sample_conversion_settings:
                     if protocol_code in newest_protocol:  # Look for protocol code (elid number) in newest protocol
-                        sample_conversion_setting = config.conversion_settings[protocol_code]
+                        sample_conversion_setting = config.sample_conversion_settings[protocol_code]
                         break
 
                 # Get sample override cycles
@@ -97,7 +97,7 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
                     umi_len=sample_conversion_setting['umi_len'],
                     index_len=[len(sample_index[0]), len(sample_index[1])],
                     max_index_len=[process.udf['Index Read 1'], process.udf['Index Read 2']],
-                    index_2_orientation=index_2_orientation
+                    index_2_conversion_orientation=index_2_conversion_orientation
                 )
 
                 # Set family and create if not exist
@@ -158,10 +158,10 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
                 else:
                     sample_override_cycles = get_override_cycles(
                         read_len=[process.udf['Read 1 Cycles'], process.udf['Read 2 Cycles']],
-                        umi_len=config.conversion_settings['default']['umi_len'],
+                        umi_len=config.sample_conversion_settings['default']['umi_len'],
                         index_len=[len(sample_index[0]), len(sample_index[1])],
                         max_index_len=[process.udf['Index Read 1'], process.udf['Index Read 2']],
-                        index_2_orientation=index_2_orientation
+                        index_2_conversion_orientation=index_2_conversion_orientation
                     )
 
             # Add sample to samplesheet_samples
@@ -170,7 +170,7 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
                 'index_2': sample_index[1],
                 'override_cycles': sample_override_cycles,
             }
-            if index_2_orientation == 'RC':  # Reverse complement index 2
+            if index_2_conversion_orientation == 'RC':  # Reverse complement index 2
                 samplesheet_samples[sample_sequence_name]['index_2'] = reverse_complement(
                     samplesheet_samples[sample_sequence_name]['index_2']
                 )
@@ -229,19 +229,25 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
 def create_samplesheet(lims, process_id, output_file):
     """Create illumina samplesheet v2."""
     process = Process(lims, id=process_id)
-    index_2_orientation = config.index_2_orientation[process.type.name]
+    sequencer_conversion_settings = config.sequencer_conversion_settings[process.type.name]
 
     # Get samples samples per lane
     samplesheet_samples = []
     for lane in process.analytes()[0]:
         sample_artifacts = get_sample_artifacts_from_pool(lims, process.analytes()[0][0])
-        samplesheet_samples.append(get_samplesheet_samples(sample_artifacts, process, index_2_orientation))
+        samplesheet_samples.append(
+            get_samplesheet_samples(
+                sample_artifacts, process, sequencer_conversion_settings['index_2_conversion_orientation']
+            )
+        )
 
     # Create SampleSheet
     sample_sheet = [
         # Header
         "[Header]",
         "FileFormatVersion,2",
+        f"InstrumentPlatform,{sequencer_conversion_settings['instrument_platform']}",
+        f"IndexOrientation,{sequencer_conversion_settings['index_orientation']}",
         f"RunName,{process.udf['Experiment Name']}",
         # Reads
         "[Reads]",
@@ -251,6 +257,8 @@ def create_samplesheet(lims, process_id, output_file):
         f"Index2Cycles,{process.udf['Index Read 2']}",
         # BCLConvert_Settings
         "[BCLConvert_Settings]",
+        f"SoftwareVersion,{sequencer_conversion_settings['software_version']}",
+        f"FastqCompressionFormat,{sequencer_conversion_settings['fastq_compression_format']}",
         f"AdapterRead1,{process.udf['Adapter']}",
         f"AdapterRead2,{process.udf['Adapter Read 2']}",
         "FindAdaptersWithIndels,true",
diff --git a/config.py b/config.py
index 09619a9..dd9a1d2 100755
--- a/config.py
+++ b/config.py
@@ -91,13 +91,25 @@
 ]
 
 # BCLConvert conversion settings
-index_2_orientation = {
+sequencer_conversion_settings = {
     # Orientation options: F=forward or RC=reverse complement
     # https://knowledge.illumina.com/software/general/software-general-reference_material-list/000001800
-    'Dx Library pool denatureren en laden (NovaSeq) v1.3': 'RC',
-    'Dx Library pool denatureren en laden (NovaSeqXPlus) v1.0': 'F',
+    'Dx Library pool denatureren en laden (NovaSeq) v1.3': {
+        'index_2_conversion_orientation': 'RC',
+        'instrument_platform': 'NovaSeq',
+        'index_orientation': 'Forward',
+        'software_version': '4.1.7',
+        'fastq_compression_format': 'GZIP',
+    },
+    'Dx Library pool denatureren en laden (NovaSeqXPlus) v1.0': {
+        'index_2_conversion_orientation': 'F',
+        'instrument_platform': 'NovaSeqXPlus',
+        'index_orientation': 'Forward',
+        'software_version': '4.1.7',
+        'fastq_compression_format': 'GZIP',
+    },
 }
-conversion_settings = {
+sample_conversion_settings = {
     'default': {
         'project': 'unknown',
         'split_project': False,

From 4bf19f0d122f62e65effb3878a8b29953b95f508 Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Tue, 20 Feb 2024 15:48:54 +0100
Subject: [PATCH 17/30] Tweaks based on lab input

---
 clarity_epp/export/illumina.py | 2 +-
 config.py                      | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/clarity_epp/export/illumina.py b/clarity_epp/export/illumina.py
index f536868..5b00707 100644
--- a/clarity_epp/export/illumina.py
+++ b/clarity_epp/export/illumina.py
@@ -261,7 +261,7 @@ def create_samplesheet(lims, process_id, output_file):
         f"FastqCompressionFormat,{sequencer_conversion_settings['fastq_compression_format']}",
         f"AdapterRead1,{process.udf['Adapter']}",
         f"AdapterRead2,{process.udf['Adapter Read 2']}",
-        "FindAdaptersWithIndels,true",
+        "FindAdaptersWithIndels,TRUE",
         "BarcodeMismatchesIndex1,0",
         "BarcodeMismatchesIndex2,0",
         # BCLConvert_Data
diff --git a/config.py b/config.py
index dd9a1d2..3659e7b 100755
--- a/config.py
+++ b/config.py
@@ -99,14 +99,14 @@
         'instrument_platform': 'NovaSeq',
         'index_orientation': 'Forward',
         'software_version': '4.1.7',
-        'fastq_compression_format': 'GZIP',
+        'fastq_compression_format': 'gzip',
     },
     'Dx Library pool denatureren en laden (NovaSeqXPlus) v1.0': {
         'index_2_conversion_orientation': 'F',
-        'instrument_platform': 'NovaSeqXPlus',
+        'instrument_platform': 'NovaSeqXSeries',
         'index_orientation': 'Forward',
         'software_version': '4.1.7',
-        'fastq_compression_format': 'GZIP',
+        'fastq_compression_format': 'gzip',
     },
 }
 sample_conversion_settings = {

From 94316306d58c95487e62c212a461a8dd7b885175 Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Wed, 21 Feb 2024 14:23:40 +0100
Subject: [PATCH 18/30] python3

---
 clarity_epp/export/bioanalyzer.py      | 2 +-
 clarity_epp/export/manual_pipetting.py | 2 ++
 clarity_epp/export/tapestation.py      | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/clarity_epp/export/bioanalyzer.py b/clarity_epp/export/bioanalyzer.py
index 1e619f6..0eefe96 100644
--- a/clarity_epp/export/bioanalyzer.py
+++ b/clarity_epp/export/bioanalyzer.py
@@ -16,7 +16,7 @@ def samplesheet(lims, process_id, output_file):
     }
 
     # Get sample placement
-    for placement, artifact in process.output_containers()[0].placements.iteritems():
+    for placement, artifact in process.output_containers()[0].placements.items():
         placement = ''.join(placement.split(':'))
         plate[placement]['name'] = artifact.name
         plate[placement]['comment'] = ''
diff --git a/clarity_epp/export/manual_pipetting.py b/clarity_epp/export/manual_pipetting.py
index 09a007e..95400c2 100755
--- a/clarity_epp/export/manual_pipetting.py
+++ b/clarity_epp/export/manual_pipetting.py
@@ -362,6 +362,8 @@ def samplesheet_multiplex_sequence_pool(lims, process_id, output_file):
     # print header
     output_file.write('Naam\tuL\n')
 
+    print(total_sample_count)
+    print(input_pools)
     # Last calcuations and print sample
     for input_pool in input_pools:
         input_pool_load_pM = (float(process.udf['Dx Laadconcentratie (pM)'])/total_sample_count) * input_pool['sample_count']
diff --git a/clarity_epp/export/tapestation.py b/clarity_epp/export/tapestation.py
index b7fcc32..f39d52e 100644
--- a/clarity_epp/export/tapestation.py
+++ b/clarity_epp/export/tapestation.py
@@ -10,7 +10,7 @@ def samplesheet(lims, process_id, output_file):
     process = Process(lims, id=process_id)
     well_plate = {}
 
-    for placement, artifact in process.output_containers()[0].placements.iteritems():
+    for placement, artifact in process.output_containers()[0].placements.items():
         placement = ''.join(placement.split(':'))
         well_plate[placement] = artifact.name.split('_')[0]
 

From f40ed380c72f3cc24b84fe48d69ada138b6f7bfa Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Wed, 21 Feb 2024 14:33:30 +0100
Subject: [PATCH 19/30] python3

---
 clarity_epp/export/hamilton.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clarity_epp/export/hamilton.py b/clarity_epp/export/hamilton.py
index d4c589b..0002df4 100755
--- a/clarity_epp/export/hamilton.py
+++ b/clarity_epp/export/hamilton.py
@@ -11,7 +11,7 @@ def samplesheet_filling_out(lims, process_id, output_file):
     process = Process(lims, id=process_id)
     well_plate = {}
 
-    for placement, artifact in process.output_containers()[0].placements.iteritems():
+    for placement, artifact in process.output_containers()[0].placements.items():
         placement = ''.join(placement.split(':'))
         well_plate[placement] = artifact.samples[0].udf['Dx Fractienummer']
 
@@ -29,7 +29,7 @@ def samplesheet_purify(lims, process_id, output_file):
     parent_process_barcode = process.parent_processes()[0].output_containers()[0].name
     well_plate = {}
 
-    for placement, artifact in process.output_containers()[0].placements.iteritems():
+    for placement, artifact in process.output_containers()[0].placements.items():
         placement = ''.join(placement.split(':'))
         well_plate[placement] = artifact.samples[0].udf['Dx Fractienummer']
 

From 9192ec4612f29e07b2a5f92d2073a2a9614d6fda Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Fri, 23 Feb 2024 09:03:05 +0100
Subject: [PATCH 20/30] Remove debug prints

---
 clarity_epp/export/manual_pipetting.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/clarity_epp/export/manual_pipetting.py b/clarity_epp/export/manual_pipetting.py
index 95400c2..09a007e 100755
--- a/clarity_epp/export/manual_pipetting.py
+++ b/clarity_epp/export/manual_pipetting.py
@@ -362,8 +362,6 @@ def samplesheet_multiplex_sequence_pool(lims, process_id, output_file):
     # print header
     output_file.write('Naam\tuL\n')
 
-    print(total_sample_count)
-    print(input_pools)
     # Last calcuations and print sample
     for input_pool in input_pools:
         input_pool_load_pM = (float(process.udf['Dx Laadconcentratie (pM)'])/total_sample_count) * input_pool['sample_count']

From 9be5ff25fed7bfd7b1235b972747d50864899024 Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Thu, 29 Feb 2024 09:25:38 +0100
Subject: [PATCH 21/30] Fix 'map' object is not subscriptable

---
 clarity_epp/qc/qubit.py     | 2 +-
 clarity_epp/upload/tecan.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clarity_epp/qc/qubit.py b/clarity_epp/qc/qubit.py
index fcc07b6..2c16aee 100644
--- a/clarity_epp/qc/qubit.py
+++ b/clarity_epp/qc/qubit.py
@@ -9,7 +9,7 @@ def set_qc_flag(lims, process_id, cutoff=10):
     """Set qubit qc flags based on Dx Concentratie fluorescentie (ng/ul) values."""
     process = Process(lims, id=process_id)
     artifacts = process.result_files()
-    concentration_range = map(float, re.findall('[\d\.]+', process.udf['Concentratiebereik (ng/ul)']))
+    concentration_range = list(map(float, re.findall('[\d\.]+', process.udf['Concentratiebereik (ng/ul)'])))
     samples_measurements = {}
 
     for artifact in artifacts:
diff --git a/clarity_epp/upload/tecan.py b/clarity_epp/upload/tecan.py
index d0e33c0..1dbbaec 100644
--- a/clarity_epp/upload/tecan.py
+++ b/clarity_epp/upload/tecan.py
@@ -10,7 +10,7 @@
 def results_qc(lims, process_id):
     """Upload tecan results to artifacts."""
     process = Process(lims, id=process_id)
-    concentration_range = map(float, re.findall('[\d\.]+', process.udf['Concentratiebereik (ng/ul)']))
+    concentration_range = list(map(float, re.findall('[\d\.]+', process.udf['Concentratiebereik (ng/ul)'])))
 
     # Parse output file
     for output in process.all_outputs(unique=True):

From b45a587131f0ddd6fc2ac9abfc902f2226ee009f Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Thu, 29 Feb 2024 11:58:36 +0100
Subject: [PATCH 22/30] Add default setting for TrimUMI

---
 clarity_epp/export/illumina.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clarity_epp/export/illumina.py b/clarity_epp/export/illumina.py
index 5b00707..ece8512 100644
--- a/clarity_epp/export/illumina.py
+++ b/clarity_epp/export/illumina.py
@@ -264,6 +264,7 @@ def create_samplesheet(lims, process_id, output_file):
         "FindAdaptersWithIndels,TRUE",
         "BarcodeMismatchesIndex1,0",
         "BarcodeMismatchesIndex2,0",
+        "TrimUMI,TRUE",
         # BCLConvert_Data
         "[BCLConvert_Data]"
     ]

From 2e6bcec0b67640d53009d413cfeb031d36643cff Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Thu, 29 Feb 2024 13:19:06 +0100
Subject: [PATCH 23/30] Change or to and

---
 clarity_epp/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clarity_epp/__init__.py b/clarity_epp/__init__.py
index e41654a..61e1013 100644
--- a/clarity_epp/__init__.py
+++ b/clarity_epp/__init__.py
@@ -38,8 +38,8 @@ def get_sample_artifacts_from_pool(lims, pool_artifact):
                 # Check if sample_artifact with 2 samples are from the same person
                 if len(sample_artifact.samples) == 2:
                     if (
-                        'Dx Persoons ID' in sample_artifact.samples[0].udf or
-                        'Dx Persoons ID' in sample_artifact.samples[1].udf or
+                        'Dx Persoons ID' in sample_artifact.samples[0].udf and
+                        'Dx Persoons ID' in sample_artifact.samples[1].udf and
                         sample_artifact.samples[0].udf['Dx Persoons ID'] == sample_artifact.samples[1].udf['Dx Persoons ID']
                     ):
                         sample_artifacts.append(sample_artifact)

From b6a9dbd9a52df668e816694facf0727822546c13 Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Fri, 1 Mar 2024 14:19:53 +0100
Subject: [PATCH 24/30] Fix lane, merge, ped exports

---
 clarity_epp/export/illumina.py | 28 +++++++++++++++-------------
 clarity_epp/export/merge.py    | 11 ++++++++++-
 clarity_epp/export/ped.py      | 12 +++++++++++-
 3 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/clarity_epp/export/illumina.py b/clarity_epp/export/illumina.py
index ece8512..89f21fb 100644
--- a/clarity_epp/export/illumina.py
+++ b/clarity_epp/export/illumina.py
@@ -231,14 +231,16 @@ def create_samplesheet(lims, process_id, output_file):
     process = Process(lims, id=process_id)
     sequencer_conversion_settings = config.sequencer_conversion_settings[process.type.name]
 
+    # Get output container; assume one flowcell per sequencing run
+    output_container = process.output_containers()[0]
+
     # Get samples samples per lane
-    samplesheet_samples = []
-    for lane in process.analytes()[0]:
-        sample_artifacts = get_sample_artifacts_from_pool(lims, process.analytes()[0][0])
-        samplesheet_samples.append(
-            get_samplesheet_samples(
-                sample_artifacts, process, sequencer_conversion_settings['index_2_conversion_orientation']
-            )
+    samplesheet_samples = {}
+    for lane_idx, lane_artifact in output_container.get_placements().items():
+        lane_idx = lane_idx.split(':')[0]
+        sample_artifacts = get_sample_artifacts_from_pool(lims, lane_artifact)
+        samplesheet_samples[lane_idx] = get_samplesheet_samples(
+            sample_artifacts, process, sequencer_conversion_settings['index_2_conversion_orientation']
         )
 
     # Create SampleSheet
@@ -279,17 +281,17 @@ def create_samplesheet(lims, process_id, output_file):
     sample_sheet.append(bcl_convert_data_header)
 
     # Add samples to SampleSheet
-    for lane, lane_samples in enumerate(samplesheet_samples):
+    for lane, lane_samples in samplesheet_samples.items():
         for sample in lane_samples:
             bcl_convert_data_row = "{sample_name},{index_1},{index_2},{override_cycles},{project}".format(
                     sample_name=sample,
-                    index_1=samplesheet_samples[lane][sample]['index_1'],
-                    index_2=samplesheet_samples[lane][sample]['index_2'],
-                    override_cycles=samplesheet_samples[lane][sample]['override_cycles'],
-                    project=samplesheet_samples[lane][sample]['project']
+                    index_1=lane_samples[sample]['index_1'],
+                    index_2=lane_samples[sample]['index_2'],
+                    override_cycles=lane_samples[sample]['override_cycles'],
+                    project=lane_samples[sample]['project']
                 )
             if multiple_lanes:  # Add lane number to row if multiple lanes conversion
-                bcl_convert_data_row = f"{lane+1},{bcl_convert_data_row}"
+                bcl_convert_data_row = f"{lane},{bcl_convert_data_row}"
             sample_sheet.append(bcl_convert_data_row)
 
     # Write SampleSheet to file
diff --git a/clarity_epp/export/merge.py b/clarity_epp/export/merge.py
index bab9afd..7095800 100644
--- a/clarity_epp/export/merge.py
+++ b/clarity_epp/export/merge.py
@@ -7,7 +7,16 @@
 def create_file(lims, process_id, output_file):
     """Create mege file."""
     process = Process(lims, id=process_id)
-    sample_artifacts = get_sample_artifacts_from_pool(lims, process.analytes()[0][0])
+
+    # Get output container; assume one flowcell per sequencing run
+    output_container = process.output_containers()[0]
+
+    # Get unique sample artifacts in run
+    sample_artifacts = []
+    for lane_artifact in output_container.get_placements().values():
+        for sample_artifact in get_sample_artifacts_from_pool(lims, lane_artifact):
+            if sample_artifact not in sample_artifacts:
+                sample_artifacts.append(sample_artifact)
 
     output_file.write('Sample\tMerge 1 Sample\tMerge 1 Sequencing Run\tMerge 2 Sample\tMerge 2 Sequencing Run\n')
 
diff --git a/clarity_epp/export/ped.py b/clarity_epp/export/ped.py
index 836d7af..a659308 100644
--- a/clarity_epp/export/ped.py
+++ b/clarity_epp/export/ped.py
@@ -7,7 +7,17 @@
 def create_file(lims, process_id, output_file):
     """Create ped file."""
     process = Process(lims, id=process_id)
-    sample_artifacts = get_sample_artifacts_from_pool(lims, process.analytes()[0][0])
+
+    # Get output container; assume one flowcell per sequencing run
+    output_container = process.output_containers()[0]
+
+    # Get unique sample artifacts in run
+    sample_artifacts = []
+    for lane_artifact in output_container.get_placements().values():
+        for sample_artifact in get_sample_artifacts_from_pool(lims, lane_artifact):
+            if sample_artifact not in sample_artifacts:
+                sample_artifacts.append(sample_artifact)
+
     ped_families = {}
 
     for sample_artifact in sample_artifacts:

From a2ceef60d3ee669b70a90b74ae5a0a5d8611719b Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Fri, 1 Mar 2024 15:31:48 +0100
Subject: [PATCH 25/30] Sort by lane

---
 clarity_epp/export/illumina.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clarity_epp/export/illumina.py b/clarity_epp/export/illumina.py
index 89f21fb..f4ee596 100644
--- a/clarity_epp/export/illumina.py
+++ b/clarity_epp/export/illumina.py
@@ -281,7 +281,7 @@ def create_samplesheet(lims, process_id, output_file):
     sample_sheet.append(bcl_convert_data_header)
 
     # Add samples to SampleSheet
-    for lane, lane_samples in samplesheet_samples.items():
+    for lane, lane_samples in sorted(samplesheet_samples.items()):
         for sample in lane_samples:
             bcl_convert_data_row = "{sample_name},{index_1},{index_2},{override_cycles},{project}".format(
                     sample_name=sample,

From af57ce0897a9662ccda2a6e780ed833530b9075f Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Mon, 4 Mar 2024 09:47:19 +0100
Subject: [PATCH 26/30] Add duplicate code todo

---
 clarity_epp/export/merge.py | 1 +
 clarity_epp/export/ped.py   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/clarity_epp/export/merge.py b/clarity_epp/export/merge.py
index 7095800..e360f5f 100644
--- a/clarity_epp/export/merge.py
+++ b/clarity_epp/export/merge.py
@@ -12,6 +12,7 @@ def create_file(lims, process_id, output_file):
     output_container = process.output_containers()[0]
 
     # Get unique sample artifacts in run
+    # TODO: This is a copy of the code from ped.py. It should be refactored to a common function.
     sample_artifacts = []
     for lane_artifact in output_container.get_placements().values():
         for sample_artifact in get_sample_artifacts_from_pool(lims, lane_artifact):
diff --git a/clarity_epp/export/ped.py b/clarity_epp/export/ped.py
index a659308..822b0e3 100644
--- a/clarity_epp/export/ped.py
+++ b/clarity_epp/export/ped.py
@@ -12,6 +12,7 @@ def create_file(lims, process_id, output_file):
     output_container = process.output_containers()[0]
 
     # Get unique sample artifacts in run
+    # TODO: This is a copy of the code from merge.py. It should be refactored to a common function.
     sample_artifacts = []
     for lane_artifact in output_container.get_placements().values():
         for sample_artifact in get_sample_artifacts_from_pool(lims, lane_artifact):

From 4eca97919cfa3d84b4298e5089c24f9410a310bd Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Tue, 5 Mar 2024 16:22:48 +0100
Subject: [PATCH 27/30] Fix file encoding

---
 clarity_epp/upload/tecan.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clarity_epp/upload/tecan.py b/clarity_epp/upload/tecan.py
index 1dbbaec..bd025e0 100644
--- a/clarity_epp/upload/tecan.py
+++ b/clarity_epp/upload/tecan.py
@@ -21,7 +21,7 @@ def results_qc(lims, process_id):
 
             measurements = {}
             sample_measurements = {}
-            for line in lims.get_file_contents(tecan_result_file.id).data.split('\n'):
+            for line in lims.get_file_contents(tecan_result_file.id).data.decode('utf-8').split('\n'):
                 if not line.startswith('<>'):
                     data = line.rstrip().split('\t')
                     for index, value in enumerate(data[1:]):

From d1969acce452e9556bad725654ad6389a8639ee2 Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Fri, 8 Mar 2024 15:15:25 +0100
Subject: [PATCH 28/30] Fix for NovaSeqXPlus workflow configuration

---
 clarity_epp/placement/artifact.py | 15 ++++++++----
 clarity_epp/placement/pool.py     | 38 ++++++++++++++++++-------------
 2 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/clarity_epp/placement/artifact.py b/clarity_epp/placement/artifact.py
index aeead75..d96bd1f 100644
--- a/clarity_epp/placement/artifact.py
+++ b/clarity_epp/placement/artifact.py
@@ -3,6 +3,7 @@
 from genologics.entities import Process, Workflow
 
 from .. import get_sequence_name
+from clarity_epp.export.utils import sort_artifact_list
 import config
 
 
@@ -17,19 +18,23 @@ def set_sequence_name(lims, process_id):
 def set_runid_name(lims, process_id):
     """Change artifact name to run id."""
     process = Process(lims, id=process_id)
-    analyte = process.analytes()[0][0]
     input_artifact = process.all_inputs()[0]
 
-    container_name = analyte.container.name
+    # Fix for NovaSeqXPlus workflow configuration
+    # TODO: Set NovaSeqXPlus step to 'Analysis' type.
+    if 'NovaSeqXPlus' in input_artifact.parent_process.type.name:
+        input_artifact = input_artifact.parent_process.all_inputs()[0]
 
     # Find sequencing process
     # Assume one sequence process per input artifact
     for sequence_process_type in config.sequence_process_types:
         sequence_processes = lims.get_processes(type=sequence_process_type, inputartifactlimsid=input_artifact.id)
         for sequence_process in sequence_processes:
-            if sequence_process.analytes()[0][0].container.name == container_name:
-                analyte.name = sequence_process.udf['Run ID']
-                analyte.put()
+            sequence_process_lanes = sorted(sequence_process.analytes()[0], key=sort_artifact_list)
+            for lane_idx, lane in enumerate(sorted(process.analytes()[0], key=sort_artifact_list)):
+                if sequence_process_lanes[lane_idx].container.name == lane.container.name:
+                    lane.name = sequence_process.udf['Run ID']
+                    lane.put()
 
 
 def route_to_workflow(lims, process_id, workflow):
diff --git a/clarity_epp/placement/pool.py b/clarity_epp/placement/pool.py
index ae69b92..f733046 100644
--- a/clarity_epp/placement/pool.py
+++ b/clarity_epp/placement/pool.py
@@ -14,6 +14,11 @@ def unpooling(lims, process_id):
         pool_artifact = process.all_inputs()[0]
         pool_artifact_parent_process = pool_artifact.parent_process
 
+        # Fix for NovaSeqXPlus workflow configuration
+        # TODO: Set NovaSeqXPlus step to 'Analysis' type.
+        if 'laden' not in pool_artifact_parent_process.type.name.lower():
+            pool_artifact_parent_process = pool_artifact_parent_process.all_inputs()[0].parent_process
+
         run_id = pool_artifact.name  # Assume run id is set as pool name using placement/artifact/set_runid_name
         sample_artifacts = []  # sample artifacts before pooling
         sample_projects = {}
@@ -35,22 +40,23 @@ def unpooling(lims, process_id):
                         sample_projects[data[sample_index]] = data[project_index]
 
         # Parse sequencing run samples and move Dx samples to post sequencing workflow
-        for sample_artifact in get_sample_artifacts_from_pool(lims, pool_artifact):
-            sample = sample_artifact.samples[0]   # Asume all samples metadata is identical.
-
-            # Set sample sequencing run and project
-            sample_artifact.udf['Dx Sequencing Run ID'] = run_id
-            # Use sample.name for external (clarity_portal) samples
-            if 'Sample Type' in sample.udf and 'library' in sample.udf['Sample Type']:
-                sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample.name]
-            else:  # Use sample_artifact.name for Dx samples (upload via Helix)
-                sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample_artifact.name]
-            sample_artifact.put()
-
-            # Only move DX production samples to post sequencing workflow
-            if sample.project and sample.project.udf['Application'] == 'DX':
-                sample_artifacts.append(sample_artifact)
-
+        # for lane in
+        for lane in process.all_inputs():
+            for sample_artifact in get_sample_artifacts_from_pool(lims, lane):
+                sample = sample_artifact.samples[0]   # Asume all samples metadata is identical.
+
+                # Set sample sequencing run and project
+                sample_artifact.udf['Dx Sequencing Run ID'] = run_id
+                # Use sample.name for external (clarity_portal) samples
+                if 'Sample Type' in sample.udf and 'library' in sample.udf['Sample Type']:
+                    sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample.name]
+                else:  # Use sample_artifact.name for Dx samples (upload via Helix)
+                    sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample_artifact.name]
+                sample_artifact.put()
+
+                # Only move DX production samples to post sequencing workflow
+                if sample_artifact not in sample_artifacts and sample.project and sample.project.udf['Application'] == 'DX':
+                    sample_artifacts.append(sample_artifact)
         lims.route_artifacts(sample_artifacts, workflow_uri=Workflow(lims, id=config.post_sequencing_workflow).uri)
 
 

From 36ed301967bc030823ea8b85f7bc411b5fc50dd3 Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Fri, 8 Mar 2024 15:24:39 +0100
Subject: [PATCH 29/30] Add Dx NovaSeqXPlus Run v1.0

---
 config.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config.py b/config.py
index 3659e7b..d6102c7 100755
--- a/config.py
+++ b/config.py
@@ -88,6 +88,7 @@
     'Dx NextSeq Run v1.0', 'Dx NextSeq Run v1.1',
     'Dx Automated NovaSeq Run (standaard) v1.0', 'Dx Automated NovaSeq Run (standaard) v1.1',
     'AUTOMATED - NovaSeq Run (NovaSeq 6000 v3.1)',
+    'Dx NovaSeqXPlus Run v1.0'
 ]
 
 # BCLConvert conversion settings

From f345fc95b0cb25ddc58af8bac30b8ae4224c49db Mon Sep 17 00:00:00 2001
From: Robert Ernst <r.f.ernst-3@umcutrecht.nl>
Date: Fri, 8 Mar 2024 15:28:52 +0100
Subject: [PATCH 30/30] Remove debug comment

---
 clarity_epp/placement/pool.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clarity_epp/placement/pool.py b/clarity_epp/placement/pool.py
index f733046..3e4b26e 100644
--- a/clarity_epp/placement/pool.py
+++ b/clarity_epp/placement/pool.py
@@ -40,7 +40,6 @@ def unpooling(lims, process_id):
                         sample_projects[data[sample_index]] = data[project_index]
 
         # Parse sequencing run samples and move Dx samples to post sequencing workflow
-        # for lane in
         for lane in process.all_inputs():
             for sample_artifact in get_sample_artifacts_from_pool(lims, lane):
                 sample = sample_artifact.samples[0]   # Asume all samples metadata is identical.