From a78999bf447167dda09c0d074367bf7962a73317 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Mon, 8 Jan 2024 13:41:59 -0500 Subject: [PATCH 01/19] add nebnext profile --- conf/nebnext.config | 24 ++++++++++++++++++++++++ nextflow.config | 1 + 2 files changed, 25 insertions(+) create mode 100644 conf/nebnext.config diff --git a/conf/nebnext.config b/conf/nebnext.config new file mode 100644 index 00000000..a3216fda --- /dev/null +++ b/conf/nebnext.config @@ -0,0 +1,24 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/airrflow -profile nebnext_human, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'NEBNext - AbSeq profile' + config_profile_description = 'Profile to run pipeline for the NEBNext - AbSeq experimental protocol' + + mode = 'fastq' + + library_generation_method = 'dt_5p_race_umi' + cprimer_position = 'R1' + cprimer_start = 0 + umi_length = 17 + umi_position = 'R2' +} diff --git a/nextflow.config b/nextflow.config index 5eee97cc..94e3c83d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -268,6 +268,7 @@ profiles { test_nocluster { includeConfig 'conf/test_nocluster.config' } test_fetchimgt { includeConfig 'conf/test_fetchimgt.config' } test_igblast { includeConfig 'conf/test_igblast.config' } + nebnext { includeConfig 'conf/nebnext.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile From 507a3d31b11bfb33b8ea0fe8c24d8c4d7a92cde7 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Mon, 8 Jan 2024 16:37:19 -0500 Subject: [PATCH 02/19] add clontech umi protocol --- conf/clontech.config | 36 +++++++++++++ conf/nebnext.config | 2 + .../local/presto/presto_maskprimers_align.nf | 50 +++++++++++++++++++ nextflow.config | 4 ++ subworkflows/local/presto_umi.nf | 32 ++++++++---- subworkflows/local/sequence_assembly.nf | 7 ++- 6 files changed, 121 insertions(+), 10 deletions(-) create mode 100644 conf/clontech.config create mode 100644 modules/local/presto/presto_maskprimers_align.nf diff --git a/conf/clontech.config b/conf/clontech.config new file mode 100644 index 00000000..64c94162 --- /dev/null +++ b/conf/clontech.config @@ -0,0 +1,36 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/airrflow -profile nebnext_human, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Takara Bio / Clontech SMARTer v2' + config_profile_description = 'Profile to run pipeline for the Takara Bio / Clontech SMARTer v2 (UMI) protocol profile' + + mode = 'fastq' + + library_generation_method = 'dt_5p_race_umi' + + cprimers = 'https://bitbucket.org/kleinstein/immcantation/raw/c98269b194e9c6262fe3b098be3600ba7f64b85c/protocols/Universal/Human_IG_CRegion_RC.fasta' + + // primer options + cprimer_position = 'R1' + cprimer_start = 0 + vprimer_start = 0 + umi_length = 12 + umi_position = 'R2' + + // Mask primer options + maskprimers_align = true + primer_extract_len = 7 + primer_mask_mode = 'cut' + primer_maxlen = 70 + primer_maxerror = 0.2 +} diff --git a/conf/nebnext.config b/conf/nebnext.config index a3216fda..1b0462f3 100644 --- a/conf/nebnext.config +++ b/conf/nebnext.config @@ -15,6 +15,8 @@ params { config_profile_description = 'Profile to run pipeline for the NEBNext - AbSeq experimental protocol' mode = 'fastq' + cprimers = 'https://bitbucket.org/kleinstein/immcantation/raw/354f49228a43b4c2858d67fb09886126b314e317/protocols/AbSeq/AbSeq_R1_Human_IG_Primers.fasta' + race_linker = 'https://bitbucket.org/kleinstein/immcantation/raw/354f49228a43b4c2858d67fb09886126b314e317/protocols/AbSeq/AbSeq_R2_TS.fasta' library_generation_method = 'dt_5p_race_umi' cprimer_position = 'R1' diff --git a/modules/local/presto/presto_maskprimers_align.nf b/modules/local/presto/presto_maskprimers_align.nf new file mode 100644 index 00000000..53d132fd --- /dev/null +++ b/modules/local/presto/presto_maskprimers_align.nf @@ -0,0 +1,50 @@ +process PRESTO_MASKPRIMERS_ALIGN { + tag "$meta.id" + label "process_high" + label 'immcantation' + + conda "bioconda::presto=0.7.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'biocontainers/presto:0.7.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(R1), path(R2) + path(cprimers) + + output: + tuple val(meta), path("*_R1_primers-pass.fastq"), path("*_R2_primers-pass.fastq") , emit: reads + path "*_command_log.txt", emit: logs + path "*_R1.log" + path "*_R2.log" + path "*.tab", emit: log_tab + path "versions.yml" , emit: versions + + + script: + """ + MaskPrimers.py align --nproc ${task.cpus} \\ + -s $R1 \\ + -p ${cprimers} \\ + --maxlen ${params.primer_maxlen} \\ + --maxerror ${params.primer_maxerror} \\ + --mode ${params.primer_mask_mode} \\ + --skiprc \\ + --outname ${meta.id}_R1 \\ + --log ${meta.id}_R1.log > ${meta.id}_command_log.txt + MaskPrimers.py extract --nproc ${task.cpus} \\ + -s $R2 \\ + --start ${params.umi_length} \\ + --len ${params.primer_extract_len} \\ + --barcode \\ + --mode ${params.primer_mask_mode} \\ + --outname ${meta.id}_R2 \\ + --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt + ParseLog.py -l ${meta.id}_R1.log ${meta.id}_R2.log -f ID PRIMER ERROR + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + presto: \$( MaskPrimers.py --version | awk -F' ' '{print \$2}' ) + END_VERSIONS + """ +} diff --git a/nextflow.config b/nextflow.config index 94e3c83d..311257e9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -48,6 +48,9 @@ params { // sequence assembly options // -------------------------- filterseq_q = 20 + maskprimers_align = false + primer_extract_len = 0 + primer_maxlen = 50 primer_maxerror = 0.2 primer_mask_mode = 'cut' primer_consensus = 0.6 @@ -269,6 +272,7 @@ profiles { test_fetchimgt { includeConfig 'conf/test_fetchimgt.config' } test_igblast { includeConfig 'conf/test_igblast.config' } nebnext { includeConfig 'conf/nebnext.config' } + clontech { includeConfig 'conf/clontech.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile diff --git a/subworkflows/local/presto_umi.nf b/subworkflows/local/presto_umi.nf index 8d7d8713..3b53301b 100644 --- a/subworkflows/local/presto_umi.nf +++ b/subworkflows/local/presto_umi.nf @@ -9,6 +9,7 @@ include { FASTP } from '../../modules/n //PRESTO include { PRESTO_FILTERSEQ as PRESTO_FILTERSEQ_UMI } from '../../modules/local/presto/presto_filterseq' include { PRESTO_MASKPRIMERS as PRESTO_MASKPRIMERS_UMI } from '../../modules/local/presto/presto_maskprimers' +include { PRESTO_MASKPRIMERS_ALIGN } from '../../modules/local/presto/presto_maskprimers_align' include { PRESTO_PAIRSEQ as PRESTO_PAIRSEQ_UMI } from '../../modules/local/presto/presto_pairseq' include { PRESTO_CLUSTERSETS as PRESTO_CLUSTERSETS_UMI } from '../../modules/local/presto/presto_clustersets' include { PRESTO_PARSE_CLUSTER as PRESTO_PARSE_CLUSTER_UMI } from '../../modules/local/presto/presto_parse_cluster' @@ -90,17 +91,30 @@ workflow PRESTO_UMI { PRESTO_FILTERSEQ_UMI ( GUNZIP_UMI.out.reads ) ch_versions = ch_versions.mix(PRESTO_FILTERSEQ_UMI.out.versions) - // Mask primers - PRESTO_MASKPRIMERS_UMI ( - PRESTO_FILTERSEQ_UMI.out.reads, - ch_cprimers.collect(), - ch_vprimers.collect() - ) - ch_versions = ch_versions.mix(PRESTO_MASKPRIMERS_UMI.out.versions) + if (params.maskprimers_align) { + PRESTO_MASKPRIMERS_ALIGN( + PRESTO_FILTERSEQ_UMI.out.reads, + ch_cprimers.collect() + ) + ch_versions = ch_versions.mix(PRESTO_MASKPRIMERS_ALIGN.out.versions) + ch_maskprimers_reads = PRESTO_MASKPRIMERS_ALIGN.out.reads + ch_maskprimers_logs = PRESTO_MASKPRIMERS_ALIGN.out.logs + } else { + // Mask 
primers + PRESTO_MASKPRIMERS_UMI ( + PRESTO_FILTERSEQ_UMI.out.reads, + ch_cprimers.collect(), + ch_vprimers.collect() + ) + ch_versions = ch_versions.mix(PRESTO_MASKPRIMERS_UMI.out.versions) + ch_maskprimers_reads = PRESTO_MASKPRIMERS_UMI.out.reads + ch_maskprimers_logs = PRESTO_MASKPRIMERS_UMI.out.logs + } + // Pre-consensus pair PRESTO_PAIRSEQ_UMI ( - PRESTO_MASKPRIMERS_UMI.out.reads + ch_maskprimers_reads ) ch_versions = ch_versions.mix(PRESTO_PAIRSEQ_UMI.out.versions) @@ -186,7 +200,7 @@ workflow PRESTO_UMI { fastp_reads_html = FASTP.out.html.collect{ meta,html -> html } fastqc_postassembly_gz = FASTQC_POSTASSEMBLY_UMI.out.zip presto_filterseq_logs = PRESTO_FILTERSEQ_UMI.out.logs - presto_maskprimers_logs = PRESTO_MASKPRIMERS_UMI.out.logs.collect() + presto_maskprimers_logs = ch_maskprimers_logs.collect() presto_pairseq_logs = PRESTO_PAIRSEQ_UMI.out.logs.collect() presto_clustersets_logs = ch_clustersets_logs presto_buildconsensus_logs = PRESTO_BUILDCONSENSUS_UMI.out.logs.collect() diff --git a/subworkflows/local/sequence_assembly.nf b/subworkflows/local/sequence_assembly.nf index 26bfd3cd..857d6160 100644 --- a/subworkflows/local/sequence_assembly.nf +++ b/subworkflows/local/sequence_assembly.nf @@ -108,6 +108,8 @@ workflow SEQUENCE_ASSEMBLY { error "The oligo-dT 5'-RACE UMI library generation method does not accept V-region primers, please provide a linker with '--race_linker' instead or select another library method option." } else if (params.race_linker) { ch_vprimers_fasta = Channel.fromPath(params.race_linker, checkIfExists: true) + } else if (params.maskprimers_align) { + ch_vprimers_fasta = Channel.of([]) } else { error "The oligo-dT 5'-RACE UMI library generation method requires a linker or Template Switch Oligo sequence, please provide it with the option '--race_linker'." } @@ -124,6 +126,8 @@ workflow SEQUENCE_ASSEMBLY { error "The oligo-dT 5'-RACE library generation method does not accept V-region primers, please provide a linker with '--race_linker' instead or select another library method option." } else if (params.race_linker) { ch_vprimers_fasta = Channel.fromPath(params.race_linker, checkIfExists: true) + } else if (params.maskprimers_align) { + ch_vprimers_fasta = Channel.of([]) } else { error "The oligo-dT 5'-RACE library generation method requires a linker or Template Switch Oligo sequence, please provide it with the option '--race_linker'." } @@ -145,7 +149,8 @@ workflow SEQUENCE_ASSEMBLY { if (params.index_file & params.umi_position == 'R2') {error "Please do not set `--umi_position` option if index file with UMIs is provided."} if (params.umi_length < 0) {error "Please provide the UMI barcode length in the option `--umi_length`. To run without UMIs, set umi_length to 0."} if (!params.index_file & params.umi_start != 0) {error "Setting a UMI start position is only allowed when providing the UMIs in a separate index read file. 
If so, please provide the `--index_file` flag as well."} - + if (params.maskprimers_align & params.umi_position == 'R1') {error "The maskprimers align option is only supported with UMI barcodes in the R2 reads (reads containing V region)."} + if (params.maskprimers_align & params.cprimer_position == 'R2') {error "The maskprimers align option is only supported with Cprimers in the R1 reads (reads containing C region)."} // // SUBWORKFLOW: Read in samplesheet, validate and stage input files From e4d1f5d5547cb83aa2381592a45c2abaf312f912 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Tue, 9 Jan 2024 13:07:25 -0500 Subject: [PATCH 03/19] add nebnext and clontech protocols --- conf/clontech.config | 3 +- conf/modules.config | 20 ++++++- conf/nebnext.config | 5 ++ .../presto/presto_assemblepairs_sequential.nf | 39 ++++++++++++++ modules/local/presto/presto_maskprimers.nf | 8 +-- .../local/presto/presto_maskprimers_align.nf | 2 +- .../presto/presto_maskprimers_postassembly.nf | 8 +-- nextflow.config | 4 +- subworkflows/local/databases.nf | 54 +++++++++++++++++++ subworkflows/local/presto_umi.nf | 33 +++++++++--- subworkflows/local/sequence_assembly.nf | 6 ++- subworkflows/local/vdj_annotation.nf | 46 +--------------- workflows/airrflow.nf | 13 ++++- 13 files changed, 173 insertions(+), 68 deletions(-) create mode 100644 modules/local/presto/presto_assemblepairs_sequential.nf create mode 100644 subworkflows/local/databases.nf diff --git a/conf/clontech.config b/conf/clontech.config index 64c94162..d4bbdf79 100644 --- a/conf/clontech.config +++ b/conf/clontech.config @@ -32,5 +32,6 @@ params { primer_extract_len = 7 primer_mask_mode = 'cut' primer_maxlen = 70 - primer_maxerror = 0.2 + primer_r1_maxerror = 0.2 + assemblepairs_sequential = true } diff --git a/conf/modules.config b/conf/modules.config index 7da8b130..c86b3c37 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -138,7 +138,15 @@ process { ] } - withName: PRESTO_MASKPRIMERS { + withName: PRESTO_MASKPRIMERS_UMI { + publishDir = [ + path: { "${params.outdir}/presto/02-maskprimers/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: PRESTO_MASKPRIMERS_ALIGN { publishDir = [ path: { "${params.outdir}/presto/02-maskprimers/${meta.id}" }, mode: params.publish_dir_mode, @@ -207,6 +215,16 @@ process { ext.args2 = '-f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT LENGTH OVERLAP ERROR PVALUE' } + withName: PRESTO_ASSEMBLEPAIRS_SEQUENTIAL { + publishDir = [ + path: { "${params.outdir}/presto/08-assemble-pairs/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.args = '--coord presto --rc tail --1f CONSCOUNT --2f CONSCOUNT PRCONS --minlen 8 --maxerror 0.3 --alpha 1e-5 --scanrev --minident 0.5 --evalue 1e-5 --maxhits 100 --aligner blastn' + ext.args2 = '-f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT LENGTH OVERLAP ERROR PVALUE' + } + withName: PRESTO_ASSEMBLEPAIRS_SANS_UMI { publishDir = [ path: { "${params.outdir}/presto/01-assemble-pairs/${meta.id}" }, diff --git a/conf/nebnext.config b/conf/nebnext.config index 1b0462f3..7b19a1ed 100644 --- a/conf/nebnext.config +++ b/conf/nebnext.config @@ -23,4 +23,9 @@ params { cprimer_start = 0 umi_length = 17 umi_position = 'R2' + + //presto options + primer_r1_maxerror = 0.2 + primer_r2_maxerror = 0.5 + assemblepairs_sequential = true } diff --git a/modules/local/presto/presto_assemblepairs_sequential.nf b/modules/local/presto/presto_assemblepairs_sequential.nf new file mode 100644 index 00000000..b75d7607 --- /dev/null +++ b/modules/local/presto/presto_assemblepairs_sequential.nf @@ -0,0 +1,39 @@ +process PRESTO_ASSEMBLEPAIRS_SEQUENTIAL { + tag "$meta.id" + label 'process_long_parallelized' + label 'immcantation' + + //TODO add igblast to container + conda "bioconda::presto=0.7.1 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" + //container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + // 'https://depot.galaxyproject.org/singularity/quay.io/biocontainers/mulled-v2-23ca863c1007648366380a118c1cb3c060379dc5:190133c91e389ba1df2063a4eb7dc43d9c926c50-0' : + // 'biocontainers/mulled-v2-23ca863c1007648366380a118c1cb3c060379dc5:190133c91e389ba1df2063a4eb7dc43d9c926c50-0' }" + container "docker.io/immcantation/suite:4.4.0" + + input: + tuple val(meta), path(R1), path(R2) // reads in fastq format + path(igblast) // igblast references + + output: + tuple val(meta), path("*_assemble-pass.fastq"), emit: reads + path("*_command_log.txt"), emit: logs + path("*.log") + path("*_table.tab") + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + """ + AssemblePairs.py sequential -1 $R2 -2 $R1 --nproc ${task.cpus} \\ + -r "${igblast}/fasta/imgt_${meta.species}_${meta.locus.toLowerCase()}_v.fasta" \\ + $args \\ + --outname ${meta.id} --log ${meta.id}.log > ${meta.id}_command_log.txt + ParseLog.py -l ${meta.id}.log $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + presto: \$( AssemblePairs.py --version | awk -F' ' '{print \$2}' ) + END_VERSIONS + """ +} diff --git a/modules/local/presto/presto_maskprimers.nf b/modules/local/presto/presto_maskprimers.nf index 99aab4dd..6845f44c 100644 --- a/modules/local/presto/presto_maskprimers.nf +++ b/modules/local/presto/presto_maskprimers.nf @@ -28,8 +28,8 @@ process PRESTO_MASKPRIMERS { def primer_start_R1 = (params.index_file | params.umi_position == 'R1') ? "--start ${params.umi_length + params.cprimer_start} --barcode" : "--start ${params.cprimer_start}" def primer_start_R2 = (params.umi_position == 'R2') ? 
"--start ${params.umi_length + params.vprimer_start} --barcode" : "--start ${params.vprimer_start}" """ - MaskPrimers.py score --nproc ${task.cpus} -s $R1 -p ${cprimers} $primer_start_R1 $revpr --maxerror ${params.primer_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log.txt - MaskPrimers.py score --nproc ${task.cpus} -s $R2 -p ${vprimers} $primer_start_R2 $revpr --maxerror ${params.primer_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R2 --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt + MaskPrimers.py score --nproc ${task.cpus} -s $R1 -p ${cprimers} $primer_start_R1 $revpr --maxerror ${params.primer_r1_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log.txt + MaskPrimers.py score --nproc ${task.cpus} -s $R2 -p ${vprimers} $primer_start_R2 $revpr --maxerror ${params.primer_r2_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R2 --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt ParseLog.py -l ${meta.id}_R1.log ${meta.id}_R2.log -f ID PRIMER ERROR cat <<-END_VERSIONS > versions.yml @@ -41,8 +41,8 @@ process PRESTO_MASKPRIMERS { def primer_start_R1 = (params.index_file | params.umi_position == 'R1') ? "--start ${params.umi_length + params.vprimer_start} --barcode" : "--start ${params.vprimer_start}" def primer_start_R2 = (params.umi_position == 'R2') ? "--start ${params.umi_length + params.cprimer_start} --barcode" : "--start ${params.cprimer_start}" """ - MaskPrimers.py score --nproc ${task.cpus} -s $R1 -p ${vprimers} $primer_start_R1 $revpr --maxerror ${params.primer_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log.txt - MaskPrimers.py score --nproc ${task.cpus} -s $R2 -p ${cprimers} $primer_start_R2 $revpr --maxerror ${params.primer_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R2 --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt + MaskPrimers.py score --nproc ${task.cpus} -s $R1 -p ${vprimers} $primer_start_R1 $revpr --maxerror ${params.primer_r1_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log.txt + MaskPrimers.py score --nproc ${task.cpus} -s $R2 -p ${cprimers} $primer_start_R2 $revpr --maxerror ${params.primer_r2_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R2 --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt ParseLog.py -l "${meta.id}_R1.log" "${meta.id}_R2.log" -f ID PRIMER ERROR cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/presto/presto_maskprimers_align.nf b/modules/local/presto/presto_maskprimers_align.nf index 53d132fd..58e0bcd3 100644 --- a/modules/local/presto/presto_maskprimers_align.nf +++ b/modules/local/presto/presto_maskprimers_align.nf @@ -27,7 +27,7 @@ process PRESTO_MASKPRIMERS_ALIGN { -s $R1 \\ -p ${cprimers} \\ --maxlen ${params.primer_maxlen} \\ - --maxerror ${params.primer_maxerror} \\ + --maxerror ${params.primer_r1_maxerror} \\ --mode ${params.primer_mask_mode} \\ --skiprc \\ --outname ${meta.id}_R1 \\ diff --git a/modules/local/presto/presto_maskprimers_postassembly.nf b/modules/local/presto/presto_maskprimers_postassembly.nf index 982b6f46..8c6c742b 100644 --- a/modules/local/presto/presto_maskprimers_postassembly.nf +++ b/modules/local/presto/presto_maskprimers_postassembly.nf @@ -24,10 +24,10 @@ process PRESTO_MASKPRIMERS_POSTASSEMBLY { def revpr = params.primer_revpr ? 
'--revpr' : '' if (params.cprimer_position == "R1") { """ - MaskPrimers.py score --nproc ${task.cpus} -s $reads -p ${cprimers} --start ${params.cprimer_start} --maxerror ${params.primer_maxerror} \ + MaskPrimers.py score --nproc ${task.cpus} -s $reads -p ${cprimers} --start ${params.cprimer_start} --maxerror ${params.primer_r1_maxerror} \ --mode ${params.primer_mask_mode} --outname ${meta.id}-FWD \ --log ${meta.id}-FWD.log > ${meta.id}_command_log.txt - MaskPrimers.py score --nproc ${task.cpus} -s ${meta.id}-FWD_primers-pass.fastq -p ${vprimers} --start ${params.vprimer_start} --maxerror ${params.primer_maxerror} \ + MaskPrimers.py score --nproc ${task.cpus} -s ${meta.id}-FWD_primers-pass.fastq -p ${vprimers} --start ${params.vprimer_start} --maxerror ${params.primer_r2_maxerror} \ --mode ${params.primer_mask_mode} --outname ${meta.id}-REV $revpr \ --log ${meta.id}-REV.log >> ${meta.id}_command_log.txt ParseLog.py -l ${meta.id}-FWD.log ${meta.id}-REV.log -f ID PRIMER ERROR @@ -39,10 +39,10 @@ process PRESTO_MASKPRIMERS_POSTASSEMBLY { """ } else if (params.cprimer_position == "R2") { """ - MaskPrimers.py score --nproc ${task.cpus} -s $reads -p ${vprimers} --start ${params.cprimer_start} --maxerror ${params.primer_maxerror} \ + MaskPrimers.py score --nproc ${task.cpus} -s $reads -p ${vprimers} --start ${params.cprimer_start} --maxerror ${params.primer_r1_maxerror} \ --mode ${params.primer_mask_mode} --outname ${meta.id}-FWD \ --log ${meta.id}-FWD.log > ${meta.id}_command_log.txt - MaskPrimers.py score --nproc ${task.cpus} -s ${meta.id}-FWD_primers-pass.fastq -p ${cprimers} --start ${params.vprimer_start} --maxerror ${params.primer_maxerror} \ + MaskPrimers.py score --nproc ${task.cpus} -s ${meta.id}-FWD_primers-pass.fastq -p ${cprimers} --start ${params.vprimer_start} --maxerror ${params.primer_r2_maxerror} \ --mode ${params.primer_mask_mode} --outname ${meta.id}-REV $revpr \ --log ${meta.id}-REV.log >> ${meta.id}_command_log.txt ParseLog.py -l ${meta.id}-FWD.log ${meta.id}-REV.log -f ID PRIMER ERROR diff --git a/nextflow.config b/nextflow.config index 311257e9..c7aa9e2a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -51,12 +51,14 @@ params { maskprimers_align = false primer_extract_len = 0 primer_maxlen = 50 - primer_maxerror = 0.2 + primer_r1_maxerror = 0.2 + primer_r2_maxerror = 0.2 primer_mask_mode = 'cut' primer_consensus = 0.6 buildconsensus_maxerror = 0.1 buildconsensus_maxgap = 0.5 cluster_sets = true + assemblepairs_sequential = false // ----------------------- // vdj annotation options diff --git a/subworkflows/local/databases.nf b/subworkflows/local/databases.nf new file mode 100644 index 00000000..594b340e --- /dev/null +++ b/subworkflows/local/databases.nf @@ -0,0 +1,54 @@ +include { FETCH_DATABASES } from '../../modules/local/fetch_databases' +include { UNZIP_DB as UNZIP_IGBLAST } from '../../modules/local/unzip_db' +include { UNZIP_DB as UNZIP_IMGT } from '../../modules/local/unzip_db' + +workflow DATABASES { + + take: + + main: + ch_versions = Channel.empty() + + // FETCH DATABASES + if( !params.fetch_imgt ){ + if (params.igblast_base.endsWith(".zip")) { + Channel.fromPath("${params.igblast_base}") + .ifEmpty{ error "IGBLAST DB not found: ${params.igblast_base}" } + .set { ch_igblast_zipped } + UNZIP_IGBLAST( ch_igblast_zipped.collect() ) + ch_igblast = UNZIP_IGBLAST.out.unzipped + ch_versions = ch_versions.mix(UNZIP_IGBLAST.out.versions) + } else { + Channel.fromPath("${params.igblast_base}") + .ifEmpty { error "IGBLAST DB not found: ${params.igblast_base}" } + .set { 
ch_igblast } + } + } + + if( !params.fetch_imgt ){ + if (params.imgtdb_base.endsWith(".zip")) { + Channel.fromPath("${params.imgtdb_base}") + .ifEmpty{ error "IMGTDB not found: ${params.imgtdb_base}" } + .set { ch_imgt_zipped } + UNZIP_IMGT( ch_imgt_zipped.collect() ) + ch_imgt = UNZIP_IMGT.out.unzipped + ch_versions = ch_versions.mix(UNZIP_IMGT.out.versions) + } else { + Channel.fromPath("${params.imgtdb_base}") + .ifEmpty { error "IMGT DB not found: ${params.imgtdb_base}" } + .set { ch_imgt } + } + } + + if (params.fetch_imgt) { + FETCH_DATABASES() + ch_igblast = FETCH_DATABASES.out.igblast + ch_imgt = FETCH_DATABASES.out.imgt + ch_versions = ch_versions.mix(FETCH_DATABASES.out.versions) + } + + emit: + versions = ch_versions + imgt = ch_imgt + igblast = ch_igblast +} diff --git a/subworkflows/local/presto_umi.nf b/subworkflows/local/presto_umi.nf index 3b53301b..c3dc99c8 100644 --- a/subworkflows/local/presto_umi.nf +++ b/subworkflows/local/presto_umi.nf @@ -16,6 +16,7 @@ include { PRESTO_PARSE_CLUSTER as PRESTO_PARSE_CLUSTER_UMI } from '../../modu include { PRESTO_BUILDCONSENSUS as PRESTO_BUILDCONSENSUS_UMI } from '../../modules/local/presto/presto_buildconsensus' include { PRESTO_POSTCONSENSUS_PAIRSEQ as PRESTO_POSTCONSENSUS_PAIRSEQ_UMI } from '../../modules/local/presto/presto_postconsensus_pairseq' include { PRESTO_ASSEMBLEPAIRS as PRESTO_ASSEMBLEPAIRS_UMI } from '../../modules/local/presto/presto_assemblepairs' +include { PRESTO_ASSEMBLEPAIRS_SEQUENTIAL } from '../../modules/local/presto/presto_assemblepairs_sequential' include { PRESTO_PARSEHEADERS as PRESTO_PARSEHEADERS_COLLAPSE_UMI } from '../../modules/local/presto/presto_parseheaders' include { PRESTO_PARSEHEADERS_PRIMERS as PRESTO_PARSEHEADERS_PRIMERS_UMI } from '../../modules/local/presto/presto_parseheaders_primers' include { PRESTO_PARSEHEADERS_METADATA as PRESTO_PARSEHEADERS_METADATA_UMI } from '../../modules/local/presto/presto_parseheaders_metadata' @@ -29,6 +30,7 @@ workflow PRESTO_UMI { ch_cprimers // channel: [ cprimers.fasta ] ch_vprimers // channel: [ vprimers.fasta ] ch_adapter_fasta // channel: [ adapters.fasta ] + ch_igblast main: @@ -151,21 +153,36 @@ workflow PRESTO_UMI { ) ch_versions = ch_versions.mix(PRESTO_POSTCONSENSUS_PAIRSEQ_UMI.out.versions) - // Assemble read pairs - PRESTO_ASSEMBLEPAIRS_UMI ( - PRESTO_POSTCONSENSUS_PAIRSEQ_UMI.out.reads - ) - ch_versions = ch_versions.mix(PRESTO_ASSEMBLEPAIRS_UMI.out.versions) + if (params.assemblepairs_sequential){ + // Assemble read pairs sequential + PRESTO_ASSEMBLEPAIRS_SEQUENTIAL ( + PRESTO_POSTCONSENSUS_PAIRSEQ_UMI.out.reads, + ch_igblast.collect() + ) + ch_versions = ch_versions.mix(PRESTO_ASSEMBLEPAIRS_SEQUENTIAL.out.versions) + ch_assemblepairs_reads = PRESTO_ASSEMBLEPAIRS_SEQUENTIAL.out.reads + ch_assemblepairs_logs = PRESTO_ASSEMBLEPAIRS_SEQUENTIAL.out.logs + } else { + // Assemble read pairs align + PRESTO_ASSEMBLEPAIRS_UMI ( + PRESTO_POSTCONSENSUS_PAIRSEQ_UMI.out.reads + ) + ch_versions = ch_versions.mix(PRESTO_ASSEMBLEPAIRS_UMI.out.versions) + ch_assemblepairs_reads = PRESTO_ASSEMBLEPAIRS_UMI.out.reads + ch_assemblepairs_logs = PRESTO_ASSEMBLEPAIRS_UMI.out.logs + } + + // Generate QC stats after reads paired and filtered but before collapsed FASTQC_POSTASSEMBLY_UMI ( - PRESTO_ASSEMBLEPAIRS_UMI.out.reads + ch_assemblepairs_reads ) ch_versions = ch_versions.mix(FASTQC_POSTASSEMBLY_UMI.out.versions) // Combine UMI duplicate count PRESTO_PARSEHEADERS_COLLAPSE_UMI ( - PRESTO_ASSEMBLEPAIRS_UMI.out.reads + ch_assemblepairs_reads ) ch_versions = 
ch_versions.mix(PRESTO_PARSEHEADERS_COLLAPSE_UMI.out.versions) @@ -205,7 +222,7 @@ workflow PRESTO_UMI { presto_clustersets_logs = ch_clustersets_logs presto_buildconsensus_logs = PRESTO_BUILDCONSENSUS_UMI.out.logs.collect() presto_postconsensus_pairseq_logs = PRESTO_POSTCONSENSUS_PAIRSEQ_UMI.out.logs.collect() - presto_assemblepairs_logs = PRESTO_ASSEMBLEPAIRS_UMI.out.logs.collect() + presto_assemblepairs_logs = ch_assemblepairs_logs.collect() presto_collapseseq_logs = PRESTO_COLLAPSESEQ_UMI.out.logs.collect() presto_splitseq_logs = PRESTO_SPLITSEQ_UMI.out.logs.collect() } diff --git a/subworkflows/local/sequence_assembly.nf b/subworkflows/local/sequence_assembly.nf index 857d6160..d3991d33 100644 --- a/subworkflows/local/sequence_assembly.nf +++ b/subworkflows/local/sequence_assembly.nf @@ -50,7 +50,8 @@ include { FASTQC } from '../../modules/nf-core/fastqc/main' workflow SEQUENCE_ASSEMBLY { take: - ch_input // channel: + ch_input // channel: reads + ch_igblast main: @@ -195,7 +196,8 @@ workflow SEQUENCE_ASSEMBLY { ch_reads, ch_cprimers_fasta, ch_vprimers_fasta, - ch_adapter_fasta + ch_adapter_fasta, + ch_igblast.collect() ) ch_presto_fasta = PRESTO_UMI.out.fasta ch_presto_software = PRESTO_UMI.out.software diff --git a/subworkflows/local/vdj_annotation.nf b/subworkflows/local/vdj_annotation.nf index c80d3503..4ac2b9df 100644 --- a/subworkflows/local/vdj_annotation.nf +++ b/subworkflows/local/vdj_annotation.nf @@ -1,6 +1,3 @@ -include { FETCH_DATABASES } from '../../modules/local/fetch_databases' -include { UNZIP_DB as UNZIP_IGBLAST } from '../../modules/local/unzip_db' -include { UNZIP_DB as UNZIP_IMGT } from '../../modules/local/unzip_db' include { CHANGEO_ASSIGNGENES } from '../../modules/local/changeo/changeo_assigngenes' include { CHANGEO_MAKEDB } from '../../modules/local/changeo/changeo_makedb' include { CHANGEO_PARSEDB_SPLIT } from '../../modules/local/changeo/changeo_parsedb_split' @@ -15,52 +12,13 @@ workflow VDJ_ANNOTATION { take: ch_fasta // [meta, fasta] ch_validated_samplesheet + ch_igblast + ch_imgt main: ch_versions = Channel.empty() ch_logs = Channel.empty() - // FETCH DATABASES - // TODO: this can take a long time, and the progress shows 0%. Would be - // nice to have some better progress reporting. - // And maybe run this as 2 separate steps, one for IMGT and one for IgBLAST? 
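+    // Database staging (IMGT / IgBLAST fetch or unzip) now happens once in the
+    // DATABASES subworkflow (subworkflows/local/databases.nf); this subworkflow
+    // receives the prepared ch_igblast and ch_imgt channels as inputs instead.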
- if( !params.fetch_imgt ){ - if (params.igblast_base.endsWith(".zip")) { - Channel.fromPath("${params.igblast_base}") - .ifEmpty{ error "IGBLAST DB not found: ${params.igblast_base}" } - .set { ch_igblast_zipped } - UNZIP_IGBLAST( ch_igblast_zipped.collect() ) - ch_igblast = UNZIP_IGBLAST.out.unzipped - ch_versions = ch_versions.mix(UNZIP_IGBLAST.out.versions) - } else { - Channel.fromPath("${params.igblast_base}") - .ifEmpty { error "IGBLAST DB not found: ${params.igblast_base}" } - .set { ch_igblast } - } - } - - if( !params.fetch_imgt ){ - if (params.imgtdb_base.endsWith(".zip")) { - Channel.fromPath("${params.imgtdb_base}") - .ifEmpty{ error "IMGTDB not found: ${params.imgtdb_base}" } - .set { ch_imgt_zipped } - UNZIP_IMGT( ch_imgt_zipped.collect() ) - ch_imgt = UNZIP_IMGT.out.unzipped - ch_versions = ch_versions.mix(UNZIP_IMGT.out.versions) - } else { - Channel.fromPath("${params.imgtdb_base}") - .ifEmpty { error "IMGT DB not found: ${params.imgtdb_base}" } - .set { ch_imgt } - } - } - - if (params.fetch_imgt) { - FETCH_DATABASES() - ch_igblast = FETCH_DATABASES.out.igblast - ch_imgt = FETCH_DATABASES.out.imgt - ch_versions = ch_versions.mix(FETCH_DATABASES.out.versions) - } - CHANGEO_ASSIGNGENES ( ch_fasta, ch_igblast.collect() diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf index 746b175d..03023835 100644 --- a/workflows/airrflow.nf +++ b/workflows/airrflow.nf @@ -54,6 +54,7 @@ include { CHANGEO_CONVERTDB_FASTA as CHANGEO_CONVERTDB_FASTA_FROM_AIRR } from '. // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // +include { DATABASES } from '../subworkflows/local/databases' include { SEQUENCE_ASSEMBLY } from '../subworkflows/local/sequence_assembly' include { ASSEMBLED_INPUT_CHECK } from '../subworkflows/local/assembled_input_check' include { VDJ_ANNOTATION } from '../subworkflows/local/vdj_annotation' @@ -88,10 +89,16 @@ workflow AIRRFLOW { ch_versions = Channel.empty() ch_reassign_logs = Channel.empty() + // Download or fetch databases + DATABASES() + if ( params.mode == "fastq" ) { // Perform sequence assembly if input type is fastq - SEQUENCE_ASSEMBLY( ch_input ) + SEQUENCE_ASSEMBLY( + ch_input, + DATABASES.out.igblast.collect() + ) ch_fasta = SEQUENCE_ASSEMBLY.out.fasta ch_versions = ch_versions.mix(SEQUENCE_ASSEMBLY.out.versions) @@ -153,7 +160,9 @@ workflow AIRRFLOW { // Perform V(D)J annotation and filtering VDJ_ANNOTATION( ch_fasta, - ch_validated_samplesheet.collect() + ch_validated_samplesheet.collect(), + DATABASES.out.igblast.collect(), + DATABASES.out.imgt.collect() ) ch_versions = ch_versions.mix( VDJ_ANNOTATION.out.versions ) From 9e3a7ea208a8bc3c855ab27dd4ad6e1f63625d4f Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Wed, 10 Jan 2024 14:21:12 -0500 Subject: [PATCH 04/19] update assemblepairs sequential container --- .../local/presto/presto_assemblepairs_sequential.nf | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/modules/local/presto/presto_assemblepairs_sequential.nf b/modules/local/presto/presto_assemblepairs_sequential.nf index b75d7607..40e0e1b6 100644 --- a/modules/local/presto/presto_assemblepairs_sequential.nf +++ b/modules/local/presto/presto_assemblepairs_sequential.nf @@ -3,12 +3,10 @@ process PRESTO_ASSEMBLEPAIRS_SEQUENTIAL { label 'process_long_parallelized' label 'immcantation' - //TODO add igblast to container - conda "bioconda::presto=0.7.1 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" - //container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? - // 'https://depot.galaxyproject.org/singularity/quay.io/biocontainers/mulled-v2-23ca863c1007648366380a118c1cb3c060379dc5:190133c91e389ba1df2063a4eb7dc43d9c926c50-0' : - // 'biocontainers/mulled-v2-23ca863c1007648366380a118c1cb3c060379dc5:190133c91e389ba1df2063a4eb7dc43d9c926c50-0' }" - container "docker.io/immcantation/suite:4.4.0" + conda "bioconda::presto=0.7.1 bioconda::igblast=1.21.0 conda-forge::wget=1.20.1 conda-forge::biopython=1.79" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-865ad74e0cfd6de39e9e3ade759d826fce726425:25073cb5e81f4a0dcd2f99ddd308510b3461df7e-0' : + 'biocontainers/mulled-v2-865ad74e0cfd6de39e9e3ade759d826fce726425:25073cb5e81f4a0dcd2f99ddd308510b3461df7e-0' }" input: tuple val(meta), path(R1), path(R2) // reads in fastq format From 043dcb0a5a06fdc7097ebaf32fb7b8e9035f9fff Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Tue, 16 Jan 2024 13:38:09 -0500 Subject: [PATCH 05/19] work on protocols --- conf/{clontech.config => clontech_umi.config} | 0 conf/{nebnext.config => nebnext_umi.config} | 0 nextflow.config | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename conf/{clontech.config => clontech_umi.config} (100%) rename conf/{nebnext.config => nebnext_umi.config} (100%) diff --git a/conf/clontech.config b/conf/clontech_umi.config similarity index 100% rename from conf/clontech.config rename to conf/clontech_umi.config diff --git a/conf/nebnext.config b/conf/nebnext_umi.config similarity index 100% rename from conf/nebnext.config rename to conf/nebnext_umi.config diff --git a/nextflow.config b/nextflow.config index c7aa9e2a..b970e397 100644 --- a/nextflow.config +++ b/nextflow.config @@ -273,8 +273,8 @@ profiles { test_nocluster { includeConfig 'conf/test_nocluster.config' } test_fetchimgt { includeConfig 'conf/test_fetchimgt.config' } test_igblast { includeConfig 'conf/test_igblast.config' } - nebnext { includeConfig 'conf/nebnext.config' } - clontech { includeConfig 'conf/clontech.config' } + nebnext_umi { includeConfig 'conf/nebnext_umi.config' } + clontech_umi { includeConfig 'conf/clontech_umi.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile From b113679a6d9e4ba2581044ea4527b62926c148c6 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Fri, 19 Jan 2024 18:04:31 -0500 Subject: [PATCH 06/19] maskprimers separate R1 and R2 --- bin/log_parsing.py | 119 ++++++++---------- conf/nebnext_umi.config | 1 + modules/local/presto/presto_filterseq.nf | 6 +- modules/local/presto/presto_maskprimers.nf | 10 +- .../local/presto/presto_maskprimers_align.nf | 20 +-- .../presto/presto_maskprimers_extract.nf | 38 ++++++ .../presto/presto_maskprimers_postassembly.nf | 4 +- subworkflows/local/presto_umi.nf | 23 +++- 8 files changed, 130 insertions(+), 91 deletions(-) create mode 100644 modules/local/presto/presto_maskprimers_extract.nf diff --git a/bin/log_parsing.py b/bin/log_parsing.py index dce9c831..4534a0d5 100755 --- a/bin/log_parsing.py +++ b/bin/log_parsing.py @@ -52,7 +52,7 @@ df_process_list = [] for process in processes: - find = subprocess.check_output(["find", process, "-name", "*command_log.txt"]) + find = subprocess.check_output(["find", process, "-name", "*command_log*"]) log_files = find.decode().split("\n") log_files = list(filter(None, log_files)) @@ -90,50 +90,37 @@ elif process in ["mask_primers", "filter_by_sequence_quality"]: 
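+        # Quality-filter and mask-primers logs are now written per read
+        # (*_command_log_R1.txt / *_command_log_R2.txt), so each log file is
+        # parsed on its own and its read type is recorded in a "readtype" column.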
s_code = [] + s_readtype = [] output_file = [] - seqs_R1 = [] - seqs_R2 = [] - pass_R1 = [] - pass_R2 = [] - fail_R1 = [] - fail_R2 = [] + n_seqs = [] + n_pass = [] + n_fail = [] process_name = [] for logfile in log_files: - c = 0 + if "_R1" in logfile: + s_readtype.append("R1") + elif "_R2" in logfile: + s_readtype.append("R2") with open(logfile, "r") as f: for line in f: if " START>" in line: - if c < 1: s_code.append(logfile.split("/")[1].split("_command_log")[0]) - process_name.append(process) elif "SEQUENCES>" in line: - if c < 1: - seqs_R1.append(line.strip().removeprefix("SEQUENCES> ")) - else: - seqs_R2.append(line.strip().removeprefix("SEQUENCES> ")) + n_seqs.append(line.strip().removeprefix("SEQUENCES> ")) elif "PASS>" in line: - if c < 1: - pass_R1.append(line.strip().removeprefix("PASS> ")) - else: - pass_R2.append(line.strip().removeprefix("PASS> ")) + n_pass.append(line.strip().removeprefix("PASS> ")) elif "FAIL>" in line: - if c < 1: - fail_R1.append(line.strip().removeprefix("FAIL> ")) - c += 1 - else: - fail_R2.append(line.strip().removeprefix("FAIL> ")) + n_fail.append(line.strip().removeprefix("FAIL> ")) df_process = pd.DataFrame.from_dict( { "Sample": s_code, - "start_R1": seqs_R1, - "start_R2": seqs_R2, - "pass_R1": pass_R1, - "pass_R2": pass_R2, - "fail_R1": fail_R1, - "fail_R2": fail_R2, + "readtype": s_readtype, + "start": n_seqs, + "pass": n_pass, + "fail": n_fail, "process": process_name, } ) @@ -344,40 +331,7 @@ df_process_list.append(df_process) -# Getting table colnames -colnames = [ - "Sample", - "Sequences_R1", - "Sequences_R2", - "Filtered_quality_R1", - "Filtered_quality_R2", - "Mask_primers_R1", - "Mask_primers_R2", - "Paired", - "Build_consensus", - "Assemble_pairs", - "Unique", - "Representative_2", - "Igblast", -] - - -values = [ - df_process_list[0].sort_values(by=["Sample"]).iloc[:, 0].tolist(), - df_process_list[0].sort_values(by=["Sample"]).loc[:, "start_R1"].tolist(), - df_process_list[0].sort_values(by=["Sample"]).loc[:, "start_R2"].tolist(), - df_process_list[0].sort_values(by=["Sample"]).loc[:, "pass_R1"].tolist(), - df_process_list[0].sort_values(by=["Sample"]).loc[:, "pass_R2"].tolist(), - df_process_list[1].sort_values(by=["Sample"]).loc[:, "pass_R1"].tolist(), - df_process_list[1].sort_values(by=["Sample"]).loc[:, "pass_R2"].tolist(), - df_process_list[2].sort_values(by=["Sample"]).loc[:, "pass_pairs"].tolist(), - df_process_list[4].sort_values(by=["Sample"]).loc[:, "pass_pairs"].tolist(), - df_process_list[5].sort_values(by=["Sample"]).loc[:, "pass_pairs"].tolist(), - df_process_list[6].sort_values(by=["Sample"]).loc[:, "unique"].tolist(), - df_process_list[7].sort_values(by=["Sample"]).loc[:, "repres_2"].tolist(), - df_process_list[7].sort_values(by=["Sample"]).loc[:, "pass_igblast"].tolist(), -] # Tables provide extra info and help debugging @@ -385,7 +339,7 @@ path_or_buf="Table_all_details_filter_quality.tsv", sep="\t", header=True, - index=False, + index=True, ) df_process_list[1].to_csv(path_or_buf="Table_all_details_mask_primers.tsv", sep="\t", header=True, index=False) df_process_list[2].to_csv(path_or_buf="Table_all_details_paired.tsv", sep="\t", header=True, index=False) @@ -393,7 +347,7 @@ path_or_buf="Table_all_details_build_consensus.tsv", sep="\t", header=True, - index=False, + index=True, ) df_process_list[4].to_csv(path_or_buf="Table_all_details_repaired.tsv", sep="\t", header=True, index=False) df_process_list[5].to_csv( @@ -413,6 +367,43 @@ index=False, ) +# Getting table colnames + +colnames = [ + "Sample", + 
"Sequences_R1", + "Sequences_R2", + "Filtered_quality_R1", + "Filtered_quality_R2", + "Mask_primers_R1", + "Mask_primers_R2", + "Paired", + "Build_consensus", + "Assemble_pairs", + "Unique", + "Representative_2", + "Igblast", +] + +print(df_process_list[0].sort_values(by=["Sample"]).pivot(index="Sample", columns="readtype")) + +values = [ + df_process_list[2].sort_values(by=["Sample"]).iloc[:, 0].tolist(), + df_process_list[0].sort_values(by=["Sample"]).pivot(index="Sample", columns="readtype")["start"]["R1"].tolist(), + df_process_list[0].sort_values(by=["Sample"]).pivot(index="Sample", columns="readtype")["start"]["R2"].tolist(), + df_process_list[0].sort_values(by=["Sample"]).pivot(index="Sample", columns="readtype")["pass"]["R1"].tolist(), + df_process_list[0].sort_values(by=["Sample"]).pivot(index="Sample", columns="readtype")["pass"]["R2"].tolist(), + df_process_list[1].sort_values(by=["Sample"]).pivot(index="Sample", columns="readtype")["pass"]["R1"].tolist(), + df_process_list[1].sort_values(by=["Sample"]).pivot(index="Sample", columns="readtype")["pass"]["R2"].tolist(), + df_process_list[2].sort_values(by=["Sample"]).loc[:, "pass_pairs"].tolist(), + df_process_list[4].sort_values(by=["Sample"]).loc[:, "pass_pairs"].tolist(), + df_process_list[5].sort_values(by=["Sample"]).loc[:, "pass_pairs"].tolist(), + df_process_list[6].sort_values(by=["Sample"]).loc[:, "unique"].tolist(), + df_process_list[7].sort_values(by=["Sample"]).loc[:, "repres_2"].tolist(), + df_process_list[7].sort_values(by=["Sample"]).loc[:, "pass_igblast"].tolist(), +] + + final_table = dict(zip(colnames, values)) print(final_table) df_final_table = pd.DataFrame.from_dict(final_table) diff --git a/conf/nebnext_umi.config b/conf/nebnext_umi.config index 7b19a1ed..95e480a3 100644 --- a/conf/nebnext_umi.config +++ b/conf/nebnext_umi.config @@ -28,4 +28,5 @@ params { primer_r1_maxerror = 0.2 primer_r2_maxerror = 0.5 assemblepairs_sequential = true + maskprimers_align = false } diff --git a/modules/local/presto/presto_filterseq.nf b/modules/local/presto/presto_filterseq.nf index 4af4267b..a7733147 100644 --- a/modules/local/presto/presto_filterseq.nf +++ b/modules/local/presto/presto_filterseq.nf @@ -13,7 +13,7 @@ process PRESTO_FILTERSEQ { output: tuple val(meta), path("*R1_quality-pass.fastq"), path("*R2_quality-pass.fastq") , emit: reads - path "*_command_log.txt" , emit: logs + path "*_command_log_R?.txt" , emit: logs path "versions.yml" , emit: versions path "*_R1.log" path "*_R2.log" @@ -21,8 +21,8 @@ process PRESTO_FILTERSEQ { script: """ - FilterSeq.py quality -s $R1 -q ${params.filterseq_q} --outname ${meta.id}_R1 --log ${R1.baseName}_R1.log --nproc ${task.cpus} > ${meta.id}_command_log.txt - FilterSeq.py quality -s $R2 -q ${params.filterseq_q} --outname ${meta.id}_R2 --log ${R2.baseName}_R2.log --nproc ${task.cpus} >> ${meta.id}_command_log.txt + FilterSeq.py quality -s $R1 -q ${params.filterseq_q} --outname ${meta.id}_R1 --log ${R1.baseName}_R1.log --nproc ${task.cpus} > ${meta.id}_command_log_R1.txt + FilterSeq.py quality -s $R2 -q ${params.filterseq_q} --outname ${meta.id}_R2 --log ${R2.baseName}_R2.log --nproc ${task.cpus} >> ${meta.id}_command_log_R2.txt ParseLog.py -l ${R1.baseName}_R1.log ${R2.baseName}_R2.log -f ID QUALITY cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/presto/presto_maskprimers.nf b/modules/local/presto/presto_maskprimers.nf index 6845f44c..48e66a84 100644 --- a/modules/local/presto/presto_maskprimers.nf +++ b/modules/local/presto/presto_maskprimers.nf @@ -15,7 +15,7 @@ 
process PRESTO_MASKPRIMERS { output: tuple val(meta), path("*_R1_primers-pass.fastq"), path("*_R2_primers-pass.fastq") , emit: reads - path "*_command_log.txt", emit: logs + path "*_command_log_R?.txt", emit: logs path "*_R1.log" path "*_R2.log" path "*.tab", emit: log_tab @@ -28,8 +28,8 @@ process PRESTO_MASKPRIMERS { def primer_start_R1 = (params.index_file | params.umi_position == 'R1') ? "--start ${params.umi_length + params.cprimer_start} --barcode" : "--start ${params.cprimer_start}" def primer_start_R2 = (params.umi_position == 'R2') ? "--start ${params.umi_length + params.vprimer_start} --barcode" : "--start ${params.vprimer_start}" """ - MaskPrimers.py score --nproc ${task.cpus} -s $R1 -p ${cprimers} $primer_start_R1 $revpr --maxerror ${params.primer_r1_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log.txt - MaskPrimers.py score --nproc ${task.cpus} -s $R2 -p ${vprimers} $primer_start_R2 $revpr --maxerror ${params.primer_r2_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R2 --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt + MaskPrimers.py score --nproc ${task.cpus} -s $R1 -p ${cprimers} $primer_start_R1 $revpr --maxerror ${params.primer_r1_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log_R1.txt + MaskPrimers.py score --nproc ${task.cpus} -s $R2 -p ${vprimers} $primer_start_R2 $revpr --maxerror ${params.primer_r2_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R2 --log ${meta.id}_R2.log > ${meta.id}_command_log_R2.txt ParseLog.py -l ${meta.id}_R1.log ${meta.id}_R2.log -f ID PRIMER ERROR cat <<-END_VERSIONS > versions.yml @@ -41,8 +41,8 @@ process PRESTO_MASKPRIMERS { def primer_start_R1 = (params.index_file | params.umi_position == 'R1') ? "--start ${params.umi_length + params.vprimer_start} --barcode" : "--start ${params.vprimer_start}" def primer_start_R2 = (params.umi_position == 'R2') ? 
"--start ${params.umi_length + params.cprimer_start} --barcode" : "--start ${params.cprimer_start}" """ - MaskPrimers.py score --nproc ${task.cpus} -s $R1 -p ${vprimers} $primer_start_R1 $revpr --maxerror ${params.primer_r1_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log.txt - MaskPrimers.py score --nproc ${task.cpus} -s $R2 -p ${cprimers} $primer_start_R2 $revpr --maxerror ${params.primer_r2_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R2 --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt + MaskPrimers.py score --nproc ${task.cpus} -s $R1 -p ${vprimers} $primer_start_R1 $revpr --maxerror ${params.primer_r1_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log_R1.txt + MaskPrimers.py score --nproc ${task.cpus} -s $R2 -p ${cprimers} $primer_start_R2 $revpr --maxerror ${params.primer_r2_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R2 --log ${meta.id}_R2.log > ${meta.id}_command_log_R2.txt ParseLog.py -l "${meta.id}_R1.log" "${meta.id}_R2.log" -f ID PRIMER ERROR cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/presto/presto_maskprimers_align.nf b/modules/local/presto/presto_maskprimers_align.nf index 58e0bcd3..5716f478 100644 --- a/modules/local/presto/presto_maskprimers_align.nf +++ b/modules/local/presto/presto_maskprimers_align.nf @@ -9,18 +9,16 @@ process PRESTO_MASKPRIMERS_ALIGN { 'biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: - tuple val(meta), path(R1), path(R2) + tuple val(meta), path(R1) path(cprimers) output: - tuple val(meta), path("*_R1_primers-pass.fastq"), path("*_R2_primers-pass.fastq") , emit: reads - path "*_command_log.txt", emit: logs + tuple val(meta), path("*_R1_primers-pass.fastq") , emit: reads + path "*_command_log_R1.txt", emit: logs path "*_R1.log" - path "*_R2.log" path "*.tab", emit: log_tab path "versions.yml" , emit: versions - script: """ MaskPrimers.py align --nproc ${task.cpus} \\ @@ -31,16 +29,8 @@ process PRESTO_MASKPRIMERS_ALIGN { --mode ${params.primer_mask_mode} \\ --skiprc \\ --outname ${meta.id}_R1 \\ - --log ${meta.id}_R1.log > ${meta.id}_command_log.txt - MaskPrimers.py extract --nproc ${task.cpus} \\ - -s $R2 \\ - --start ${params.umi_length} \\ - --len ${params.primer_extract_len} \\ - --barcode \\ - --mode ${params.primer_mask_mode} \\ - --outname ${meta.id}_R2 \\ - --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt - ParseLog.py -l ${meta.id}_R1.log ${meta.id}_R2.log -f ID PRIMER ERROR + --log ${meta.id}_R1.log > ${meta.id}_command_log_R1.txt + ParseLog.py -l ${meta.id}_R1.log -f ID PRIMER ERROR cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/presto/presto_maskprimers_extract.nf b/modules/local/presto/presto_maskprimers_extract.nf new file mode 100644 index 00000000..7c58c306 --- /dev/null +++ b/modules/local/presto/presto_maskprimers_extract.nf @@ -0,0 +1,38 @@ +process PRESTO_MASKPRIMERS_EXTRACT { + tag "$meta.id" + label "process_high" + label 'immcantation' + + conda "bioconda::presto=0.7.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'biocontainers/presto:0.7.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(R2) + + output: + tuple val(meta), path("*_R2_primers-pass.fastq") , emit: reads + path "*_command_log_R2.txt", emit: logs + path "*_R2.log" + path "*.tab", emit: log_tab + path "versions.yml" , emit: versions + + script: + """ + MaskPrimers.py extract --nproc ${task.cpus} \\ + -s $R2 \\ + --start ${params.umi_length} \\ + --len ${params.primer_extract_len} \\ + --barcode \\ + --mode ${params.primer_mask_mode} \\ + --outname ${meta.id}_R2 \\ + --log ${meta.id}_R2.log >> ${meta.id}_command_log_R2.txt + ParseLog.py -l ${meta.id}_R2.log -f ID PRIMER ERROR + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + presto: \$( MaskPrimers.py --version | awk -F' ' '{print \$2}' ) + END_VERSIONS + """ +} diff --git a/modules/local/presto/presto_maskprimers_postassembly.nf b/modules/local/presto/presto_maskprimers_postassembly.nf index 8c6c742b..91427c90 100644 --- a/modules/local/presto/presto_maskprimers_postassembly.nf +++ b/modules/local/presto/presto_maskprimers_postassembly.nf @@ -39,10 +39,10 @@ process PRESTO_MASKPRIMERS_POSTASSEMBLY { """ } else if (params.cprimer_position == "R2") { """ - MaskPrimers.py score --nproc ${task.cpus} -s $reads -p ${vprimers} --start ${params.cprimer_start} --maxerror ${params.primer_r1_maxerror} \ + MaskPrimers.py score --nproc ${task.cpus} -s $reads -p ${vprimers} --start ${params.vprimer_start} --maxerror ${params.primer_r1_maxerror} \ --mode ${params.primer_mask_mode} --outname ${meta.id}-FWD \ --log ${meta.id}-FWD.log > ${meta.id}_command_log.txt - MaskPrimers.py score --nproc ${task.cpus} -s ${meta.id}-FWD_primers-pass.fastq -p ${cprimers} --start ${params.vprimer_start} --maxerror ${params.primer_r2_maxerror} \ + MaskPrimers.py score --nproc ${task.cpus} -s ${meta.id}-FWD_primers-pass.fastq -p ${cprimers} --start ${params.cprimer_start} --maxerror ${params.primer_r2_maxerror} \ --mode ${params.primer_mask_mode} --outname ${meta.id}-REV $revpr \ --log ${meta.id}-REV.log >> ${meta.id}_command_log.txt ParseLog.py -l ${meta.id}-FWD.log ${meta.id}-REV.log -f ID PRIMER ERROR diff --git a/subworkflows/local/presto_umi.nf b/subworkflows/local/presto_umi.nf index c3dc99c8..ab1fca77 100644 --- a/subworkflows/local/presto_umi.nf +++ b/subworkflows/local/presto_umi.nf @@ -10,6 +10,7 @@ include { FASTP } from '../../modules/n include { PRESTO_FILTERSEQ as PRESTO_FILTERSEQ_UMI } from '../../modules/local/presto/presto_filterseq' include { PRESTO_MASKPRIMERS as PRESTO_MASKPRIMERS_UMI } from '../../modules/local/presto/presto_maskprimers' include { PRESTO_MASKPRIMERS_ALIGN } from '../../modules/local/presto/presto_maskprimers_align' +include { PRESTO_MASKPRIMERS_EXTRACT } from '../../modules/local/presto/presto_maskprimers_extract' include { PRESTO_PAIRSEQ as PRESTO_PAIRSEQ_UMI } from '../../modules/local/presto/presto_pairseq' include { PRESTO_CLUSTERSETS as PRESTO_CLUSTERSETS_UMI } from '../../modules/local/presto/presto_clustersets' include { PRESTO_PARSE_CLUSTER as PRESTO_PARSE_CLUSTER_UMI } from '../../modules/local/presto/presto_parse_cluster' @@ -94,14 +95,31 @@ workflow PRESTO_UMI { ch_versions = ch_versions.mix(PRESTO_FILTERSEQ_UMI.out.versions) if (params.maskprimers_align) { + + ch_reads_R1 = PRESTO_FILTERSEQ_UMI.out.reads + .map{ reads -> [reads[0], reads[1]] }.dump(tag: 'ch_reads_R1') + ch_reads_R2 = PRESTO_FILTERSEQ_UMI.out.reads + .map{ reads -> [reads[0], reads[2]] }.dump(tag: 'ch_reads_R2') 
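+        // R1 carries the C-region primer and is masked with MaskPrimers align
+        // against the C-primer fasta, while R2 only needs the UMI extracted
+        // (MaskPrimers extract); the two channels are re-joined by sample id below.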
PRESTO_MASKPRIMERS_ALIGN( - PRESTO_FILTERSEQ_UMI.out.reads, + ch_reads_R1, ch_cprimers.collect() ) + PRESTO_MASKPRIMERS_EXTRACT( + ch_reads_R2 + ) + ch_versions = ch_versions.mix(PRESTO_MASKPRIMERS_ALIGN.out.versions) - ch_maskprimers_reads = PRESTO_MASKPRIMERS_ALIGN.out.reads + // Merge again R1 and R2 by sample ID. + ch_maskprimers_reads_R1 = PRESTO_MASKPRIMERS_ALIGN.out.reads.map{ reads -> [reads[0].id, reads[0], reads[1]]}.dump(tag: 'ch_maskprimers_reads_R1') + ch_maskprimers_reads_R2 = PRESTO_MASKPRIMERS_EXTRACT.out.reads.map{ reads -> [reads[0].id, reads[0], reads[1]]}.dump(tag: 'ch_maskprimers_reads_R2') + ch_maskprimers_reads = ch_maskprimers_reads_R1.join(ch_maskprimers_reads_R2) + .map{ it -> [it[1], it[2], it[4]] }.dump(tag: 'ch_maskprimers_reads_after_remerge') + ch_maskprimers_logs = PRESTO_MASKPRIMERS_ALIGN.out.logs + ch_maskprimers_logs = ch_maskprimers_logs.mix(PRESTO_MASKPRIMERS_EXTRACT.out.logs) + } else { + // Mask primers PRESTO_MASKPRIMERS_UMI ( PRESTO_FILTERSEQ_UMI.out.reads, @@ -111,6 +129,7 @@ workflow PRESTO_UMI { ch_versions = ch_versions.mix(PRESTO_MASKPRIMERS_UMI.out.versions) ch_maskprimers_reads = PRESTO_MASKPRIMERS_UMI.out.reads ch_maskprimers_logs = PRESTO_MASKPRIMERS_UMI.out.logs + } From 5b6e02f041277abd7cc7087242a63809ff8b4287 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Fri, 19 Jan 2024 18:08:23 -0500 Subject: [PATCH 07/19] fix black formating --- bin/log_parsing.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/bin/log_parsing.py b/bin/log_parsing.py index 4534a0d5..d262f51b 100755 --- a/bin/log_parsing.py +++ b/bin/log_parsing.py @@ -105,14 +105,14 @@ with open(logfile, "r") as f: for line in f: if " START>" in line: - s_code.append(logfile.split("/")[1].split("_command_log")[0]) - process_name.append(process) + s_code.append(logfile.split("/")[1].split("_command_log")[0]) + process_name.append(process) elif "SEQUENCES>" in line: - n_seqs.append(line.strip().removeprefix("SEQUENCES> ")) + n_seqs.append(line.strip().removeprefix("SEQUENCES> ")) elif "PASS>" in line: - n_pass.append(line.strip().removeprefix("PASS> ")) + n_pass.append(line.strip().removeprefix("PASS> ")) elif "FAIL>" in line: - n_fail.append(line.strip().removeprefix("FAIL> ")) + n_fail.append(line.strip().removeprefix("FAIL> ")) df_process = pd.DataFrame.from_dict( { @@ -332,8 +332,6 @@ df_process_list.append(df_process) - - # Tables provide extra info and help debugging df_process_list[0].to_csv( path_or_buf="Table_all_details_filter_quality.tsv", From 6d96816824f0bb78e4ebe980070b5ca1bd3793d8 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Fri, 19 Jan 2024 20:05:20 -0500 Subject: [PATCH 08/19] add cregion alignment --- conf/modules.config | 12 ++++++++++ conf/nebnext_umi.config | 5 +++++ .../local/presto/presto_maskprimers_align.nf | 15 ++++++++----- nextflow.config | 20 ++++++++++++++--- subworkflows/local/presto_umi.nf | 22 ++++++++++++++++--- subworkflows/local/sequence_assembly.nf | 17 ++++++++++++++ 6 files changed, 80 insertions(+), 11 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index d7bcc1ec..4cd6a4ee 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -152,6 +152,18 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] + ext.args = '--skiprc' + ext.args2 = '-f ID PRIMER ERROR' + } + + withName: PRESTO_ALIGN_CREGION { + publishDir = [ + path: { "${params.outdir}/presto/internal_cregion/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '--skiprc --revpr --pf CREGION' + ext.args2 = '-f ID PRIMER ERROR --outname cregion_alignment' } withName: PRESTO_MASKPRIMERS_POSTASSEMBLY_SANS_UMI { diff --git a/conf/nebnext_umi.config b/conf/nebnext_umi.config index 95e480a3..f1eec630 100644 --- a/conf/nebnext_umi.config +++ b/conf/nebnext_umi.config @@ -29,4 +29,9 @@ params { primer_r2_maxerror = 0.5 assemblepairs_sequential = true maskprimers_align = false + align_cregion = true + internal_cregion_sequences = 'https://bitbucket.org/kleinstein/immcantation/raw/2025594fd9a2a64df4444070171d6fc00c4e78c7/protocols/AbSeq/AbSeq_Human_IG_InternalCRegion.fasta' + cregion_maxlen = 100 + cregion_maxerror = 0.3 + cregion_mask_mode = 'tag' } diff --git a/modules/local/presto/presto_maskprimers_align.nf b/modules/local/presto/presto_maskprimers_align.nf index 5716f478..055e5d93 100644 --- a/modules/local/presto/presto_maskprimers_align.nf +++ b/modules/local/presto/presto_maskprimers_align.nf @@ -11,6 +11,9 @@ process PRESTO_MASKPRIMERS_ALIGN { input: tuple val(meta), path(R1) path(cprimers) + val(max_len) + val(max_error) + val(mask_mode) output: tuple val(meta), path("*_R1_primers-pass.fastq") , emit: reads @@ -20,17 +23,19 @@ process PRESTO_MASKPRIMERS_ALIGN { path "versions.yml" , emit: versions script: + def args = task.ext.args?: '' + def args2 = task.ext.args2?: '' """ MaskPrimers.py align --nproc ${task.cpus} \\ -s $R1 \\ -p ${cprimers} \\ - --maxlen ${params.primer_maxlen} \\ - --maxerror ${params.primer_r1_maxerror} \\ - --mode ${params.primer_mask_mode} \\ - --skiprc \\ + --maxlen ${max_len} \\ + --maxerror ${max_error} \\ + --mode ${mask_mode} \\ + $args \\ --outname ${meta.id}_R1 \\ --log ${meta.id}_R1.log > ${meta.id}_command_log_R1.txt - ParseLog.py -l ${meta.id}_R1.log -f ID PRIMER ERROR + ParseLog.py -l ${meta.id}_R1.log $args2 cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index b970e397..ce45cbb4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -47,19 +47,33 @@ params { // -------------------------- // sequence assembly options // -------------------------- + // Filter sequences filterseq_q = 20 - maskprimers_align = false - primer_extract_len = 0 - primer_maxlen = 50 + + // Mask primers + primer_r1_maxerror = 0.2 primer_r2_maxerror = 0.2 primer_mask_mode = 'cut' + maskprimers_align = false + primer_extract_len = 0 + primer_maxlen = 50 + + // Build consensus primer_consensus = 0.6 buildconsensus_maxerror = 0.1 buildconsensus_maxgap = 0.5 cluster_sets = true + + // Assemble pairs assemblepairs_sequential = false + // internal cregion + internal_cregion_sequences = null + cregion_maxlen = 100 + cregion_maxerror = 0.3 + cregion_mask_mode = 'tag' + // ----------------------- // vdj annotation options // ----------------------- diff --git a/subworkflows/local/presto_umi.nf b/subworkflows/local/presto_umi.nf index ab1fca77..e32f2749 100644 --- a/subworkflows/local/presto_umi.nf +++ b/subworkflows/local/presto_umi.nf @@ -11,6 +11,7 @@ include { PRESTO_FILTERSEQ as PRESTO_FILTERSEQ_UMI } from '../../modu include { PRESTO_MASKPRIMERS as PRESTO_MASKPRIMERS_UMI } from '../../modules/local/presto/presto_maskprimers' include { PRESTO_MASKPRIMERS_ALIGN } from 
'../../modules/local/presto/presto_maskprimers_align' include { PRESTO_MASKPRIMERS_EXTRACT } from '../../modules/local/presto/presto_maskprimers_extract' +include { PRESTO_MASKPRIMERS_ALIGN as PRESTO_ALIGN_CREGION } from '../../modules/local/presto/presto_maskprimers_align' include { PRESTO_PAIRSEQ as PRESTO_PAIRSEQ_UMI } from '../../modules/local/presto/presto_pairseq' include { PRESTO_CLUSTERSETS as PRESTO_CLUSTERSETS_UMI } from '../../modules/local/presto/presto_clustersets' include { PRESTO_PARSE_CLUSTER as PRESTO_PARSE_CLUSTER_UMI } from '../../modules/local/presto/presto_parse_cluster' @@ -31,6 +32,7 @@ workflow PRESTO_UMI { ch_cprimers // channel: [ cprimers.fasta ] ch_vprimers // channel: [ vprimers.fasta ] ch_adapter_fasta // channel: [ adapters.fasta ] + ch_internal_cregion // channel: [ internal_cregions.fasta ] ch_igblast main: @@ -102,7 +104,10 @@ workflow PRESTO_UMI { .map{ reads -> [reads[0], reads[2]] }.dump(tag: 'ch_reads_R2') PRESTO_MASKPRIMERS_ALIGN( ch_reads_R1, - ch_cprimers.collect() + ch_cprimers.collect(), + params.primer_maxlen, + params.primer_r1_maxerror, + params.primer_mask_mode ) PRESTO_MASKPRIMERS_EXTRACT( ch_reads_R2 @@ -191,7 +196,18 @@ workflow PRESTO_UMI { ch_assemblepairs_logs = PRESTO_ASSEMBLEPAIRS_UMI.out.logs } - + if (params.align_cregion) { + PRESTO_ALIGN_CREGION( + ch_assemblepairs_reads, + ch_internal_cregion.collect(), + params.cregion_maxlen, + params.cregion_maxerror, + params.cregion_mask_mode + ) + ch_parseheaders_reads = PRESTO_ALIGN_CREGION.out.reads + } else { + ch_parseheaders_reads = ch_assemblepairs_reads + } // Generate QC stats after reads paired and filtered but before collapsed FASTQC_POSTASSEMBLY_UMI ( @@ -201,7 +217,7 @@ workflow PRESTO_UMI { // Combine UMI duplicate count PRESTO_PARSEHEADERS_COLLAPSE_UMI ( - ch_assemblepairs_reads + ch_parseheaders_reads ) ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_COLLAPSE_UMI.out.versions) diff --git a/subworkflows/local/sequence_assembly.nf b/subworkflows/local/sequence_assembly.nf index d3991d33..0ae99c83 100644 --- a/subworkflows/local/sequence_assembly.nf +++ b/subworkflows/local/sequence_assembly.nf @@ -85,6 +85,11 @@ workflow SEQUENCE_ASSEMBLY { if (params.umi_length < 2) { error "The 'specific_pcr_umi' library generation method requires setting the '--umi_length' to a value greater than 1." } + if (params.internal_cregion_sequences) { + ch_internal_cregion = Channel.fromPath(params.internal_cregion_sequences, checkIfExists: true) + } else { + ch_internal_cregion = Channel.of([]) + } } else if (params.library_generation_method == 'specific_pcr') { if (params.vprimers) { ch_vprimers_fasta = Channel.fromPath(params.vprimers, checkIfExists: true) @@ -104,6 +109,9 @@ workflow SEQUENCE_ASSEMBLY { } else { params.umi_length = 0 } + if (params.internal_cregion_sequences) { + error "Please do not set '--internal_cregion_sequences' when using the 'specific_pcr' library generation method without UMIs." + } } else if (params.library_generation_method == 'dt_5p_race_umi') { if (params.vprimers) { error "The oligo-dT 5'-RACE UMI library generation method does not accept V-region primers, please provide a linker with '--race_linker' instead or select another library method option." @@ -122,6 +130,11 @@ workflow SEQUENCE_ASSEMBLY { if (params.umi_length < 2) { error "The oligo-dT 5'-RACE UMI 'dt_5p_race_umi' library generation method requires specifying the '--umi_length' to a value greater than 1." 
} + if (params.internal_cregion_sequences) { + ch_internal_cregion = Channel.fromPath(params.internal_cregion_sequences, checkIfExists: true) + } else { + ch_internal_cregion = Channel.of([]) + } } else if (params.library_generation_method == 'dt_5p_race') { if (params.vprimers) { error "The oligo-dT 5'-RACE library generation method does not accept V-region primers, please provide a linker with '--race_linker' instead or select another library method option." @@ -142,6 +155,9 @@ workflow SEQUENCE_ASSEMBLY { } else { params.umi_length = 0 } + if (params.internal_cregion_sequences) { + error "Please do not set '--internal_cregion_sequences' when using the 'dt_5p_race' library generation method without UMIs." + } } else { error "The provided library generation method is not supported. Please check the docs for `--library_generation_method`." } @@ -197,6 +213,7 @@ workflow SEQUENCE_ASSEMBLY { ch_cprimers_fasta, ch_vprimers_fasta, ch_adapter_fasta, + ch_internal_cregion, ch_igblast.collect() ) ch_presto_fasta = PRESTO_UMI.out.fasta From b766ac5af2f3c758cb4784b99bd56b23ef953449 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Tue, 23 Jan 2024 22:01:29 -0500 Subject: [PATCH 09/19] work on clontech umi profile --- conf/clontech_umi.config | 3 + conf/modules.config | 71 ++++++++++- conf/nebnext_umi.config | 1 + modules/local/presto/presto_buildconsensus.nf | 6 +- .../presto/presto_maskprimers_extract.nf | 6 +- modules/local/presto/presto_pairseq.nf | 3 +- nextflow.config | 1 + subworkflows/local/presto_umi.nf | 117 +++++++++++++----- 8 files changed, 163 insertions(+), 45 deletions(-) diff --git a/conf/clontech_umi.config b/conf/clontech_umi.config index d4bbdf79..4fe00bc2 100644 --- a/conf/clontech_umi.config +++ b/conf/clontech_umi.config @@ -26,6 +26,8 @@ params { vprimer_start = 0 umi_length = 12 umi_position = 'R2' + cluster_sets = false + // Mask primer options maskprimers_align = true @@ -34,4 +36,5 @@ params { primer_maxlen = 70 primer_r1_maxerror = 0.2 assemblepairs_sequential = true + primer_consensus = 0.6 } diff --git a/conf/modules.config b/conf/modules.config index 4cd6a4ee..7ac94a44 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -152,8 +152,8 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - ext.args = '--skiprc' - ext.args2 = '-f ID PRIMER ERROR' + ext.args = '--skiprc --pf CREGION' + ext.args2 = '-f ID CREGION ERROR' } withName: PRESTO_ALIGN_CREGION { @@ -166,6 +166,16 @@ process { ext.args2 = '-f ID PRIMER ERROR --outname cregion_alignment' } + withName: PRESTO_MASKPRIMERS_EXTRACT { + publishDir = [ + path: { "${params.outdir}/presto/02-maskprimers/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '--barcode --bf BARCODE' + ext.args2 = '-f ID PRIMER ERROR PRSTART' + } + withName: PRESTO_MASKPRIMERS_POSTASSEMBLY_SANS_UMI { publishDir = [ path: { "${params.outdir}/presto/03-maskprimers/${meta.id}" }, @@ -180,6 +190,16 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] + ext.args = "--coord illumina" + } + + withName: PRESTO_PAIRSEQ_ALIGN { + publishDir = [ + path: { "${params.outdir}/presto/03-pairseq/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.args = '--1f CREGION --coord illumina' } withName: PRESTO_CLUSTERSETS { @@ -204,9 +224,20 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - ext.args = '' - ext.args2 = '' - ext.args3 = 'ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT' + ext.args = '--pf PRIMER' + ext.args2 = '--pf PRIMER' + ext.args3 = '-f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT ERROR' + } + + withName: PRESTO_BUILDCONSENSUS_ALIGN { + publishDir = [ + path: { "${params.outdir}/presto/06-build-consensus/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '--pf CREGION' + ext.args2 = '--pf CREGION' + ext.args3 = '-f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT ERROR' } withName: PRESTO_POSTCONSENSUS_PAIRSEQ { @@ -234,7 +265,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = '--coord presto --rc tail --1f CONSCOUNT --2f CONSCOUNT PRCONS --minlen 8 --maxerror 0.3 --alpha 1e-5 --scanrev --minident 0.5 --evalue 1e-5 --maxhits 100 --aligner blastn' - ext.args2 = '-f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT LENGTH OVERLAP ERROR PVALUE' + ext.args2 = '-f ID REFID LENGTH OVERLAP GAP ERROR PVALUE EVALUE1 EVALUE2 IDENTITY FIELDS1 FIELDS2' } withName: PRESTO_ASSEMBLEPAIRS_SANS_UMI { @@ -262,6 +293,14 @@ process { ext.args = 'PRCONS PRCONS' } + withName: PRESTO_PARSEHEADERS_CREGION { + publishDir = [ + enabled: false + ] + ext.subcommand = 'rename' + ext.args = '-f PRCONS -k CREGION' + } + withName: PRESTO_PARSEHEADERS_PRIMERS_SANS_UMI { publishDir = [ enabled: false @@ -286,6 +325,26 @@ process { ext.args2 = '-f HEADER DUPCOUNT CONSCOUNT' } + withName: PRESTO_COLLAPSESEQ_ALIGN { + publishDir = [ + path: { "${params.outdir}/presto/09-collapseseq/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '-n 0 --inner --uf CREGION --cf CONSCOUNT --act sum --keepmiss' + ext.args2 = '-f HEADER DUPCOUNT CONSCOUNT' + } + + withName: PRESTO_COLLAPSESEQ_CREGION { + publishDir = [ + path: { "${params.outdir}/presto/09-collapseseq/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.args = '-n 0 --inner --uf PRCONS CREGION --cf CONSCOUNT --act sum --keepmiss' + ext.args2 = '-f HEADER DUPCOUNT CONSCOUNT' + } + withName: PRESTO_COLLAPSESEQ_SANS_UMI { publishDir = [ path: { "${params.outdir}/presto/04-collapseseq/${meta.id}" }, diff --git a/conf/nebnext_umi.config b/conf/nebnext_umi.config index f1eec630..13d9cfa2 100644 --- a/conf/nebnext_umi.config +++ b/conf/nebnext_umi.config @@ -23,6 +23,7 @@ params { cprimer_start = 0 umi_length = 17 umi_position = 'R2' + cluster_sets = false //presto options primer_r1_maxerror = 0.2 diff --git a/modules/local/presto/presto_buildconsensus.nf b/modules/local/presto/presto_buildconsensus.nf index 6f5d9b20..2a85c3ea 100644 --- a/modules/local/presto/presto_buildconsensus.nf +++ b/modules/local/presto/presto_buildconsensus.nf @@ -25,9 +25,9 @@ process PRESTO_BUILDCONSENSUS { def args2 = task.ext.args2 ?: '' def args3 = task.ext.args3 ?: '' """ - BuildConsensus.py -s $R1 --bf ${barcode_field} --nproc ${task.cpus} --pf PRIMER --prcons ${params.primer_consensus} --maxerror ${params.buildconsensus_maxerror} --maxgap ${params.buildconsensus_maxgap} ${args} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log.txt - BuildConsensus.py -s $R2 --bf ${barcode_field} --nproc ${task.cpus} --pf PRIMER --prcons ${params.primer_consensus} --maxerror ${params.buildconsensus_maxerror} --maxgap ${params.buildconsensus_maxgap} ${args2} --outname ${meta.id}_R2 --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt - ParseLog.py -l ${meta.id}_R1.log ${meta.id}_R2.log -f ${args3} + BuildConsensus.py -s $R1 --bf ${barcode_field} --nproc ${task.cpus} --prcons ${params.primer_consensus} --maxerror ${params.buildconsensus_maxerror} --maxgap ${params.buildconsensus_maxgap} ${args} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log.txt + BuildConsensus.py -s $R2 --bf ${barcode_field} --nproc ${task.cpus} --prcons ${params.primer_consensus} --maxerror ${params.buildconsensus_maxerror} --maxgap ${params.buildconsensus_maxgap} ${args2} --outname ${meta.id}_R2 --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt + ParseLog.py -l ${meta.id}_R1.log ${meta.id}_R2.log ${args3} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/presto/presto_maskprimers_extract.nf b/modules/local/presto/presto_maskprimers_extract.nf index 7c58c306..661389e0 100644 --- a/modules/local/presto/presto_maskprimers_extract.nf +++ b/modules/local/presto/presto_maskprimers_extract.nf @@ -19,16 +19,18 @@ process PRESTO_MASKPRIMERS_EXTRACT { path "versions.yml" , emit: versions script: + def args = task.ext.args?: '' + def args2 = task.ext.args2?: '' """ MaskPrimers.py extract --nproc ${task.cpus} \\ -s $R2 \\ --start ${params.umi_length} \\ --len ${params.primer_extract_len} \\ - --barcode \\ + $args \\ --mode ${params.primer_mask_mode} \\ --outname ${meta.id}_R2 \\ --log ${meta.id}_R2.log >> ${meta.id}_command_log_R2.txt - ParseLog.py -l ${meta.id}_R2.log -f ID PRIMER ERROR + ParseLog.py -l ${meta.id}_R2.log $args2 cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/presto/presto_pairseq.nf b/modules/local/presto/presto_pairseq.nf index 1027c880..40ac33b1 100644 --- a/modules/local/presto/presto_pairseq.nf +++ b/modules/local/presto/presto_pairseq.nf @@ -18,8 +18,9 @@ process PRESTO_PAIRSEQ { script: def copyfield = (params.index_file | params.umi_position == 'R1') ? 
"--1f BARCODE" : "--2f BARCODE" + def args = task.ext.args?: '' """ - PairSeq.py -1 ${meta.id}_R1.fastq -2 ${meta.id}_R2.fastq $copyfield --coord illumina > ${meta.id}_command_log.txt + PairSeq.py -1 ${meta.id}_R1.fastq -2 ${meta.id}_R2.fastq $copyfield $args > ${meta.id}_command_log.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index ce45cbb4..70faed51 100644 --- a/nextflow.config +++ b/nextflow.config @@ -69,6 +69,7 @@ params { assemblepairs_sequential = false // internal cregion + align_cregion = false internal_cregion_sequences = null cregion_maxlen = 100 cregion_maxerror = 0.3 diff --git a/subworkflows/local/presto_umi.nf b/subworkflows/local/presto_umi.nf index e32f2749..0fdd7cd6 100644 --- a/subworkflows/local/presto_umi.nf +++ b/subworkflows/local/presto_umi.nf @@ -13,16 +13,21 @@ include { PRESTO_MASKPRIMERS_ALIGN } from '../../modu include { PRESTO_MASKPRIMERS_EXTRACT } from '../../modules/local/presto/presto_maskprimers_extract' include { PRESTO_MASKPRIMERS_ALIGN as PRESTO_ALIGN_CREGION } from '../../modules/local/presto/presto_maskprimers_align' include { PRESTO_PAIRSEQ as PRESTO_PAIRSEQ_UMI } from '../../modules/local/presto/presto_pairseq' +include { PRESTO_PAIRSEQ as PRESTO_PAIRSEQ_ALIGN } from '../../modules/local/presto/presto_pairseq' include { PRESTO_CLUSTERSETS as PRESTO_CLUSTERSETS_UMI } from '../../modules/local/presto/presto_clustersets' include { PRESTO_PARSE_CLUSTER as PRESTO_PARSE_CLUSTER_UMI } from '../../modules/local/presto/presto_parse_cluster' include { PRESTO_BUILDCONSENSUS as PRESTO_BUILDCONSENSUS_UMI } from '../../modules/local/presto/presto_buildconsensus' +include { PRESTO_BUILDCONSENSUS as PRESTO_BUILDCONSENSUS_ALIGN } from '../../modules/local/presto/presto_buildconsensus' include { PRESTO_POSTCONSENSUS_PAIRSEQ as PRESTO_POSTCONSENSUS_PAIRSEQ_UMI } from '../../modules/local/presto/presto_postconsensus_pairseq' include { PRESTO_ASSEMBLEPAIRS as PRESTO_ASSEMBLEPAIRS_UMI } from '../../modules/local/presto/presto_assemblepairs' include { PRESTO_ASSEMBLEPAIRS_SEQUENTIAL } from '../../modules/local/presto/presto_assemblepairs_sequential' include { PRESTO_PARSEHEADERS as PRESTO_PARSEHEADERS_COLLAPSE_UMI } from '../../modules/local/presto/presto_parseheaders' +include { PRESTO_PARSEHEADERS as PRESTO_PARSEHEADERS_CREGION } from '../../modules/local/presto/presto_parseheaders' include { PRESTO_PARSEHEADERS_PRIMERS as PRESTO_PARSEHEADERS_PRIMERS_UMI } from '../../modules/local/presto/presto_parseheaders_primers' include { PRESTO_PARSEHEADERS_METADATA as PRESTO_PARSEHEADERS_METADATA_UMI } from '../../modules/local/presto/presto_parseheaders_metadata' include { PRESTO_COLLAPSESEQ as PRESTO_COLLAPSESEQ_UMI } from '../../modules/local/presto/presto_collapseseq' +include { PRESTO_COLLAPSESEQ as PRESTO_COLLAPSESEQ_ALIGN } from '../../modules/local/presto/presto_collapseseq' +include { PRESTO_COLLAPSESEQ as PRESTO_COLLAPSESEQ_CREGION } from '../../modules/local/presto/presto_collapseseq' include { PRESTO_SPLITSEQ as PRESTO_SPLITSEQ_UMI} from '../../modules/local/presto/presto_splitseq' @@ -96,6 +101,7 @@ workflow PRESTO_UMI { PRESTO_FILTERSEQ_UMI ( GUNZIP_UMI.out.reads ) ch_versions = ch_versions.mix(PRESTO_FILTERSEQ_UMI.out.versions) + // Mask primers if (params.maskprimers_align) { ch_reads_R1 = PRESTO_FILTERSEQ_UMI.out.reads @@ -123,32 +129,36 @@ workflow PRESTO_UMI { ch_maskprimers_logs = PRESTO_MASKPRIMERS_ALIGN.out.logs ch_maskprimers_logs = ch_maskprimers_logs.mix(PRESTO_MASKPRIMERS_EXTRACT.out.logs) + 
PRESTO_PAIRSEQ_ALIGN( ch_maskprimers_reads ) + ch_versions = ch_versions.mix(PRESTO_PAIRSEQ_ALIGN.out.versions) + ch_for_clustersets = PRESTO_PAIRSEQ_ALIGN.out.reads + ch_pairseq_logs = PRESTO_PAIRSEQ_ALIGN.out.logs + } else { - // Mask primers PRESTO_MASKPRIMERS_UMI ( PRESTO_FILTERSEQ_UMI.out.reads, ch_cprimers.collect(), ch_vprimers.collect() ) ch_versions = ch_versions.mix(PRESTO_MASKPRIMERS_UMI.out.versions) - ch_maskprimers_reads = PRESTO_MASKPRIMERS_UMI.out.reads ch_maskprimers_logs = PRESTO_MASKPRIMERS_UMI.out.logs - } - + // Pre-consensus pair + PRESTO_PAIRSEQ_UMI ( + PRESTO_MASKPRIMERS_UMI.out.reads + ) + ch_versions = ch_versions.mix(PRESTO_PAIRSEQ_UMI.out.versions) + ch_for_clustersets = PRESTO_PAIRSEQ_UMI.out.reads + ch_pairseq_logs = PRESTO_PAIRSEQ_UMI.out.logs - // Pre-consensus pair - PRESTO_PAIRSEQ_UMI ( - ch_maskprimers_reads - ) - ch_versions = ch_versions.mix(PRESTO_PAIRSEQ_UMI.out.versions) + } if (params.cluster_sets) { // Cluster sequences by similarity PRESTO_CLUSTERSETS_UMI ( - PRESTO_PAIRSEQ_UMI.out.reads + ch_for_clustersets ) ch_versions = ch_versions.mix(PRESTO_CLUSTERSETS_UMI.out.versions) @@ -161,19 +171,30 @@ workflow PRESTO_UMI { ch_clustersets_logs = PRESTO_CLUSTERSETS_UMI.out.logs.collect() } else { - ch_for_buildconsensus = PRESTO_PAIRSEQ_UMI.out.reads + ch_for_buildconsensus = ch_for_clustersets ch_clustersets_logs = Channel.empty() } // Build consensus of sequences with same UMI barcode - PRESTO_BUILDCONSENSUS_UMI ( - ch_for_buildconsensus - ) - ch_versions = ch_versions.mix(PRESTO_BUILDCONSENSUS_UMI.out.versions) + if (params.maskprimers_align) { + PRESTO_BUILDCONSENSUS_ALIGN ( + ch_for_buildconsensus + ) + ch_versions = ch_versions.mix(PRESTO_BUILDCONSENSUS_ALIGN.out.versions) + ch_postconsensus = PRESTO_BUILDCONSENSUS_ALIGN.out.reads + ch_buildconsensus_logs = PRESTO_BUILDCONSENSUS_ALIGN.out.logs + } else { + PRESTO_BUILDCONSENSUS_UMI ( + ch_for_buildconsensus + ) + ch_versions = ch_versions.mix(PRESTO_BUILDCONSENSUS_UMI.out.versions) + ch_postconsensus = PRESTO_BUILDCONSENSUS_UMI.out.reads + ch_buildconsensus_logs = PRESTO_BUILDCONSENSUS_UMI.out.logs + } // Post-consensus pair PRESTO_POSTCONSENSUS_PAIRSEQ_UMI ( - PRESTO_BUILDCONSENSUS_UMI.out.reads + ch_postconsensus ) ch_versions = ch_versions.mix(PRESTO_POSTCONSENSUS_PAIRSEQ_UMI.out.versions) @@ -196,6 +217,7 @@ workflow PRESTO_UMI { ch_assemblepairs_logs = PRESTO_ASSEMBLEPAIRS_UMI.out.logs } + if (params.align_cregion) { PRESTO_ALIGN_CREGION( ch_assemblepairs_reads, @@ -221,27 +243,56 @@ workflow PRESTO_UMI { ) ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_COLLAPSE_UMI.out.versions) - // Annotate primers in C_PRIMER and V_PRIMER field - PRESTO_PARSEHEADERS_PRIMERS_UMI ( - PRESTO_PARSEHEADERS_COLLAPSE_UMI.out.reads - ) - ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_PRIMERS_UMI.out.versions) + // Annotate primer fields and collapse duplicates + if (params.maskprimers_align) { + // Rename primer field to CREGION + PRESTO_PARSEHEADERS_CREGION ( + PRESTO_PARSEHEADERS_COLLAPSE_UMI.out.reads + ) + ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_CREGION.out.versions) - // Annotate metadata on primer headers + // Collapse duplicates + PRESTO_COLLAPSESEQ_ALIGN ( + PRESTO_PARSEHEADERS_CREGION.out.reads + ) + ch_versions = ch_versions.mix(PRESTO_COLLAPSESEQ_ALIGN.out.versions) + ch_collapsed = PRESTO_COLLAPSESEQ_ALIGN.out.reads + ch_collapse_logs = PRESTO_COLLAPSESEQ_ALIGN.out.logs + + } else { + // Annotate primers in C_PRIMER and V_PRIMER field + PRESTO_PARSEHEADERS_PRIMERS_UMI ( + 
PRESTO_PARSEHEADERS_COLLAPSE_UMI.out.reads + ) + ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_PRIMERS_UMI.out.versions) + + if (params.align_cregion) { + PRESTO_COLLAPSESEQ_CREGION ( + PRESTO_PARSEHEADERS_PRIMERS_UMI.out.reads + ) + ch_versions = ch_versions.mix(PRESTO_COLLAPSESEQ_CREGION.out.versions) + ch_collapsed = PRESTO_COLLAPSESEQ_CREGION.out.reads + ch_collapse_logs = PRESTO_COLLAPSESEQ_CREGION.out.logs + } else { + // Collapse duplicates + PRESTO_COLLAPSESEQ_UMI ( + PRESTO_PARSEHEADERS_PRIMERS_UMI.out.reads + ) + ch_versions = ch_versions.mix(PRESTO_COLLAPSESEQ_UMI.out.versions) + ch_collapsed = PRESTO_COLLAPSESEQ_UMI.out.reads + ch_collapse_logs = PRESTO_COLLAPSESEQ_UMI.out.logs + } + } + + // Annotate metadata on read headers PRESTO_PARSEHEADERS_METADATA_UMI ( - PRESTO_PARSEHEADERS_PRIMERS_UMI.out.reads + ch_collapsed ) ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_METADATA_UMI.out.versions) - // Mark and count duplicate sequences with different UMI barcodes (DUPCOUNT) - PRESTO_COLLAPSESEQ_UMI ( - PRESTO_PARSEHEADERS_METADATA_UMI.out.reads - ) - ch_versions = ch_versions.mix(PRESTO_COLLAPSESEQ_UMI.out.versions) - // Filter out sequences with less than 2 representative duplicates with different UMIs PRESTO_SPLITSEQ_UMI ( - PRESTO_COLLAPSESEQ_UMI.out.reads + PRESTO_PARSEHEADERS_METADATA_UMI.out.reads ) ch_versions = ch_versions.mix(PRESTO_SPLITSEQ_UMI.out.versions) @@ -253,11 +304,11 @@ workflow PRESTO_UMI { fastqc_postassembly_gz = FASTQC_POSTASSEMBLY_UMI.out.zip presto_filterseq_logs = PRESTO_FILTERSEQ_UMI.out.logs presto_maskprimers_logs = ch_maskprimers_logs.collect() - presto_pairseq_logs = PRESTO_PAIRSEQ_UMI.out.logs.collect() + presto_pairseq_logs = ch_pairseq_logs.collect() presto_clustersets_logs = ch_clustersets_logs - presto_buildconsensus_logs = PRESTO_BUILDCONSENSUS_UMI.out.logs.collect() + presto_buildconsensus_logs = ch_buildconsensus_logs.collect() presto_postconsensus_pairseq_logs = PRESTO_POSTCONSENSUS_PAIRSEQ_UMI.out.logs.collect() presto_assemblepairs_logs = ch_assemblepairs_logs.collect() - presto_collapseseq_logs = PRESTO_COLLAPSESEQ_UMI.out.logs.collect() + presto_collapseseq_logs = ch_collapse_logs.collect() presto_splitseq_logs = PRESTO_SPLITSEQ_UMI.out.logs.collect() } From 9bd463336213f8b71cc5ff6b58857a806576600a Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Mon, 29 Jan 2024 21:26:44 -0500 Subject: [PATCH 10/19] add param schema --- nextflow_schema.json | 72 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 0beb2e4a..042dcb99 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -218,17 +218,11 @@ "description": "Quality threshold for pRESTO FilterSeq sequence filtering.", "fa_icon": "fas fa-filter" }, - "primer_maxerror": { - "type": "number", - "default": 0.2, - "description": "Maximum primer scoring error in the pRESTO MaskPrimer step for the C and/or V region primers identification.", - "fa_icon": "fas fa-align-center" - }, "primer_consensus": { "type": "number", "default": 0.6, "description": "Maximum error for building the primer consensus in the pRESTO Buildconsensus step.", - "fa_icon": "fas fa-align-center" + "fa_icon": "fas fa-align-left" }, "primer_mask_mode": { "type": "string", @@ -253,8 +247,70 @@ "cluster_sets": { "type": "boolean", "default": true, - "fa_icon": "fas fa-layer-group", + "fa_icon": "fas fa-align-center", "description": "Cluster sequences by similarity regardless of any annotation with pRESTO 
ClusterSets and annotate the cluster ID additionally to the UMI barcode." + }, + "primer_r1_maxerror": { + "type": "number", + "default": 0.2, + "fa_icon": "fas fa-align-left", + "description": "Maximum allowed error for R1 primer alignment." + }, + "primer_r2_maxerror": { + "type": "number", + "default": 0.2, + "fa_icon": "fas fa-align-right", + "description": "Maximum allowed error for R2 primer alignment." + }, + "maskprimers_align": { + "type": "boolean", + "fa_icon": "fas fa-align-center", + "description": "Align primers instead of scoring them. Used for protocols without primer fixed positions." + }, + "primer_extract_len": { + "type": "integer", + "default": 0, + "fa_icon": "fas fa-align-center", + "description": "Length of the extracted primers with MaskPrimer extract." + }, + "primer_maxlen": { + "type": "integer", + "default": 50, + "fa_icon": "fas fa-align-center", + "description": "Maximum allowed primer length when aligning the primers." + }, + "assemblepairs_sequential": { + "type": "boolean", + "fa_icon": "fas fa-align-center", + "description": "Use AssemblePairs sequential instead of AssemblePairs align when assembling read pairs." + }, + "align_cregion": { + "type": "boolean", + "fa_icon": "fas fa-align-center", + "description": "Align internal C-region for a more precise isotype characterization." + }, + "internal_cregion_sequences": { + "type": "string", + "fa_icon": "fas fa-align-center", + "description": "Provide internal C-region sequences for a more precise C-region characterization. Then also set the `align_cregion` flag." + }, + "cregion_maxlen": { + "type": "integer", + "default": 100, + "fa_icon": "fas fa-align-center", + "description": "Maximum allowed length when aligning the internal C-region." + }, + "cregion_maxerror": { + "type": "number", + "default": 0.3, + "fa_icon": "fas fa-align-center", + "description": "Maximum allowed error when aligning the internal C-region." + }, + "cregion_mask_mode": { + "type": "string", + "default": "tag", + "fa_icon": "fas fa-mask", + "description": "Mask mode for C-region alignment." } }, "fa_icon": "fas fa-align-center" From 4ae4b1b50aadb8f2a8f9de73b6b5c3ec68a25757 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Sat, 3 Feb 2024 22:00:37 -0500 Subject: [PATCH 11/19] add protocols documentation --- ...ech_umi.config => clontech_umi_bcr.config} | 4 +- conf/clontech_umi_tcr.config | 44 ++++++++++ ...next_umi.config => nebnext_umi_bcr.config} | 6 +- conf/nebnext_umi_tcr.config | 41 ++++++++++ docs/usage.md | 80 +++++++++++++++++++ nextflow.config | 7 +- 6 files changed, 174 insertions(+), 8 deletions(-) rename conf/{clontech_umi.config => clontech_umi_bcr.config} (87%) create mode 100644 conf/clontech_umi_tcr.config rename conf/{nebnext_umi.config => nebnext_umi_bcr.config} (86%) create mode 100644 conf/nebnext_umi_tcr.config diff --git a/conf/clontech_umi.config b/conf/clontech_umi_bcr.config similarity index 87% rename from conf/clontech_umi.config rename to conf/clontech_umi_bcr.config index 4fe00bc2..f5458dfb 100644 --- a/conf/clontech_umi.config +++ b/conf/clontech_umi_bcr.config @@ -5,14 +5,14 @@ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/airrflow -profile nebnext_human, --outdir + nextflow run nf-core/airrflow -profile clontech_umi_bcr, --outdir ---------------------------------------------------------------------------------------- */ params { config_profile_name = 'Takara Bio / Clontech SMARTer v2' - config_profile_description = 'Profile to run pipeline for the Takara Bio / Clontech SMARTer v2 (UMI) protocol profile' + config_profile_description = 'Profile to run pipeline for the Takara Bio / Clontech SMARTer v2 (UMI) BCR protocol profile' mode = 'fastq' diff --git a/conf/clontech_umi_tcr.config b/conf/clontech_umi_tcr.config new file mode 100644 index 00000000..d620dcee --- /dev/null +++ b/conf/clontech_umi_tcr.config @@ -0,0 +1,44 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/airrflow -profile clontech_umi_tcr, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Takara Bio / Clontech SMARTer v2 TCR' + config_profile_description = 'Profile to run pipeline for the Takara Bio / Clontech SMARTer v2 (UMI) TCR protocol profile' + + mode = 'fastq' + + library_generation_method = 'dt_5p_race_umi' + + cprimers = 'https://bitbucket.org/kleinstein/immcantation/raw/16f94088c1df5c7a0ee1c9ea8b403cd4d2488e8a/protocols/Universal/Human_TR_CRegion_RC.fasta' + + // primer options + cprimer_position = 'R1' + cprimer_start = 0 + vprimer_start = 0 + umi_length = 12 + umi_position = 'R2' + cluster_sets = false + + + // Mask primer options + maskprimers_align = true + primer_extract_len = 7 + primer_mask_mode = 'cut' + primer_maxlen = 70 + primer_r1_maxerror = 0.2 + assemblepairs_sequential = true + primer_consensus = 0.6 + + // TCR options + clonal_threshold = 0 + skip_lineage = true +} diff --git a/conf/nebnext_umi.config b/conf/nebnext_umi_bcr.config similarity index 86% rename from conf/nebnext_umi.config rename to conf/nebnext_umi_bcr.config index 13d9cfa2..7467bffe 100644 --- a/conf/nebnext_umi.config +++ b/conf/nebnext_umi_bcr.config @@ -5,14 +5,14 @@ Defines input files and everything required to run a fast and simple pipeline test. 
    Use as follows:
-        nextflow run nf-core/airrflow -profile nebnext_human, --outdir 
+        nextflow run nf-core/airrflow -profile nebnext_umi_bcr, --outdir 
 
 ----------------------------------------------------------------------------------------
 */
 
 params {
-    config_profile_name = 'NEBNext - AbSeq profile'
-    config_profile_description = 'Profile to run pipeline for the NEBNext - AbSeq experimental protocol'
+    config_profile_name = 'NEBNext - AbSeq BCR profile'
+    config_profile_description = 'Profile to run pipeline for the NEBNext - AbSeq (UMI) BCR experimental protocol'
 
     mode = 'fastq'
     cprimers = 'https://bitbucket.org/kleinstein/immcantation/raw/354f49228a43b4c2858d67fb09886126b314e317/protocols/AbSeq/AbSeq_R1_Human_IG_Primers.fasta'
diff --git a/conf/nebnext_umi_tcr.config b/conf/nebnext_umi_tcr.config
new file mode 100644
index 00000000..e030d952
--- /dev/null
+++ b/conf/nebnext_umi_tcr.config
@@ -0,0 +1,41 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/airrflow -profile nebnext_umi_tcr, --outdir 
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name = 'NEBNext - AbSeq TCR profile'
+    config_profile_description = 'Profile to run pipeline for the NEBNext - AbSeq (UMI) TCR experimental protocol'
+
+    mode = 'fastq'
+    cprimers = 'https://bitbucket.org/kleinstein/immcantation/raw/16f94088c1df5c7a0ee1c9ea8b403cd4d2488e8a/protocols/AbSeq/AbSeq_R1_Human_TR_Primers.fasta'
+    race_linker = 'https://bitbucket.org/kleinstein/immcantation/raw/354f49228a43b4c2858d67fb09886126b314e317/protocols/AbSeq/AbSeq_R2_TS.fasta'
+
+    library_generation_method = 'dt_5p_race_umi'
+    cprimer_position = 'R1'
+    cprimer_start = 0
+    umi_length = 17
+    umi_position = 'R2'
+    cluster_sets = false
+
+    //presto options
+    primer_r1_maxerror = 0.2
+    primer_r2_maxerror = 0.5
+    assemblepairs_sequential = true
+    maskprimers_align = false
+    align_cregion = false
+    cregion_maxlen = 100
+    cregion_maxerror = 0.3
+    cregion_mask_mode = 'tag'
+
+    //TCR options
+    clonal_threshold = 0
+    skip_lineage = true
+}
diff --git a/docs/usage.md b/docs/usage.md
index aafc8349..eecdbb28 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -29,6 +29,16 @@ nextflow run nf-core/airrflow \
 --outdir results
 ```
 
+You can optionally set a protocol profile if you're running the pipeline with data from one of the supported profiles. The full list of supported profiles can be found in the section [Supported protocol profiles](#supported-protcol-profiles).
An example command running the NEBNext UMI protocol profile with docker containers is: + +```bash +nextflow run nf-core/airrflow \ +-profile nebnext_umi,docker \ +--mode fastq \ +--input input_samplesheet.tsv \ +--outdir results +``` + A typical command for running the pipeline departing from **single-cell AIRR rearrangement tables or assembled bulk sequencing fasta** data is: ```bash @@ -165,6 +175,76 @@ nf-core/airrflow offers full support for the [AIRR standards 1.4](https://docs.a | biomaterial_provider | Samplesheet column | | Name of sample biomaterial provider | | library_generation_method | Parameter | `--library_generation_method` | Generic type of library generation | +## Supported protocol profiles + +### NEBNext Immune Sequencing Kit + +- [New England Biolabs NEBNext Immune sequencing kit](https://www.neb.com/en-us/products/e6320-nebnext-immune-sequencing-kit-human#Product%20Information) + +You can use the `nebnext_umi_bcr` or `nebnext_umi_tcr` preset defaults for analyzing bulk fastq sequencing data that was generated with the NEB Immune Profiling kit. An example using docker containers for the analysis is: + +```bash +nextflow run nf-core/airrflow -r \ +-profile nebnext_umi_bcr,docker \ +--input input_samplesheet.tsv \ +--outdir results +``` + +This profile executes the commands based on the pRESTO pre-set pipeline [presto-abseq.sh](https://bitbucket.org/kleinstein/immcantation/src/master/pipelines/presto-abseq.sh). A summary of the performed steps is: + +- Filter sequences by base quality. +- Score and mask the provided R1 primers and R2 template switch oligo. Primer defaults are taken from the [Immcantation repository](https://bitbucket.org/kleinstein/immcantation/src/master/protocols/AbSeq/). +- Pair sequences, build UMI consensus sequence. +- Assemble read pairs with the pRESTO `AssemblePairs sequential` option. +- Align and annotate the internal C Region (for the BCR specific protocol) for a more specific isotype annotation. +- Remove duplicate sequences and filter to sequences with at least 2 supporting sources. + +Please note that the default primer sequences and internal CRegion sequences are for human. If you wish to run this protocol on mouse or other species, please provide the alternative primers: + +```bash +nextflow run nf-core/airrflow -r \ +-profile nebnext_umi_bcr,docker \ +--input input_samplesheet.tsv \ +--cprimers \ +--internal_cregion_sequences \ +--outdir results +``` + +### Clontech / Takara SMARTer Human BCR Profiling kit + +- [TaKaRa SMARTer Human BCR kit](https://www.takarabio.com/products/next-generation-sequencing/immune-profiling/human-repertoire/human-bcr-profiling-kit-for-illumina-sequencing) + +You can use the `clontech_umi_bcr` or `clontech_umi_tcr` preset defaults for analyzing bulk fastq sequencing data that was generated with the Takara SMARTer Human Profiling kit. An example using docker containers for the analysis is: + +```bash +nextflow run nf-core/airrflow -r \ +-profile clontech_umi_bcr,docker \ +--input input_samplesheet.tsv \ +--outdir results +``` + +This profile executes the sequence assembly commands based on the pRESTO pre-set pipeline [presto-clontech-umi.sh](https://bitbucket.org/kleinstein/immcantation/src/master/pipelines/presto-clontech-umi.sh). A summary of the performed steps is: + +- Filter sequences by base quality. +- Align and annotate the universal C region seqeunces in the R1 reads. Defaults are taken from the [Immcantation repository](https://bitbucket.org/kleinstein/immcantation/src/master/protocols/Universal/). 
+- Identify the primers sequences and UMI (12 nt length) in the R2 reads. +- Pair sequences, build UMI consensus sequence. +- Assemble read pairs with the pRESTO `AssemblePairs sequential` option. +- Align and annotate the C Region sequences. +- Remove duplicate sequences and filter to sequences with at least 2 supporting sources. + +After the sequence assembly steps, the remaining steps are common for all protocols. + +Please note that the default primer sequences and internal CRegion sequences are for human. If you wish to run this protocol on mouse or other species, please provide the alternative primer sequences: + +```bash +nextflow run nf-core/airrflow -r \ +-profile clontech_umi_bcr,docker \ +--input input_samplesheet.tsv \ +--cprimers \ +--outdir results +``` + ## Supported bulk library generation methods (protocols) When processing bulk sequencing data departing from raw `fastq` reads, several sequencing protocols are supported which can be provided with the parameter `--library_generation_method`. diff --git a/nextflow.config b/nextflow.config index 70faed51..918da8ec 100644 --- a/nextflow.config +++ b/nextflow.config @@ -287,9 +287,10 @@ profiles { test_assembled_immcantation_devel_mm { includeConfig 'conf/test_assembled_immcantation_devel_mm.config' } test_nocluster { includeConfig 'conf/test_nocluster.config' } test_fetchimgt { includeConfig 'conf/test_fetchimgt.config' } - test_igblast { includeConfig 'conf/test_igblast.config' } - nebnext_umi { includeConfig 'conf/nebnext_umi.config' } - clontech_umi { includeConfig 'conf/clontech_umi.config' } + nebnext_umi_tcr { includeConfig 'conf/nebnext_umi_tcr.config' } + nebnext_umi_bcr { includeConfig 'conf/nebnext_umi_bcr.config' } + clontech_umi_bcr { includeConfig 'conf/clontech_umi_bcr.config' } + clontech_umi_tcr { includeConfig 'conf/clontech_umi_tcr.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile From 7691cf97a2b525063475d627487b4370e95453ad Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Tue, 13 Feb 2024 21:14:22 -0500 Subject: [PATCH 12/19] add test profiles --- CHANGELOG.md | 1 + conf/test_clontech_umi.config | 29 +++++++++++++++++++++++++++++ conf/test_nebnext_umi.config | 29 +++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 conf/test_clontech_umi.config create mode 100644 conf/test_nebnext_umi.config diff --git a/CHANGELOG.md b/CHANGELOG.md index 91d36e0e..e0e9aebb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Added` - [#294](https://github.com/nf-core/airrflow/pull/294) Merge template updates nf-core/tools v2.11.1 +- [#299](https://github.com/nf-core/airrflow/pull/299) Add profile for common NEB and TAKARA protocols ### `Fixed` diff --git a/conf/test_clontech_umi.config b/conf/test_clontech_umi.config new file mode 100644 index 00000000..caf01710 --- /dev/null +++ b/conf/test_clontech_umi.config @@ -0,0 +1,29 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/airrflow -profile test_clontech_umi, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile for TAKARA protocol' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-clontech/samplesheet.tsv' + + imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + + includeConfig 'clontech_umi_bcr.config' +} diff --git a/conf/test_nebnext_umi.config b/conf/test_nebnext_umi.config new file mode 100644 index 00000000..d3af9e79 --- /dev/null +++ b/conf/test_nebnext_umi.config @@ -0,0 +1,29 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/airrflow -profile test_nebnext_umi, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile for NEBNext protocol' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-neb/samplesheet.tsv' + + imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + + includeConfig 'nebnext_umi_bcr.config' +} From 306024b10f810198135f4cdf979ac46d52420580 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Tue, 13 Feb 2024 21:17:41 -0500 Subject: [PATCH 13/19] add tests to ci --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cac7308e..86abfab1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,7 +49,8 @@ jobs: - "23.04.0" - "latest-everything" profile: - ["test_tcr", "test_no_umi", "test_nocluster", "test_fetchimgt", "test_assembled_hs", "test_assembled_mm"] + ["test_tcr", "test_no_umi", "test_nocluster", "test_fetchimgt", "test_assembled_hs", "test_assembled_mm", + "test_clontech_umi", "test_nebnext_umi"] fail-fast: false steps: - name: Check out pipeline code From c1334bce2362a976e7fa8d3e69b1078990ae0880 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Tue, 13 Feb 2024 21:18:08 -0500 Subject: [PATCH 14/19] add test to ci workflow --- nextflow.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nextflow.config b/nextflow.config index 918da8ec..52aa535d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -287,6 +287,8 @@ profiles { test_assembled_immcantation_devel_mm { includeConfig 'conf/test_assembled_immcantation_devel_mm.config' } test_nocluster { includeConfig 
'conf/test_nocluster.config' } test_fetchimgt { includeConfig 'conf/test_fetchimgt.config' } + test_clontech_umi { includeConfig 'conf/test_clontech_umi.config' } + test_nebnext_umi { includeConfig 'conf/test_nebnext_umi.config' } nebnext_umi_tcr { includeConfig 'conf/nebnext_umi_tcr.config' } nebnext_umi_bcr { includeConfig 'conf/nebnext_umi_bcr.config' } clontech_umi_bcr { includeConfig 'conf/clontech_umi_bcr.config' } From 8cbae055558358b9dc6a291ba699fa23a060e35f Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Tue, 13 Feb 2024 21:26:00 -0500 Subject: [PATCH 15/19] fix test configs --- conf/test_clontech_umi.config | 2 +- conf/test_nebnext_umi.config | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/conf/test_clontech_umi.config b/conf/test_clontech_umi.config index caf01710..beb5b3cc 100644 --- a/conf/test_clontech_umi.config +++ b/conf/test_clontech_umi.config @@ -9,6 +9,7 @@ ---------------------------------------------------------------------------------------- */ +includeConfig 'clontech_umi_bcr.config' params { config_profile_name = 'Test profile for TAKARA protocol' @@ -25,5 +26,4 @@ params { imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' - includeConfig 'clontech_umi_bcr.config' } diff --git a/conf/test_nebnext_umi.config b/conf/test_nebnext_umi.config index d3af9e79..5586225b 100644 --- a/conf/test_nebnext_umi.config +++ b/conf/test_nebnext_umi.config @@ -10,6 +10,8 @@ ---------------------------------------------------------------------------------------- */ +includeConfig 'nebnext_umi_bcr.config' + params { config_profile_name = 'Test profile for NEBNext protocol' config_profile_description = 'Minimal test dataset to check pipeline function' @@ -25,5 +27,4 @@ params { imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' - includeConfig 'nebnext_umi_bcr.config' } From 2376f16c252e70287610012dc1e9d11e55240ed6 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Tue, 13 Feb 2024 21:27:54 -0500 Subject: [PATCH 16/19] fix prettier --- .github/workflows/ci.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 86abfab1..73d0cdbb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,8 +49,16 @@ jobs: - "23.04.0" - "latest-everything" profile: - ["test_tcr", "test_no_umi", "test_nocluster", "test_fetchimgt", "test_assembled_hs", "test_assembled_mm", - "test_clontech_umi", "test_nebnext_umi"] + [ + "test_tcr", + "test_no_umi", + "test_nocluster", + "test_fetchimgt", + "test_assembled_hs", + "test_assembled_mm", + "test_clontech_umi", + "test_nebnext_umi", + ] fail-fast: false steps: - name: Check out pipeline code From 62554ca869b2af4c6401e248151494b4e14e347b Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Tue, 13 Feb 2024 21:43:44 -0500 Subject: [PATCH 17/19] update test config --- conf/test_clontech_umi.config | 2 ++ conf/test_nebnext_umi.config | 2 ++ 2 files changed, 4 insertions(+) diff --git a/conf/test_clontech_umi.config b/conf/test_clontech_umi.config index beb5b3cc..552a7434 100644 --- a/conf/test_clontech_umi.config +++ b/conf/test_clontech_umi.config @@ -26,4 +26,6 @@ params { imgtdb_base = 
'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + clonal_threshold = 0.1 + } diff --git a/conf/test_nebnext_umi.config b/conf/test_nebnext_umi.config index 5586225b..c96b16b3 100644 --- a/conf/test_nebnext_umi.config +++ b/conf/test_nebnext_umi.config @@ -27,4 +27,6 @@ params { imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + clonal_threshold = 0.1 + } From 3af5e591af28e42af1932b80d2d340b8892463ab Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Wed, 14 Feb 2024 09:45:45 -0500 Subject: [PATCH 18/19] fix precommit --- .github/workflows/download_pipeline.yml | 2 +- .nf-core.yml | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 8611458a..8a330045 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -64,4 +64,4 @@ jobs: env: NXF_SINGULARITY_CACHEDIR: ./ NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results diff --git a/.nf-core.yml b/.nf-core.yml index 6f253076..59995351 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -5,10 +5,10 @@ lint: - report_comment nextflow_config: - config_defaults: - - params.miairr - - params.report_rmd - - params.report_css - - params.report_logo - - params.report_logo_img - - params.config_profile_url + - params.miairr + - params.report_rmd + - params.report_css + - params.report_logo + - params.report_logo_img + - params.config_profile_url repository_type: pipeline From affe6b6100bc876f95e0d858b658eab2aa4e44d7 Mon Sep 17 00:00:00 2001 From: ssnn <20683719+ssnn-airr@users.noreply.github.com> Date: Fri, 16 Feb 2024 15:57:49 +0100 Subject: [PATCH 19/19] Update usage.md typo --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index eecdbb28..dd60f84b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -29,7 +29,7 @@ nextflow run nf-core/airrflow \ --outdir results ``` -You can optionally set a protocol profile if you're running the pipeline with data from one of the supported profiles. The full list of supported profiles can be found in the section [Supported protocol profiles](#supported-protcol-profiles). An example command running the NEBNext UMI protocol profile with docker containers is: +You can optionally set a protocol profile if you're running the pipeline with data from one of the supported profiles. The full list of supported profiles can be found in the section [Supported protocol profiles](#supported-protocol-profiles). An example command running the NEBNext UMI protocol profile with docker containers is: ```bash nextflow run nf-core/airrflow \
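
Below is a short, illustrative sketch (not part of the patch series above) of how the protocol parameters introduced in these patches could be combined in a user-supplied custom config for a 5'-RACE UMI protocol on a species without a bundled preset. The file name `custom_race_umi.config` and the three FASTA paths are hypothetical placeholders; the parameter names and values are taken from the profiles and `nextflow.config` defaults shown in the patches.

```groovy
// custom_race_umi.config -- illustrative sketch only; file name and FASTA paths are placeholders.
params {
    mode                       = 'fastq'
    library_generation_method  = 'dt_5p_race_umi'

    // species-specific primer and linker sets supplied by the user (placeholder paths)
    cprimers                   = '/path/to/my_species_CRegion_RC.fasta'
    race_linker                = '/path/to/my_species_TS_linker.fasta'

    // UMI and primer geometry
    cprimer_position           = 'R1'
    cprimer_start              = 0
    umi_length                 = 12
    umi_position               = 'R2'
    cluster_sets               = false

    // align (rather than score) the R1 C-region primers, as in the Takara presets
    maskprimers_align          = true
    primer_extract_len         = 7
    primer_mask_mode           = 'cut'
    primer_maxlen              = 70
    primer_r1_maxerror         = 0.2
    assemblepairs_sequential   = true

    // optional internal C-region annotation, as in the NEBNext BCR preset
    align_cregion              = true
    internal_cregion_sequences = '/path/to/my_species_InternalCRegion.fasta'
    cregion_maxlen             = 100
    cregion_maxerror           = 0.3
    cregion_mask_mode          = 'tag'
}
```

A file like this could be passed with `-c custom_race_umi.config` together with a container profile (e.g. `-profile docker`) instead of one of the bundled `nebnext_umi_*` / `clontech_umi_*` profiles.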