diff --git a/bin/fasta2peptides.py b/bin/fasta2peptides.py new file mode 100755 index 0000000..8d05bd6 --- /dev/null +++ b/bin/fasta2peptides.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +import argparse +import logging +from collections import defaultdict +from time import time +from Bio import SeqIO + +# Configure logging +logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(message)s", + level=logging.INFO +) + +def parse_fasta(fasta_file): + """Parses a FASTA file and returns a dictionary {header: sequence}""" + fasta_map = {} + with open(fasta_file) as f: + for record in SeqIO.parse(f, "fasta"): + fasta_map[record.id] = str(record.seq) + return fasta_map + +def generate_peptides(fasta_map, peptide_length): + """Generates peptides of a specific length from the input sequences.""" + start_time = time() + peptides_set = set() + + for header, seq in fasta_map.items(): + for i in range(len(seq) - peptide_length + 1): + peptides_set.add((seq[i:i + peptide_length], header)) + + logging.info(f"Generated {len(peptides_set):,.0f} peptides of length {peptide_length} in {time() - start_time:.2f} seconds") + return peptides_set + +def group_peptides(peptides_set, peptide_col_name): + """Collapses identical peptides from different proteins and aggregates headers.""" + start_time = time() + peptides_dict = defaultdict(lambda: {"protein_ids": set(), "counts": 0}) + + for sequence, protein_id in peptides_set: + peptides_dict[sequence]["protein_ids"].add(protein_id) + peptides_dict[sequence]["counts"] += 1 + + peptides = [ + {peptide_col_name: seq, "protein_ids": ";".join(data["protein_ids"]), "counts": data["counts"]} + for seq, data in peptides_dict.items() + ] + + logging.info(f"Grouped peptides in {time() - start_time:.2f} seconds") + return peptides + +def write_output(peptides, output_file, peptide_col_name): + """Writes the peptides data to a TSV file without using pandas.""" + start_time = time() + + with open(output_file, "w") as f: + # Write header + f.write(f"{peptide_col_name}\tprotein_ids\tcounts\n") + # Write data + for peptide in peptides: + f.write(f"{peptide[peptide_col_name]}\t{peptide['protein_ids']}\t{peptide['counts']}\n") + + logging.info(f"Wrote {len(peptides):,.0f} peptides to {output_file} in {time() - start_time:.2f} seconds") + +def main(): + parser = argparse.ArgumentParser(description="Generate peptides of different lengths from a FASTA file and save as separate TSV files.") + parser.add_argument("-i", "--input", required=True, help="Input FASTA file") + parser.add_argument("-o", "--output_prefix", required=True, help="Output file prefix (each length will have its own file)") + parser.add_argument("-minl","--min_length", type=int, required=True, help="Minimum peptide length") + parser.add_argument("-maxl","--max_length", type=int, required=True, help="Maximum peptide length") + parser.add_argument("-pepcol","--peptide_col_name", type=str, required=True, help="Peptide column name") + + args = parser.parse_args() + + fasta_map = parse_fasta(args.input) + + for k in range(args.min_length, args.max_length + 1): + peptides_set = generate_peptides(fasta_map, k) + peptides = group_peptides(peptides_set, args.peptide_col_name) + output_filename = f"{args.output_prefix}_length_{k}.tsv" + write_output(peptides, output_filename, args.peptide_col_name) + +if __name__ == "__main__": + main() diff --git a/bin/gen_peptides.py b/bin/gen_peptides.py deleted file mode 100755 index d962ed3..0000000 --- a/bin/gen_peptides.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env
python -# Written by Sabrina Krakau, Christopher Mohr and released under the MIT license (2022). - -import argparse - -import pandas as pd -from Bio.SeqIO.FastaIO import SimpleFastaParser -from epytope.Core import Protein, generate_peptides_from_proteins - -parser = argparse.ArgumentParser("Generating peptides from protein sequences.") -parser.add_argument( - "-i", "--input", metavar="FILE", type=argparse.FileType("r"), help="FASTA filename containing proteins." -) -parser.add_argument( - "-o", "--output", metavar="FILE", type=argparse.FileType("w"), help="Output file containing peptides." -) -parser.add_argument( - "-min", "--min_length", metavar="N", type=int, help="Minimal length of peptides that will be generated." -) -parser.add_argument( - "-max", "--max_length", metavar="N", type=int, help="Maximum length of peptides that will be generated." -) -args = parser.parse_args() - - -def read_protein_fasta(file): - # split at first whitespace and use short ID - - collect = set() - # iterate over all FASTA entries: - for _id, seq in SimpleFastaParser(file): - # generate element: - _id = _id.split(" ")[0] - - try: - collect.add(Protein(seq.strip().upper(), transcript_id=_id)) - except TypeError: - collect.add(Protein(seq.strip().upper())) - return list(collect) - - -proteins = read_protein_fasta(args.input) - -c = 0 -for k in range(args.min_length, args.max_length + 1): - peptides = generate_peptides_from_proteins(proteins, k) - # get proteins and corresponding counts - pd_peptides = pd.DataFrame( - [ - ( - str(pep), - ",".join([prot.transcript_id.split(" ")[0] for prot in pep.get_all_proteins()]), - ",".join([str(len(pep.proteinPos[prot.transcript_id])) for prot in pep.get_all_proteins()]), - ) - for pep in peptides - ], - columns=["sequence", "protein_ids", "counts"], - ) - # assign id - pd_peptides = pd_peptides.assign(id=[str(c + id) for id in pd_peptides.index]) - c += len(pd_peptides["sequence"]) - - if k == args.min_length: - pd_peptides[["sequence", "id", "protein_ids", "counts"]].to_csv(args.output, sep="\t", index=False) - else: - pd_peptides[["sequence", "id", "protein_ids", "counts"]].to_csv( - args.output, sep="\t", index=False, mode="a", header=False - ) diff --git a/bin/split_peptides.py b/bin/split_peptides.py index 4e31134..abf53cb 100755 --- a/bin/split_peptides.py +++ b/bin/split_peptides.py @@ -1,34 +1,49 @@ -#!/usr/bin/env python -# Written by Sabrina Krakau, Christopher Mohr and released under the MIT license (2022). - +#!/usr/bin/env python3 +# Written by Jonas Scheid and released under the MIT license (2025).
import argparse import math +from pathlib import Path + +def split_peptides(input_file, output_base, min_size, max_chunks): + """Splits the peptide input file into smaller chunks in a single pass.""" + input_path = Path(input_file) + output_base = Path(output_base) + + with input_path.open("r") as infile: + lines = infile.readlines() # Read all lines into memory + + header, data_lines = lines[0], lines[1:] # Separate header + total_size = len(data_lines) # Number of peptides (excluding header) + + if total_size == 0: + raise ValueError("Input file contains no peptides.") + + # Determine number of chunks & chunk size + num_chunks = min(math.ceil(total_size / min_size), max_chunks) + chunk_size = max(min_size, math.ceil(total_size / num_chunks)) -parser = argparse.ArgumentParser("Split peptides input file.") -parser.add_argument("-i", "--input", metavar="FILE", type=str, help="Input file containing peptides.") -parser.add_argument("-o", "--output_base", type=str, help="Base filename for output files.") -parser.add_argument( - "-s", "--min_size", metavar="N", type=int, help="Minimum number of peptides that should be written into one file." -) -parser.add_argument( - "-c", "--max_chunks", metavar="N", type=int, help="Maximum number of chunks that should be created." -) -args = parser.parse_args() - -with open(args.input) as infile: - tot_size = sum([1 for _ in infile]) - 1 - -n = int(min(math.ceil(float(tot_size) / args.min_size), args.max_chunks)) -h = int(max(args.min_size, math.ceil(float(tot_size) / n))) - -with open(args.input) as infile: - header = next(infile) - for chunk in range(n): - with open(args.output_base + ".chunk_" + str(chunk) + ".tsv", "w") as outfile: + for chunk_idx in range(num_chunks): + chunk_file = output_base.with_name(f"{output_base.stem}_chunk_{chunk_idx}.tsv") + start = chunk_idx * chunk_size + end = start + chunk_size + + with chunk_file.open("w") as outfile: outfile.write(header) - for _ in range(h): - try: - outfile.write(next(infile)) - except StopIteration: - break + outfile.writelines(data_lines[start:end]) + + if end >= total_size: + break # Stop if we've written all data + +def main(): + parser = argparse.ArgumentParser(description="Split a peptide file into smaller chunks.") + parser.add_argument("-i", "--input", required=True, help="Input file containing peptides.") + parser.add_argument("-o", "--output_base", required=True, help="Base filename for output files.") + parser.add_argument("--min_size", type=int, required=True, help="Minimum peptides per file.") + parser.add_argument("--max_chunks", type=int, required=True, help="Maximum number of chunks.") + + args = parser.parse_args() + split_peptides(args.input, args.output_base, args.min_size, args.max_chunks) + +if __name__ == "__main__": + main() diff --git a/conf/modules.config b/conf/modules.config index fb24018..334eebe 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -20,129 +20,20 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } - publishDir = [ - path: { "${params.outdir}/multiqc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - withName: EPYTOPE_CHECK_REQUESTED_MODELS { - publishDir = [ - path: { "${params.outdir}/reports" }, - mode: params.publish_dir_mode - ] - ext.args = "--tools ${params.tools} " - } - - withName: EPYTOPE_CHECK_REQUESTED_MODELS_PEP { - publishDir = [ - path: { "${params.outdir}/reports" }, - mode: params.publish_dir_mode - ] - ext.args = "--tools ${params.tools} --peptides " - } - - withName: PREPARE_PREDICTION_INPUT { - ext.prefix = {"${meta.sample}"} - publishDir = [ - enabled: false - ] - } - - withName: SYFPEITHI { - publishDir = [ - path: { "${params.outdir}/syfpeithi" }, - mode: params.publish_dir_mode - ] - } - - withName: MHCFLURRY { - publishDir = [ - path: { "${params.outdir}/mhcflurry" }, - mode: params.publish_dir_mode, - pattern: '*.csv' - ] - } - - withName: MHCNUGGETS { - ext.prefix = {"${meta.sample}"} - publishDir = [ - path: { "${params.outdir}/mhcnuggets" }, - mode: params.publish_dir_mode, - pattern: '*predicted_mhcnuggets.csv' - ] - } - - withName: NETMHCPAN { - ext.args = "-BA" - publishDir = [ - path: { "${params.outdir}/netmhcpan" }, - mode: params.publish_dir_mode, - pattern: '*.xls' - ] - } - - withName: NETMHCIIPAN { - ext.prefix = {"${meta.sample}"} - ext.args = "-BA" - publishDir = [ - path: { "${params.outdir}/netmhciipan" }, - mode: params.publish_dir_mode, - pattern: '*.xls' - ] - } - - withName: MERGE_PREDICTIONS { - ext.prefix = {"${meta.sample}"} - publishDir = [ - path: { "${params.outdir}/merged_predictions" }, - mode: params.publish_dir_mode - ] - } - - withName: EPYTOPE_GENERATE_PEPTIDES { - publishDir = [ - path: { "${params.outdir}/generated_peptides/${meta.sample}" }, - mode: params.publish_dir_mode - ] - ext.args = '' - } - - withName: SPLIT_PEPTIDES_PEPTIDES { - ext.args = "--min_size ${params.peptides_split_minchunksize} --max_chunks ${params.peptides_split_maxchunks} " - } - - withName: SPLIT_PEPTIDES_PROTEIN { - ext.args = "--min_size ${params.peptides_split_minchunksize} --max_chunks ${params.peptides_split_maxchunks} " + withName: GUNZIP_VCF { + publishDir = [ enabled: false ] } - withName: EPYTOPE_PEPTIDE_PREDICTION_PROTEIN { - // Argument list needs to end with --peptides - ext.args = [ - genome_reference != 'grch37' & genome_reference != 'grch38' ? "--genome_reference '${genome_reference}'" : '', - genome_reference == 'grch37' ? "--genome_reference 'https://grch37.ensembl.org/'" : '', - genome_reference == 'grch38' ? "--genome_reference 'https://www.ensembl.org'" : '', - '--peptides' - ].join(' ').trim() + withName: VARIANT_SPLIT { publishDir = [ - path: { "${params.outdir}/split_predictions/${meta.sample}" }, + path: { "${params.outdir}/split_input/${meta.sample}" }, mode: params.publish_dir_mode ] } - withName: EPYTOPE_PEPTIDE_PREDICTION_PEP { - // Argument list needs to end with --peptides - ext.args = [ - genome_reference != 'grch37' & genome_reference != 'grch38' ? "--genome_reference '${genome_reference}'" : '', - genome_reference == 'grch37' ? "--genome_reference 'https://grch37.ensembl.org/'" : '', - genome_reference == 'grch38' ? 
"--genome_reference 'https://www.ensembl.org'" : '', - '--peptides' - ].join(' ').trim() + withName: SNPSIFT_SPLIT { publishDir = [ - path: { "${params.outdir}/split_predictions/${meta.sample}" }, + path: { "${params.outdir}/split_input/${meta.sample}" }, mode: params.publish_dir_mode ] } @@ -161,81 +52,78 @@ process { ] } - withName: MERGE_JSON_SINGLE { - ext.args = " --single_input " - } - - withName: MERGE_JSON_MULTI { - ext.args = " --input \$PWD " + withName: FASTA2PEPTIDES { + publishDir = [ enabled: false ] } - withName: CAT_TSV { - publishDir = [ - path: { "${params.outdir}/predictions/${meta.sample}" }, - mode: params.publish_dir_mode - ] + withName: SPLIT_PEPTIDES { + publishDir = [ enabled: false ] } - withName: CAT_FASTA { - publishDir = [ - path: { "${params.outdir}/predictions/${meta.sample}" }, - mode: params.publish_dir_mode - ] + withName: PREPARE_PREDICTION_INPUT { + ext.prefix = {"${meta.file_id}"} + publishDir = [ enabled: false ] } - withName: CSVTK_CONCAT { + withName: MHCFLURRY { + ext.prefix = {"${meta.file_id}"} publishDir = [ - path: { "${params.outdir}/predictions/${meta.sample}" }, - mode: params.publish_dir_mode + path: { "${params.outdir}/mhcflurry" }, + mode: params.publish_dir_mode, + pattern: '*.csv' ] } - withName: GET_PREDICTION_VERSIONS { + withName: MHCNUGGETS { + ext.prefix = {"${meta.file_id}"} publishDir = [ - path: { "${params.outdir}/reports" }, - mode: params.publish_dir_mode - ] - } - - withName: GUNZIP_VCF { - publishDir = [ - enabled: false + path: { "${params.outdir}/mhcnuggets" }, + mode: params.publish_dir_mode, + pattern: '*predicted_mhcnuggets.csv' ] } - withName: MERGE_JSON { + withName: NETMHCPAN { + ext.prefix = {"${meta.file_id}"} + ext.args = "-BA" publishDir = [ - path: { "${params.outdir}/predictions/${meta.sample}" }, - mode: params.publish_dir_mode + path: { "${params.outdir}/netmhcpan" }, + mode: params.publish_dir_mode, + pattern: '*.xls' ] } - withName: EPYTOPE_SHOW_SUPPORTED_MODELS { + withName: NETMHCIIPAN { + ext.prefix = {"${meta.file_id}"} + ext.args = "-BA" publishDir = [ - path: { "${params.outdir}/supported_models" }, - mode: params.publish_dir_mode + path: { "${params.outdir}/netmhciipan" }, + mode: params.publish_dir_mode, + pattern: '*.xls' ] } - withName: VARIANT_SPLIT { - publishDir = [ - path: { "${params.outdir}/split_input/${meta.sample}" }, - mode: params.publish_dir_mode - ] + withName: MERGE_PREDICTIONS { + ext.prefix = {"${meta.file_id}"} + ext.args = params.wide_format_output ? "--wide_format_output" : "" + publishDir = [ enabled: false ] } - withName: SNPSIFT_SPLIT { + withName: CSVTK_CONCAT { + ext.prefix = {"${meta.sample}"} publishDir = [ - path: { "${params.outdir}/split_input/${meta.sample}" }, - mode: params.publish_dir_mode + path: { "${params.outdir}/predictions/${meta.sample}" }, + mode: params.publish_dir_mode, + pattern: '*.tsv' ] } - withName: SPLIT_PEPTIDES { + withName: MULTIQC { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ - path: { "${params.outdir}/split_input/${meta.sample}" }, - mode: params.publish_dir_mode + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - } diff --git a/modules.json b/modules.json index 91ef878..7f1d421 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "csvtk/concat": { + "branch": "master", + "git_sha": "aa5c23023134cf2d8b75a95d53557890e40261b9", + "installed_by": ["modules"] + }, "gunzip": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/local/csvtk_concat.nf b/modules/local/csvtk_concat.nf deleted file mode 100644 index f3d986c..0000000 --- a/modules/local/csvtk_concat.nf +++ /dev/null @@ -1,39 +0,0 @@ -process CSVTK_CONCAT { - label 'process_low' - - conda "bioconda::csvtk=0.23.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/csvtk:0.23.0--h9ee0642_0' : - 'biocontainers/csvtk:0.23.0--h9ee0642_0' }" - - input: - tuple val(meta), path(predicted) - - output: - tuple val(meta), path("*prediction_result.tsv"), emit: predicted - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - """ - csvtk concat -t $predicted > ${meta.sample}_prediction_result_unsorted.tmp - csvtk sort -k chr:n,length:n ${meta.sample}_prediction_result_unsorted.tmp -t --out-file ${meta.sample}_prediction_result.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" )) - END_VERSIONS - """ - - stub: - """ - touch ${meta.sample}_prediction_result.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" )) - END_VERSIONS - """ -} diff --git a/modules/local/epytope_generate_peptides.nf b/modules/local/epytope_generate_peptides.nf deleted file mode 100644 index c0d2bf3..0000000 --- a/modules/local/epytope_generate_peptides.nf +++ /dev/null @@ -1,50 +0,0 @@ -process EPYTOPE_GENERATE_PEPTIDES { - label 'process_low' - tag "${meta.sample}" - - conda "bioconda::epytope=3.3.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/epytope:3.3.1--pyh7cba7a3_0' : - 'biocontainers/epytope:3.3.1--pyh7cba7a3_0' }" - - input: - tuple val(meta), path(raw) - - output: - tuple val(meta), path("*.tsv"), emit: splitted - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.suffix ? "${meta.sample}_${task.ext.suffix}" : "${meta.sample}_peptides" - def min_length = (meta.mhc_class == "I") ? params.min_peptide_length_classI : params.min_peptide_length_classII - def max_length = (meta.mhc_class == "I") ? params.max_peptide_length_classI : params.max_peptide_length_classII - - """ - gen_peptides.py --input ${raw} \\ - --max_length ${max_length} \\ - --min_length ${min_length} \\ - --output '${prefix}.tsv' \\ - $task.ext.args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - epytope: \$(python -c "import pkg_resources; print(pkg_resources.get_distribution('epytope').version)") - python: \$(python --version 2>&1 | sed 's/Python //g') - END_VERSIONS - """ - - stub: - def prefix = task.ext.suffix ? 
"${meta.sample}_${task.ext.suffix}" : "${meta.sample}_peptides" - """ - touch ${prefix}.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - epytope: \$(python -c "import pkg_resources; print(pkg_resources.get_distribution('epytope').version)") - python: \$(python --version 2>&1 | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/local/fasta2peptides/environment.yml b/modules/local/fasta2peptides/environment.yml new file mode 100644 index 0000000..6bd963e --- /dev/null +++ b/modules/local/fasta2peptides/environment.yml @@ -0,0 +1,5 @@ +channels: +- conda-forge +- bioconda +dependencies: +- bioconda::biopython=1.85 diff --git a/modules/local/fasta2peptides/main.nf b/modules/local/fasta2peptides/main.nf new file mode 100644 index 0000000..2240990 --- /dev/null +++ b/modules/local/fasta2peptides/main.nf @@ -0,0 +1,58 @@ +process FASTA2PEPTIDES { + label 'process_low' + tag "${meta.sample}" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/83/8372f6241b480332d91bc00a88ec8c72c8f7fcc9994177a5dd67a07007cd6e32/data' : + 'community.wave.seqera.io/library/biopython:1.85--6f761292fa9881b4' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.sample}" + def min_length = meta.mhc_class == "I" ? params.min_peptide_length_classI : params.min_peptide_length_classII + def max_length = meta.mhc_class == "I" ? params.max_peptide_length_classI : params.max_peptide_length_classII + def pep_col = params.peptide_col_name ? "${params.peptide_col_name}" : "sequence" + + """ + fasta2peptides.py \\ + -i $fasta \\ + -o ${prefix} \\ + -minl ${min_length} \\ + -maxl ${max_length} \\ + -pepcol ${pep_col} \\ + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python3 --version 2>&1 | cut -d' ' -f2) + biopython: \$(python3 -c "import Bio; print(Bio.__version__)") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.sample}" + def min_length = meta.mhc_class == "I" ? params.min_peptide_length_classI : params.min_peptide_length_classII + def max_length = meta.mhc_class == "I" ? 
params.max_peptide_length_classI : params.max_peptide_length_classII + + """ + touch ${prefix}_length_${min_length}.tsv + touch ${prefix}_length_${max_length}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python3 --version 2>&1 | cut -d' ' -f2) + biopython: \$(python3 -c "import Bio; print(Bio.__version__)") + END_VERSIONS + """ + +} diff --git a/modules/local/merge_predictions/main.nf b/modules/local/merge_predictions/main.nf index 9a39f4e..5c81a69 100644 --- a/modules/local/merge_predictions/main.nf +++ b/modules/local/merge_predictions/main.nf @@ -8,12 +8,11 @@ process MERGE_PREDICTIONS { 'biocontainers/mhcgnomes:1.8.6--pyh7cba7a3_0' }" input: - tuple val(meta), path(prediction_files) + tuple val(meta), path(prediction_files), path(source_file) output: tuple val(meta), path("*.tsv"), emit: merged - path "versions.yml", emit: versions - + path "versions.yml" , emit: versions script: //TODO handle the thresholds (parse the --tools_thresholds and --use_affinity_thresholds) @@ -21,7 +20,7 @@ process MERGE_PREDICTIONS { stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: meta.sample + def prefix = task.ext.prefix ?: "${meta.id}" """ touch merged_prediction.tsv touch versions.yml diff --git a/modules/local/merge_predictions/templates/merge_predictions.py b/modules/local/merge_predictions/templates/merge_predictions.py index 01e55fe..987b1f0 100644 --- a/modules/local/merge_predictions/templates/merge_predictions.py +++ b/modules/local/merge_predictions/templates/merge_predictions.py @@ -32,8 +32,10 @@ class Arguments: def __init__(self) -> None: self.input = "$prediction_files".split(" ") + self.source_file = "$source_file" self.prefix = "$task.ext.prefix" if "$task.ext.prefix" != "null" else "$meta.id" self.alleles = sorted("$meta.alleles".split(';')) + self.peptide_col_name = "sequence" self.parse_ext_args("$task.ext.args") def parse_ext_args(self, args_string: str) -> None: @@ -47,6 +49,8 @@ def parse_ext_args(self, args_string: str) -> None: # Parse the extended arguments args_list = shlex.split(args_string) # Split the string into a list of arguments parser = argparse.ArgumentParser() + # Define custom script argument + parser.add_argument("--wide_format_output", action="store_true") # input parameters args = parser.parse_args(args_list) @@ -101,11 +105,44 @@ def BAtoic50(BA: float) -> float: """Convert binding affinity (BA) to IC50.""" return 10 ** ((1 - BA) * math.log10(50000)) + @staticmethod + def longTowide(df: pd.DataFrame) -> pd.DataFrame: + """ + Transforms a long-format DataFrame into a wide-format DataFrame, + where 'predictor-allele-BA', 'predictor-allele-rank', and 'predictor-allele-binder' + become separate columns. + + Parameters: + df (pd.DataFrame): The original long-format DataFrame. + + Returns: + pd.DataFrame: Transformed wide-format DataFrame. 
+ """ + # Identify non-predictor columns to keep as index + meta_columns = [col for col in df.columns if col not in ['predictor', 'allele', 'BA', 'rank', 'binder']] + + # Pivot to wide format + df_wide = df.pivot_table( + index=meta_columns, + columns=['predictor', 'allele'], + values=['BA', 'rank', 'binder'], + aggfunc='first' # Assuming first occurrence is enough if duplicates exist + ) + + # Flatten the MultiIndex columns + df_wide.columns = [f"{pred}_{allele}_{val}" for val, pred, allele in df_wide.columns] + + # Reset index + df_wide.reset_index(inplace=True) + + return df_wide + + class PredictionResult: - def __init__(self, file_path, alleles, threshold=2): + def __init__(self, file_path, alleles, peptide_col_name): self.file_path = file_path self.alleles = alleles - self.threshold = threshold + self.peptide_col_name = peptide_col_name self.predictor = None self.prediction_df = self._format_prediction_result() @@ -150,10 +187,10 @@ def _format_mhcflurry_prediction(self) -> pd.DataFrame: # Convert IC50 to BA df['BA'] = df['mhcflurry_affinity'].apply(Utils.ic50toBA) # Harmonize df to desired output structure - df.rename(columns={"peptide": "sequence", "mhcflurry_presentation_percentile": "rank"}, inplace=True) - df = df[['sequence', 'allele', 'rank', 'BA']] - df["binder"] = df["rank"] <= PredictorBindingThreshold.MHCFLURRY.value - df["predictor"] = self.predictor + df.rename(columns={'peptide': self.peptide_col_name, 'mhcflurry_presentation_percentile': 'rank'}, inplace=True) + df = df[[self.peptide_col_name, 'allele', 'rank', 'BA']] + df['binder'] = df['rank'] <= PredictorBindingThreshold.MHCFLURRY.value + df['predictor'] = self.predictor return df @@ -163,11 +200,11 @@ def _format_mhcnuggets_prediction(self) -> pd.DataFrame: # Convert IC50 to BA df['BA'] = df['ic50'].apply(Utils.ic50toBA) # Harmonize df to desired output structure - df.rename(columns={"peptide": "sequence", "human_proteome_rank": "rank"}, inplace=True) - df = df[['sequence', 'allele', 'rank', 'BA']] + df.rename(columns={'peptide': self.peptide_col_name, 'human_proteome_rank': 'rank'}, inplace=True) + df = df[[self.peptide_col_name, 'allele', 'rank', 'BA']] # Use IC50 < 500 as threshold since mhcnuggets provides a different ranking compared to other predictors - df["binder"] = df["BA"] >= PredictorBindingThreshold.MHCNUGGETS.value - df["predictor"] = self.predictor + df['binder'] = df['BA'] >= PredictorBindingThreshold.MHCNUGGETS.value + df['predictor'] = self.predictor return df @@ -178,24 +215,24 @@ def _format_netmhcpan_prediction(self) -> pd.DataFrame: df = pd.read_csv(self.file_path, sep='\t', skiprows=1) # Extract Peptide, percentile rank, binding affinity df = df[df.columns[df.columns.str.contains('Peptide|EL_Rank|BA-score')]] - df = df.rename(columns={'Peptide':'sequence','EL_Rank':'EL_Rank.0','BA-score':'BA-score.0'}) + df = df.rename(columns={'Peptide':self.peptide_col_name,'EL_Rank':'EL_Rank.0','BA-score':'BA-score.0'}) # to longformat based on .0|1|2.. df_long = pd.melt( df, - id_vars=["sequence"], - value_vars=[col for col in df.columns if col != "sequence"], - var_name="metric", - value_name="value", + id_vars=[self.peptide_col_name], + value_vars=[col for col in df.columns if col != self.peptide_col_name], + var_name='metric', + value_name='value', ) # Extract the allele information (e.g., .0, .1, etc.) 
- df_long["allele"] = df_long["metric"].str.split('.').str[1] - df_long["metric"] = df_long["metric"].apply(lambda x: x.split('.')[0].replace('EL_Rank','rank').replace('BA-score','BA')) + df_long['allele'] = df_long['metric'].str.split('.').str[1] + df_long['metric'] = df_long['metric'].apply(lambda x: x.split('.')[0].replace('EL_Rank','rank').replace('BA-score','BA')) # Pivot table to organize columns properly - df_pivot = df_long.pivot_table(index=["sequence", "allele"], columns="metric", values="value").reset_index() - df_pivot['allele'] = [alleles_dict[int(index.strip("."))] for index in df_pivot['allele']] - df_pivot['binder'] = df_pivot['rank'] <= self.threshold + df_pivot = df_long.pivot_table(index=[self.peptide_col_name, 'allele'], columns='metric', values='value').reset_index() + df_pivot['allele'] = [alleles_dict[int(index.strip('.'))] for index in df_pivot['allele']] + df_pivot['binder'] = df_pivot['rank'] <= PredictorBindingThreshold.NETMHCPAN.value df_pivot['predictor'] = self.predictor df_pivot.index.name = '' @@ -212,22 +249,22 @@ def _format_netmhciipan_prediction(self) -> pd.DataFrame: df = pd.read_csv(self.file_path, sep='\t', skiprows=1) # Extract Peptide, percentile rank, binding affinity df = df[df.columns[df.columns.str.contains('Peptide|Rank(?!_BA)|Score_BA')]] - df = df.rename(columns={'Peptide':'sequence','Rank':'Rank.0','Score_BA':'Score_BA.0'}) + df = df.rename(columns={'Peptide':self.peptide_col_name,'Rank':'Rank.0','Score_BA':'Score_BA.0'}) # to longformat based on .0|1|2.. df_long = pd.melt( df, - id_vars=["sequence"], - value_vars=[col for col in df.columns if col != "sequence"], - var_name="metric", - value_name="value", + id_vars=[self.peptide_col_name], + value_vars=[col for col in df.columns if col != self.peptide_col_name], + var_name='metric', + value_name='value', ) # Extract the allele information (e.g., .0, .1, etc.) 
- df_long["allele"] = df_long["metric"].str.split('.').str[1] - df_long["metric"] = df_long["metric"].apply(lambda x: x.split('.')[0].replace('Rank','rank').replace('Score_BA','BA')) + df_long['allele'] = df_long['metric'].str.split('.').str[1] + df_long['metric'] = df_long['metric'].apply(lambda x: x.split('.')[0].replace('Rank','rank').replace('Score_BA','BA')) # Pivot table to organize columns properly - df_pivot = df_long.pivot_table(index=["sequence", "allele"], columns="metric", values="value").reset_index() - df_pivot['allele'] = [alleles_dict[int(index.strip("."))] for index in df_pivot['allele']] + df_pivot = df_long.pivot_table(index=[self.peptide_col_name, 'allele'], columns='metric', values='value').reset_index() + df_pivot['allele'] = [alleles_dict[int(index.strip('.'))] for index in df_pivot['allele']] df_pivot['binder'] = df_pivot['rank'] <= PredictorBindingThreshold.NETMHCIIPAN.value df_pivot['predictor'] = self.predictor df_pivot.index.name = '' @@ -240,14 +277,25 @@ def main(): # Iterate over each file predicted by multiple predictors, harmonize and merge output output_df = [] for file in args.input: - result = PredictionResult(file, args.alleles) + result = PredictionResult(file, args.alleles, args.peptide_col_name) logging.info(f"Writing {len(result.prediction_df)} {result.predictor} predictions to file..") output_df.append(result.prediction_df) output_df = pd.concat(output_df) - # Normalize allele names and write to file + # Normalize allele names output_df['allele'] = output_df['allele'].apply(lambda x : mhcgnomes.parse(x).to_string()) + + # Read in source file to annotate source metadata + source_df = pd.read_csv(args.source_file, sep='\t') + # Merge the prediction results with the source file + output_df = pd.merge(output_df, source_df, on=args.peptide_col_name, how='left') + + # Transform output to wide format if specified + if args.wide_format_output: + output_df = Utils.longTowide(output_df) + + # Write output file output_df.to_csv(f'{args.prefix}_predictions.tsv', sep='\t', index=False) # Parse versions diff --git a/modules/local/merge_predictions/templates/versions.yml b/modules/local/merge_predictions/templates/versions.yml deleted file mode 100644 index a16678a..0000000 --- a/modules/local/merge_predictions/templates/versions.yml +++ /dev/null @@ -1 +0,0 @@ -${task.process}:\n argparse: 1.1\n pandas: 1.5.3\n \ No newline at end of file diff --git a/modules/local/mhcflurry/main.nf b/modules/local/mhcflurry/main.nf index 41e9030..3281b8b 100644 --- a/modules/local/mhcflurry/main.nf +++ b/modules/local/mhcflurry/main.nf @@ -11,7 +11,7 @@ process MHCFLURRY { containerOptions = (workflow.containerEngine == 'docker') ? 
'-u $(id -u) -e "HOME=${HOME}" -v /etc/passwd:/etc/passwd:ro -v /etc/shadow:/etc/shadow:ro -v /etc/group:/etc/group:ro -v $HOME:$HOME' : '' input: - tuple val(meta), path(peptide_file) + tuple val(meta), path(csv) output: tuple val(meta), path("*.csv"), emit: predicted @@ -22,12 +22,12 @@ process MHCFLURRY { error("MHCflurry prediction of ${meta.sample} is not possible with MHC class II!") } def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.sample}" + def prefix = task.ext.prefix ?: "${meta.id}" """ mhcflurry-downloads fetch models_class1_presentation mhcflurry-predict \\ - $peptide_file \\ + $csv \\ --out ${prefix}_predicted_mhcflurry.csv \\ $args @@ -38,8 +38,7 @@ process MHCFLURRY { """ stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: meta.sample + def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_predicted_mhcflurry.csv diff --git a/modules/local/mhcnuggets/main.nf b/modules/local/mhcnuggets/main.nf index f941758..ef9d926 100644 --- a/modules/local/mhcnuggets/main.nf +++ b/modules/local/mhcnuggets/main.nf @@ -19,8 +19,8 @@ process MHCNUGGETS { template "mhcnuggets.py" stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.sample}" + def prefix = task.ext.prefix ?: "${meta.id}" + """ touch ${prefix}_predicted_mhcnuggets.tsv diff --git a/modules/local/mhcnuggets/templates/mhcnuggets.py b/modules/local/mhcnuggets/templates/mhcnuggets.py index 1c6b1ce..eb261a8 100644 --- a/modules/local/mhcnuggets/templates/mhcnuggets.py +++ b/modules/local/mhcnuggets/templates/mhcnuggets.py @@ -3,6 +3,7 @@ import argparse import shlex import logging +from pathlib import Path import pandas as pd from mhcnuggets.src.predict import predict diff --git a/modules/local/netmhciipan/main.nf b/modules/local/netmhciipan/main.nf index f244e43..ae8c567 100644 --- a/modules/local/netmhciipan/main.nf +++ b/modules/local/netmhciipan/main.nf @@ -4,11 +4,11 @@ process NETMHCIIPAN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://community.wave.seqera.io/library/bash_gawk_perl_tcsh:59b949a8a9f0816b' : + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/de/de9c5fbcc5583f3c096617ef2c8f84c5e69b479cc5a5944f10d0e1d226779662/data' : 'community.wave.seqera.io/library/bash_gawk_perl_tcsh:a941b4e9bd4b8805' }" input: - tuple val(meta), path(peptide_file), path(software) + tuple val(meta), path(tsv), path(software) output: tuple val(meta), path("*.xls"), emit: predicted @@ -18,8 +18,8 @@ process NETMHCIIPAN { if (meta.mhc_class != "II") { error "NETMHCIIPAN only supports MHC class II. Use NETMHCIIPAN for MHC class II." } - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" // Adjust for netMHCIIpan allele format (e.g. 
DRB1_0101, HLA-DPA10103-DPB10101) def alleles = meta.alleles_supported.tokenize(';') .collect { @@ -30,7 +30,7 @@ process NETMHCIIPAN { """ netmhciipan/netMHCIIpan \ - -f $peptide_file \ + -f $tsv \ -inptype 1 \ -a $alleles \ -xls \ @@ -45,7 +45,7 @@ process NETMHCIIPAN { stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: meta.sample + def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_predicted_netmhciipan.xls diff --git a/modules/local/netmhcpan/main.nf b/modules/local/netmhcpan/main.nf index 64516b1..220772d 100644 --- a/modules/local/netmhcpan/main.nf +++ b/modules/local/netmhcpan/main.nf @@ -4,11 +4,11 @@ process NETMHCPAN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://community.wave.seqera.io/library/bash_gawk_perl_tcsh:59b949a8a9f0816b' : + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/de/de9c5fbcc5583f3c096617ef2c8f84c5e69b479cc5a5944f10d0e1d226779662/data' : 'community.wave.seqera.io/library/bash_gawk_perl_tcsh:a941b4e9bd4b8805' }" input: - tuple val(meta), path(peptide_file), path(software) + tuple val(meta), path(tsv), path(software) output: tuple val(meta), path("*.xls"), emit: predicted @@ -18,13 +18,13 @@ process NETMHCPAN { if (meta.mhc_class != "I") { error "NETMHCPAN only supports MHC class I. Use NETMHCIIPAN for MHC class II." } - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: meta.sample - def alleles = meta.alleles_supported.tokenize(';').collect { it.replace('*', '') }.join(',') + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def alleles = meta.alleles_supported.tokenize(';').collect { it.replace('*', '') }.join(',') """ netmhcpan/netMHCpan \ - -p $peptide_file \ + -p $tsv \ -a $alleles \ -xls \ -xlsfile ${prefix}_predicted_netmhcpan.xls \ @@ -37,8 +37,8 @@ process NETMHCPAN { """ stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: meta.sample + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_predicted_netmhcpan.xls diff --git a/modules/local/prepare_prediction_input/main.nf b/modules/local/prepare_prediction_input/main.nf index 49531d2..0da7b0c 100644 --- a/modules/local/prepare_prediction_input/main.nf +++ b/modules/local/prepare_prediction_input/main.nf @@ -20,7 +20,7 @@ process PREPARE_PREDICTION_INPUT { stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: meta.sample + def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_syfpeithi.csv touch ${prefix}_mhcflurry.csv diff --git a/modules/local/prepare_prediction_input/templates/prepare_prediction_input.py b/modules/local/prepare_prediction_input/templates/prepare_prediction_input.py index 423fce0..5a806cc 100644 --- a/modules/local/prepare_prediction_input/templates/prepare_prediction_input.py +++ b/modules/local/prepare_prediction_input/templates/prepare_prediction_input.py @@ -2,9 +2,10 @@ import argparse import shlex -from enum import Enum -import logging import json +import logging +from enum import Enum +from pathlib import Path import pandas as pd import mhcgnomes @@ -144,24 +145,17 @@ def keep_supported_alleles(allele_ls: list, tool: str, supported_alleles_tool: d return tool_allele_input + def has_valid_aas(peptide: str) -> bool: + """ + Check if a peptide contains only valid amino acids. 
+ """ + valid_aas = "ACDEFGHIKLMNPQRSTVWY" + return all(aa in valid_aas for aa in peptide) + def main(): args = Arguments() - ### DEBUGGING INPUT - #args.input = '/mnt/volume/data/epitopeprediction/peptides.tsv' - #args.supported_alleles_json = '/mnt/volume/dev/mhcgnomes/supported_alleles.json' - #args.mhc_class = 'I' - #args.alleles='/mnt/volume/dev/epitopeprediction-2/modules/local/check_supported_alleles/templates/test_alleles.txt' - #args.tools = ['mhcflurry', 'mhcnuggets', 'mhcnuggetsii', 'netmhcpan','netmhciipan'] - #args.prefix = "test" - #args.min_peptide_length_classI = 8 - #args.max_peptide_length_classI = 12 - #args.min_peptide_length_classII = 12 - #args.max_peptide_length_classII = 25 - #args.peptide_col_name = "sequence" - ### - # Parse alleles to uniform format alleles_normalized = Utils.parse_alleles(args.alleles) supported_alleles_dict = json.load(open(args.supported_alleles_json)) @@ -176,13 +170,13 @@ def main(): # Parse input file to desired format of tools df_input = pd.read_csv(args.input, sep="\t") logging.info(f"Read file with {len(df_input)} peptides.") - + # Filter peptides with invalid amino acids + df_input = df_input[df_input[args.peptide_col_name].apply(Utils.has_valid_aas)] # Filter peptides based on user-defined length if args.mhc_class == "I": df = df_input[df_input[args.peptide_col_name].str.len().between(args.min_peptide_length_classI, args.max_peptide_length_classI)] else: df = df_input[df_input[args.peptide_col_name].str.len().between(args.min_peptide_length_classII, args.max_peptide_length_classII)] - if len(df) == 0: raise ValueError("No peptides left after applying length filters! Aborting..") diff --git a/modules/local/split_peptides/environment.yml b/modules/local/split_peptides/environment.yml new file mode 100644 index 0000000..f399d67 --- /dev/null +++ b/modules/local/split_peptides/environment.yml @@ -0,0 +1,5 @@ +channels: +- conda-forge +- bioconda +dependencies: +- conda-forge::python=3.11.0 diff --git a/modules/local/split_peptides.nf b/modules/local/split_peptides/main.nf similarity index 54% rename from modules/local/split_peptides.nf rename to modules/local/split_peptides/main.nf index 998b18a..e48c4f2 100644 --- a/modules/local/split_peptides.nf +++ b/modules/local/split_peptides/main.nf @@ -1,28 +1,31 @@ process SPLIT_PEPTIDES { label 'process_low' + tag "${meta.sample}" - conda "conda-forge::python=3.8.3" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" + 'https://depot.galaxyproject.org/singularity/python:3.11' : + 'biocontainers/python:3.11' }" input: - tuple val(meta), path(peptide) + tuple val(meta), path(tsv) output: tuple val(meta), path("*.tsv"), emit: splitted - path "versions.yml", emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def prefix = task.ext.suffix ? 
"${peptide.baseName}_${task.ext.suffix}" : "${peptide.baseName}" + def prefix = task.ext.suffix ?: "${tsv.baseName}" """ - split_peptides.py --input ${peptide} \\ - --output_base "${prefix}" \\ - $task.ext.args + split_peptides.py \\ + --input $tsv \\ + --output_base "${prefix}" \\ + --min_size ${params.peptides_split_minchunksize} \\ + --max_chunks ${params.peptides_split_maxchunks} \\ cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -31,7 +34,8 @@ process SPLIT_PEPTIDES { """ stub: - def prefix = task.ext.suffix ? "${peptide.baseName}_${task.ext.suffix}" : "${peptide.baseName}" + def prefix = task.ext.suffix ?: "${tsv.getExtension()}" + """ touch ${prefix}_1.tsv touch ${prefix}_2.tsv diff --git a/modules/nf-core/csvtk/concat/environment.yml b/modules/nf-core/csvtk/concat/environment.yml new file mode 100644 index 0000000..12087be --- /dev/null +++ b/modules/nf-core/csvtk/concat/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::csvtk=0.31.0 diff --git a/modules/nf-core/csvtk/concat/main.nf b/modules/nf-core/csvtk/concat/main.nf new file mode 100644 index 0000000..9f17a9b --- /dev/null +++ b/modules/nf-core/csvtk/concat/main.nf @@ -0,0 +1,55 @@ +process CSVTK_CONCAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/csvtk:0.31.0--h9ee0642_0' : + 'biocontainers/csvtk:0.31.0--h9ee0642_0' }" + + input: + tuple val(meta), path(csv, name: 'inputs/csv*/*') + val in_format + val out_format + + output: + tuple val(meta), path("${prefix}.${out_extension}"), emit: csv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def delimiter = in_format == "tsv" ? "\t" : (in_format == "csv" ? "," : in_format) + def out_delimiter = out_format == "tsv" ? "\t" : (out_format == "csv" ? "," : out_format) + out_extension = out_format == "tsv" ? 'tsv' : 'csv' + """ + csvtk \\ + concat \\ + $args \\ + --num-cpus $task.cpus \\ + --delimiter "${delimiter}" \\ + --out-delimiter "${out_delimiter}" \\ + --out-file ${prefix}.${out_extension} \\ + $csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" )) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + out_extension = out_format == "tsv" ? 'tsv' : 'csv' + """ + touch ${prefix}.${out_extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/csvtk/concat/meta.yml b/modules/nf-core/csvtk/concat/meta.yml new file mode 100644 index 0000000..27ffc1c --- /dev/null +++ b/modules/nf-core/csvtk/concat/meta.yml @@ -0,0 +1,52 @@ +name: csvtk_concat +description: Concatenate two or more CSV (or TSV) tables into a single table +keywords: + - concatenate + - tsv + - csv +tools: + - csvtk: + description: A cross-platform, efficient, practical CSV/TSV toolkit + homepage: http://bioinf.shenwei.me/csvtk + documentation: http://bioinf.shenwei.me/csvtk + tool_dev_url: https://github.com/shenwei356/csvtk + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - csv: + type: file + description: CSV/TSV formatted files + pattern: "*.{csv,tsv}" + - - in_format: + type: string + description: Input format (csv, tab, or a delimiting character) + pattern: "*" + - - out_format: + type: string + description: Output format (csv, tab, or a delimiting character) + pattern: "*" +output: + - csv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${out_extension}: + type: file + description: Concatenated CSV/TSV file + pattern: "*.{csv,tsv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "version.yml" +authors: + - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/nf-core/csvtk/concat/tests/main.nf.test b/modules/nf-core/csvtk/concat/tests/main.nf.test new file mode 100644 index 0000000..b6c1a58 --- /dev/null +++ b/modules/nf-core/csvtk/concat/tests/main.nf.test @@ -0,0 +1,72 @@ +// nf-core modules test csvtk/concat +nextflow_process { + + name "Test Process CSVTK_CONCAT" + script "../main.nf" + process "CSVTK_CONCAT" + + tag "modules" + tag "modules_nfcore" + tag "csvtk" + tag "csvtk/concat" + + test("tsv - concat - csv") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file("https://github.com/nf-core/test-datasets/raw/bacass/bacass_hybrid.csv", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/bacass/bacass_long.csv", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/bacass/bacass_short.csv", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/bacass/bacass_short.csv", checkIfExists: true) + ] + ] + input[1] = "tsv" + input[2] = "csv" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("tsv - concat - csv - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file("https://github.com/nf-core/test-datasets/raw/bacass/bacass_hybrid.csv", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/bacass/bacass_long.csv", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/bacass/bacass_short.csv", checkIfExists: true) + ] + ] + input[1] = "tsv" + input[2] = "csv" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/csvtk/concat/tests/main.nf.test.snap b/modules/nf-core/csvtk/concat/tests/main.nf.test.snap new file mode 100644 index 0000000..254d34a --- /dev/null +++ b/modules/nf-core/csvtk/concat/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "tsv - concat - csv - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,c203a84cc5b289951b70302549dcf08d" + ], + "csv": [ + [ + { + "id": "test" + }, + "test.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,c203a84cc5b289951b70302549dcf08d" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-08T04:46:46.133640633" + }, + "tsv - concat - csv": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.csv:md5,bb0ed52999b6b24297bcefb3c29f0a5c" + ] + ], + "1": [ + "versions.yml:md5,c203a84cc5b289951b70302549dcf08d" + ], + "csv": [ + [ + { + 
"id": "test" + }, + "test.csv:md5,bb0ed52999b6b24297bcefb3c29f0a5c" + ] + ], + "versions": [ + "versions.yml:md5,c203a84cc5b289951b70302549dcf08d" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-08T04:46:31.419386462" + } +} \ No newline at end of file diff --git a/modules/nf-core/csvtk/concat/tests/tags.yml b/modules/nf-core/csvtk/concat/tests/tags.yml new file mode 100644 index 0000000..0d10e7c --- /dev/null +++ b/modules/nf-core/csvtk/concat/tests/tags.yml @@ -0,0 +1,2 @@ +csvtk/concat: + - "modules/nf-core/csvtk/concat/**" diff --git a/nextflow.config b/nextflow.config index 95864e5..d915baf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,11 +10,13 @@ params { // Input options input = null + peptide_col_name = 'sequence' peptides_split_maxchunks = 100 - peptides_split_minchunksize = 5000 + peptides_split_minchunksize = 500 split_by_variants = false split_by_variants_size = 0 split_by_variants_distance = 110000 + wide_format_output = false // References genome_reference = 'grch37' @@ -26,7 +28,6 @@ params { // Options: Filtering filter_self = false - show_supported_models = false // External tools external_tools_meta = "${projectDir}/assets/external_tools_meta.json" diff --git a/nextflow_schema.json b/nextflow_schema.json index 5b9cba1..7765f00 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -66,6 +66,11 @@ "description": "Options for the peptide prediction step.", "fa_icon": "far fa-chart-bar", "properties": { + "peptide_col_name": { + "type": "string", + "default": "sequence", + "description": "Specifies the column name in the input file that contains the peptide sequences." + }, "filter_self": { "type": "boolean", "description": "Filter against human proteome.", @@ -119,10 +124,11 @@ "description": "Specifies that sequences of proteins, affected by provided variants, will be written to a FASTA file.", "help_text": "Specifies that sequences of proteins that are affected by the provided genomic variants are written to a `FASTA` file. The resulting `FASTA` file will contain the wild-type and mutated protein sequences." }, - "show_supported_models": { + "wide_format_output": { "type": "boolean", - "description": "Writes out supported prediction models.", - "help_text": "Writes out supported models. Does not run any predictions." + "default": false, + "description": "Specifies that the output file will be in wide format.", + "help_text": "Transforms output file such that for each predictor and allele a column `predictor_allele_predictionMetric` is created." 
} } }, diff --git a/subworkflows/local/mhc_binding_prediction/main.nf b/subworkflows/local/mhc_binding_prediction/main.nf index 246efe1..7213397 100755 --- a/subworkflows/local/mhc_binding_prediction/main.nf +++ b/subworkflows/local/mhc_binding_prediction/main.nf @@ -1,13 +1,13 @@ -include { PREPARE_PREDICTION_INPUT } from '../../../modules/local/prepare_prediction_input' -include { SYFPEITHI } from '../../../modules/local/syfpeithi' -include { MHCFLURRY } from '../../../modules/local/mhcflurry' +include { PREPARE_PREDICTION_INPUT } from '../../../modules/local/prepare_prediction_input' +include { SYFPEITHI } from '../../../modules/local/syfpeithi' +include { MHCFLURRY } from '../../../modules/local/mhcflurry' include { MHCNUGGETS; - MHCNUGGETS as MHCNUGGETSII } from '../../../modules/local/mhcnuggets' -include { NETMHCPAN } from '../../../modules/local/netmhcpan' -include { NETMHCIIPAN } from '../../../modules/local/netmhciipan' + MHCNUGGETS as MHCNUGGETSII } from '../../../modules/local/mhcnuggets' +include { NETMHCPAN } from '../../../modules/local/netmhcpan' +include { NETMHCIIPAN } from '../../../modules/local/netmhciipan' include { UNPACK_NETMHC_SOFTWARE as NETMHCPAN_IMPORT; - UNPACK_NETMHC_SOFTWARE as NETMHCIIPAN_IMPORT} from '../../../modules/local/unpack_netmhc_software' -include { MERGE_PREDICTIONS } from '../../../modules/local/merge_predictions' + UNPACK_NETMHC_SOFTWARE as NETMHCIIPAN_IMPORT } from '../../../modules/local/unpack_netmhc_software' +include { MERGE_PREDICTIONS } from '../../../modules/local/merge_predictions' workflow MHC_BINDING_PREDICTION { take: @@ -22,8 +22,13 @@ workflow MHC_BINDING_PREDICTION { validate_tools_param(tools) + // Add filename to meta to prevent overwriting identically named files + ch_peptides + .map { meta, file -> [meta + [file_id: meta.sample + '_' + file.baseName], file] } + .set { ch_peptides_to_predict } + //prepare the input file - PREPARE_PREDICTION_INPUT( ch_peptides, supported_alleles_json) + PREPARE_PREDICTION_INPUT( ch_peptides_to_predict, supported_alleles_json) .prepared .transpose() .branch { @@ -70,9 +75,13 @@ workflow MHC_BINDING_PREDICTION { ch_binding_predictors_out = ch_binding_predictors_out.mix(NETMHCIIPAN.out.predicted) } - MERGE_PREDICTIONS( ch_binding_predictors_out - .map { meta, file -> [meta.subMap('sample','alleles','mhc_class','input_type'), file] } - .groupTuple()) + ch_binding_predictors_out + .map { meta, file -> [meta.subMap('sample','alleles','mhc_class','input_type','file_id'), file] } + .groupTuple() + .join(ch_peptides_to_predict) + .set { ch_binding_predictors_out_meta} + + MERGE_PREDICTIONS( ch_binding_predictors_out_meta ) ch_versions = ch_versions.mix(MERGE_PREDICTIONS.out.versions) emit: @@ -80,8 +89,6 @@ workflow MHC_BINDING_PREDICTION { versions = ch_versions } - - // // Auxiliar Functions // diff --git a/workflows/epitopeprediction.nf b/workflows/epitopeprediction.nf index 9f17651..09f3ec4 100644 --- a/workflows/epitopeprediction.nf +++ b/workflows/epitopeprediction.nf @@ -6,19 +6,14 @@ // // MODULE: Local to the pipeline // -include { VARIANT_SPLIT } from '../modules/local/variant_split' -include { SNPSIFT_SPLIT } from '../modules/local/snpsift_split' -include { EPYTOPE_GENERATE_PEPTIDES } from '../modules/local/epytope_generate_peptides' -include { SPLIT_PEPTIDES as SPLIT_PEPTIDES_PEPTIDES } from '../modules/local/split_peptides' -include { SPLIT_PEPTIDES as SPLIT_PEPTIDES_PROTEIN } from '../modules/local/split_peptides' -include { EPYTOPE_PEPTIDE_PREDICTION as EPYTOPE_PEPTIDE_PREDICTION_PROTEIN } 
from '../modules/local/epytope_peptide_prediction' -include { EPYTOPE_PEPTIDE_PREDICTION as EPYTOPE_PEPTIDE_PREDICTION_PEP } from '../modules/local/epytope_peptide_prediction' -include { EPYTOPE_PEPTIDE_PREDICTION as EPYTOPE_PEPTIDE_PREDICTION_VAR } from '../modules/local/epytope_peptide_prediction' -include { CAT_FILES as CAT_TSV } from '../modules/local/cat_files' -include { CAT_FILES as CAT_FASTA } from '../modules/local/cat_files' -include { CSVTK_CONCAT } from '../modules/local/csvtk_concat' -include { MERGE_JSON as MERGE_JSON_SINGLE } from '../modules/local/merge_json' -include { MERGE_JSON as MERGE_JSON_MULTI } from '../modules/local/merge_json' +include { VARIANT_SPLIT } from '../modules/local/variant_split' +include { SNPSIFT_SPLIT } from '../modules/local/snpsift_split' +include { FASTA2PEPTIDES } from '../modules/local/fasta2peptides' +include { SPLIT_PEPTIDES } from '../modules/local/split_peptides' +include { EPYTOPE_PEPTIDE_PREDICTION as EPYTOPE_PEPTIDE_PREDICTION_VAR } from '../modules/local/epytope_peptide_prediction' +include { CAT_FILES as CAT_TSV } from '../modules/local/cat_files' +include { CAT_FILES as CAT_FASTA } from '../modules/local/cat_files' +include { CSVTK_CONCAT } from '../modules/nf-core/csvtk/concat' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -132,18 +127,8 @@ workflow EPITOPEPREDICTION { // RUN EPITOPE PREDICTION //======================================================================================== //*/ - // TODO: Implement Fasta parsing - //// not sure if this is the best solution to also have a extra process for protein, but I think we need it for cases when we have both in one sheet? (CM) - //// run epitope prediction for proteins - //EPYTOPE_PEPTIDE_PREDICTION_PROTEIN( - // SPLIT_PEPTIDES_PROTEIN - // .out - // .splitted - // .combine( ch_prediction_tool_versions ) - // .transpose(), - // EXTERNAL_TOOLS_IMPORT.out.nonfree_tools.collect().ifEmpty([]) - //) - //ch_versions = ch_versions.mix( EPYTOPE_PEPTIDE_PREDICTION_PROTEIN.out.versions ) + FASTA2PEPTIDES( ch_samples_uncompressed.protein ) + ch_versions = ch_versions.mix( FASTA2PEPTIDES.out.versions ) //ch_prediction_input = SPLIT_PEPTIDES_PEPTIDES.out.splitted.transpose() @@ -157,11 +142,27 @@ workflow EPITOPEPREDICTION { // EXTERNAL_TOOLS_IMPORT.out.nonfree_tools.collect().ifEmpty([]) //) //ch_versions = ch_versions.mix( EPYTOPE_PEPTIDE_PREDICTION_VAR.out.versions ) + // TODO: Speed up prediction by splitting large files + ch_to_predict = ch_samples_uncompressed.peptide + .mix(FASTA2PEPTIDES.out.tsv.transpose()) + + // Split tsv if size exceeds params.peptides_split_minchunksize + SPLIT_PEPTIDES(ch_to_predict) + ch_versions = ch_versions.mix(SPLIT_PEPTIDES.out.versions) - MHC_BINDING_PREDICTION( ch_samples_uncompressed.peptide, params.tools, supported_alleles_json, netmhc_software_meta) + // + // SUBWORKFLOW: MHC Binding Prediction + // + MHC_BINDING_PREDICTION( SPLIT_PEPTIDES.out.splitted.transpose(), + params.tools, + supported_alleles_json, + netmhc_software_meta) ch_versions = ch_versions.mix(MHC_BINDING_PREDICTION.out.versions) - ch_predicted_peptides = MHC_BINDING_PREDICTION.out.predicted + // TODO: Fix meta.id / meta.sample + CSVTK_CONCAT(MHC_BINDING_PREDICTION.out.predicted + .map { meta, file -> [meta.subMap('sample','alleles','mhc_class'), file] } + .groupTuple(), "tsv", "tsv") // // Collate and save software versions